%% Copyright (c) 2014-2018, Loïc Hoguin %% %% Permission to use, copy, modify, and/or distribute this software for any %% purpose with or without fee is hereby granted, provided that the above %% copyright notice and this permission notice appear in all copies. %% %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -module(cow_multipart). %% Parsing. -export([parse_headers/2]). -export([parse_body/2]). %% Building. -export([boundary/0]). -export([first_part/2]). -export([part/2]). -export([close/1]). %% Headers. -export([form_data/1]). -export([parse_content_disposition/1]). -export([parse_content_transfer_encoding/1]). -export([parse_content_type/1]). -type headers() :: [{iodata(), iodata()}]. -export_type([headers/0]). -include("cow_inline.hrl"). -define(TEST1_MIME, << "This is a message with multiple parts in MIME format.\r\n" "--frontier\r\n" "Content-Type: text/plain\r\n" "\r\n" "This is the body of the message.\r\n" "--frontier\r\n" "Content-Type: application/octet-stream\r\n" "Content-Transfer-Encoding: base64\r\n" "\r\n" "PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n" "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==\r\n" "--frontier--" >>). -define(TEST1_BOUNDARY, <<"frontier">>). 
-define(TEST2_MIME, << "--AaB03x\r\n" "Content-Disposition: form-data; name=\"submit-name\"\r\n" "\r\n" "Larry\r\n" "--AaB03x\r\n" "Content-Disposition: form-data; name=\"files\"\r\n" "Content-Type: multipart/mixed; boundary=BbC04y\r\n" "\r\n" "--BbC04y\r\n" "Content-Disposition: file; filename=\"file1.txt\"\r\n" "Content-Type: text/plain\r\n" "\r\n" "... contents of file1.txt ...\r\n" "--BbC04y\r\n" "Content-Disposition: file; filename=\"file2.gif\"\r\n" "Content-Type: image/gif\r\n" "Content-Transfer-Encoding: binary\r\n" "\r\n" "...contents of file2.gif...\r\n" "--BbC04y--\r\n" "--AaB03x--" >>). -define(TEST2_BOUNDARY, <<"AaB03x">>). -define(TEST3_MIME, << "This is the preamble.\r\n" "--boundary\r\n" "Content-Type: text/plain\r\n" "\r\n" "This is the body of the message.\r\n" "--boundary--" "\r\nThis is the epilogue. Here it includes leading CRLF" >>). -define(TEST3_BOUNDARY, <<"boundary">>). -define(TEST4_MIME, << "This is the preamble.\r\n" "--boundary\r\n" "Content-Type: text/plain\r\n" "\r\n" "This is the body of the message.\r\n" "--boundary--" "\r\n" >>). -define(TEST4_BOUNDARY, <<"boundary">>). %% RFC 2046, Section 5.1.1 -define(TEST5_MIME, << "This is the preamble. It is to be ignored, though it\r\n" "is a handy place for composition agents to include an\r\n" "explanatory note to non-MIME conformant readers.\r\n" "\r\n" "--simple boundary\r\n", "\r\n" "This is implicitly typed plain US-ASCII text.\r\n" "It does NOT end with a linebreak." "\r\n" "--simple boundary\r\n", "Content-type: text/plain; charset=us-ascii\r\n" "\r\n" "This is explicitly typed plain US-ASCII text.\r\n" "It DOES end with a linebreak.\r\n" "\r\n" "--simple boundary--\r\n" "\r\n" "This is the epilogue. It is also to be ignored." >>). -define(TEST5_BOUNDARY, <<"simple boundary">>). %% Parsing. %% %% The multipart format is defined in RFC 2045. %% @doc Parse the headers for the next multipart part. %% %% This function skips any preamble before the boundary. 
%% The preamble may be retrieved using parse_body/2.
%%
%% This function will accept input of any size, it is
%% up to the caller to limit it if needed.
%%
%% Return values:
%% <ul>
%% <li>`more': the headers are incomplete and nothing could be
%%   discarded from the input.</li>
%% <li>`{more, Rest}': the headers are incomplete; `Rest' is the
%%   suffix of the input that is still relevant.</li>
%% <li>`{ok, Headers, Rest}': `Headers' are in reverse order of
%%   appearance and `Rest' starts at the part body.</li>
%% <li>`{done, Epilogue}': the closing delimiter was found.</li>
%% </ul>
-spec parse_headers(binary(), binary())
    -> more | {more, binary()}
    | {ok, headers(), binary()}
    | {done, binary()}.
%% If the stream starts with the boundary we can make a few assumptions
%% and quickly figure out if we got the complete list of headers.
parse_headers(<< "--", Stream/bits >>, Boundary) ->
    BoundarySize = byte_size(Boundary),
    case Stream of
        %% Last boundary. Return the epilogue.
        << Boundary:BoundarySize/binary, "--", Stream2/bits >> ->
            {done, Stream2};
        << Boundary:BoundarySize/binary, Stream2/bits >> ->
            %% We have all the headers only if there is a \r\n\r\n
            %% somewhere in the data after the boundary.
            case binary:match(Stream2, <<"\r\n\r\n">>) of
                nomatch ->
                    more;
                _ ->
                    before_parse_headers(Stream2)
            end;
        %% If there isn't enough to represent Boundary \r\n\r\n
        %% then we definitely don't have all the headers.
        _ when byte_size(Stream) < byte_size(Boundary) + 4 ->
            more;
        %% Otherwise we have preamble data to skip.
        %% We still got rid of the first two misleading bytes.
        _ ->
            skip_preamble(Stream, Boundary)
    end;
%% Otherwise we have preamble data to skip.
parse_headers(Stream, Boundary) ->
    skip_preamble(Stream, Boundary).

%% We need to find the boundary and a \r\n\r\n after that.
%% Since the boundary isn't at the start, it must be right
%% after a \r\n too.
skip_preamble(Stream, Boundary) ->
    case binary:match(Stream, <<"\r\n--", Boundary/bits >>) of
        %% No boundary, need more data.
        nomatch ->
            %% The delimiter "\r\n--Boundary" may be cut off at the end of
            %% the stream. Its longest incomplete prefix is
            %% byte_size(Boundary) + 3 bytes long, so that many trailing
            %% bytes must be kept; everything before them can be skipped.
            SkipSize = byte_size(Stream) - byte_size(Boundary) - 3,
            case SkipSize > 0 of
                false ->
                    more;
                true ->
                    << _:SkipSize/binary, Stream2/bits >> = Stream,
                    {more, Stream2}
            end;
        {Start, Length} ->
            Start2 = Start + Length,
            << _:Start2/binary, Stream2/bits >> = Stream,
            case Stream2 of
                %% Last boundary. Return the epilogue.
                << "--", Stream3/bits >> ->
                    {done, Stream3};
                _ ->
                    %% Only look for the end of the headers after the
                    %% boundary: a blank line inside the preamble says
                    %% nothing about the headers being complete.
                    case binary:match(Stream2, <<"\r\n\r\n">>) of
                        %% We don't have the full headers. Drop the preamble
                        %% and the \r\n before the delimiter but keep the
                        %% "--Boundary" itself, so that parsing can resume
                        %% on this same part once more data is appended.
                        nomatch ->
                            DelimiterStart = Start + 2,
                            << _:DelimiterStart/binary, FromDelimiter/bits >> = Stream,
                            {more, FromDelimiter};
                        _ ->
                            before_parse_headers(Stream2)
                    end
            end
    end.

%% Called with the data that immediately follows the boundary.
before_parse_headers(<< "\r\n\r\n", Stream/bits >>) ->
    %% This indicates that there are no headers, so we can abort immediately.
    {ok, [], Stream};
before_parse_headers(<< "\r\n", Stream/bits >>) ->
    %% There is a line break right after the boundary, skip it.
    parse_hd_name(Stream, [], <<>>).

%% Accumulate a header name up to the colon.
%% ?LOWER comes from cow_inline.hrl (not visible here); the tests in this
%% module show that header names come out lowercased.
parse_hd_name(<< C, Rest/bits >>, H, SoFar) ->
    case C of
        $: -> parse_hd_before_value(Rest, H, SoFar);
        $\s -> parse_hd_name_ws(Rest, H, SoFar);
        $\t -> parse_hd_name_ws(Rest, H, SoFar);
        _ -> ?LOWER(parse_hd_name, Rest, H, SoFar)
    end.

%% Skip whitespace between the header name and the colon.
%% Anything other than whitespace or a colon is a crash.
parse_hd_name_ws(<< C, Rest/bits >>, H, Name) ->
    case C of
        $\s -> parse_hd_name_ws(Rest, H, Name);
        $\t -> parse_hd_name_ws(Rest, H, Name);
        $: -> parse_hd_before_value(Rest, H, Name)
    end.

%% Skip whitespace between the colon and the header value.
parse_hd_before_value(<< $\s, Rest/bits >>, H, N) ->
    parse_hd_before_value(Rest, H, N);
parse_hd_before_value(<< $\t, Rest/bits >>, H, N) ->
    parse_hd_before_value(Rest, H, N);
parse_hd_before_value(Buffer, H, N) ->
    parse_hd_value(Buffer, H, N, <<>>).

%% Accumulate a header value up to the end of the line.
parse_hd_value(<< $\r, Rest/bits >>, Headers, Name, SoFar) ->
    case Rest of
        %% Empty line: this was the last header.
        << "\n\r\n", Rest2/bits >> ->
            {ok, [{Name, SoFar}|Headers], Rest2};
        %% The next line starts with whitespace: the value continues.
        %% The line break and that first whitespace byte are dropped.
        << $\n, C, Rest2/bits >> when C =:= $\s; C =:= $\t ->
            parse_hd_value(Rest2, Headers, Name, SoFar);
        %% Otherwise a new header starts on the next line.
        << $\n, Rest2/bits >> ->
            parse_hd_name(Rest2, [{Name, SoFar}|Headers], <<>>)
    end;
parse_hd_value(<< C, Rest/bits >>, H, N, SoFar) ->
    parse_hd_value(Rest, H, N, << SoFar/binary, C >>).

%% @doc Parse the body of the current multipart part.
%%
%% The body is everything until the next boundary.
-spec parse_body(binary(), binary())
    -> {ok, binary()} | {ok, binary(), binary()}
    | done | {done, binary()} | {done, binary(), binary()}.
%% Returns:
%%   done               - the stream starts with "--Boundary", the part has
%%                        no (more) body;
%%   {done, Body, Rest} - the delimiter was found; Rest starts at "--Boundary";
%%   {ok, Body}         - no delimiter and no possible start of one, the
%%                        whole stream is body data;
%%   {ok, Body, Rest}   - no complete delimiter, but Rest starts with a \r
%%                        that may be the beginning of one.
parse_body(Stream, Boundary) ->
    BoundarySize = byte_size(Boundary),
    case Stream of
        << "--", Boundary:BoundarySize/binary, _/bits >> ->
            done;
        _ ->
            case binary:match(Stream, << "\r\n--", Boundary/bits >>) of
                %% No boundary, check for a possible partial at the end.
                nomatch ->
                    parse_body_no_boundary(Stream, BoundarySize);
                %% Boundary found, this is the last chunk of the body.
                %% The match guarantees that "\r\n" is found at Pos.
                {Pos, _} ->
                    << Body:Pos/binary, "\r\n", Rest/bits >> = Stream,
                    {done, Body, Rest}
            end
    end.

%% No complete delimiter in Stream: return more or less of the body
%% depending on whether its tail could be the start of a delimiter.
parse_body_no_boundary(Stream, BoundarySize) ->
    StreamSize = byte_size(Stream),
    %% An incomplete "\r\n--Boundary" is at most BoundarySize + 3 bytes.
    From = StreamSize - BoundarySize - 3,
    MatchOpts = if
        %% Binary too small to contain boundary, check it fully.
        From < 0 -> [];
        %% Optimize, only check the end of the binary.
        true -> [{scope, {From, StreamSize - From}}]
    end,
    case binary:match(Stream, <<"\r">>, MatchOpts) of
        nomatch ->
            {ok, Stream};
        %% Pos is the offset of a \r inside Stream, so Rest is never empty.
        {Pos, _} ->
            << Body:Pos/binary, Rest/bits >> = Stream,
            {ok, Body, Rest}
    end.

-ifdef(TEST).
%% Walk through both parts of the TEST1 fixture, including its preamble.
parse_test() ->
    H1 = [{<<"content-type">>, <<"text/plain">>}],
    Body1 = <<"This is the body of the message.">>,
    H2 = lists:sort([{<<"content-type">>, <<"application/octet-stream">>},
        {<<"content-transfer-encoding">>, <<"base64">>}]),
    Body2 = <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
        "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
    {ok, H1, Rest} = parse_headers(?TEST1_MIME, ?TEST1_BOUNDARY),
    {done, Body1, Rest2} = parse_body(Rest, ?TEST1_BOUNDARY),
    done = parse_body(Rest2, ?TEST1_BOUNDARY),
    {ok, H2Unsorted, Rest3} = parse_headers(Rest2, ?TEST1_BOUNDARY),
    H2 = lists:sort(H2Unsorted),
    {done, Body2, Rest4} = parse_body(Rest3, ?TEST1_BOUNDARY),
    done = parse_body(Rest4, ?TEST1_BOUNDARY),
    {done, <<>>} = parse_headers(Rest4, ?TEST1_BOUNDARY),
    ok.
%% Parse the TEST2 fixture: a multipart body whose second part is itself
%% a multipart body using a different boundary.
parse_interleaved_test() ->
    H1 = [{<<"content-disposition">>, <<"form-data; name=\"submit-name\"">>}],
    Body1 = <<"Larry">>,
    H2 = lists:sort([{<<"content-disposition">>, <<"form-data; name=\"files\"">>},
        {<<"content-type">>, <<"multipart/mixed; boundary=BbC04y">>}]),
    InH1 = lists:sort([{<<"content-disposition">>, <<"file; filename=\"file1.txt\"">>},
        {<<"content-type">>, <<"text/plain">>}]),
    InBody1 = <<"... contents of file1.txt ...">>,
    InH2 = lists:sort([{<<"content-disposition">>, <<"file; filename=\"file2.gif\"">>},
        {<<"content-type">>, <<"image/gif">>},
        {<<"content-transfer-encoding">>, <<"binary">>}]),
    InBody2 = <<"...contents of file2.gif...">>,
    %% First outer part.
    {ok, H1, Rest} = parse_headers(?TEST2_MIME, ?TEST2_BOUNDARY),
    {done, Body1, Rest2} = parse_body(Rest, ?TEST2_BOUNDARY),
    done = parse_body(Rest2, ?TEST2_BOUNDARY),
    %% Second outer part: headers are sorted before comparing because
    %% parse_headers/2 does not return them in order of appearance.
    {ok, H2Unsorted, Rest3} = parse_headers(Rest2, ?TEST2_BOUNDARY),
    H2 = lists:sort(H2Unsorted),
    %% The inner boundary is taken from the part's content-type header.
    {_, ContentType} = lists:keyfind(<<"content-type">>, 1, H2),
    {<<"multipart">>, <<"mixed">>, [{<<"boundary">>, InBoundary}]}
        = parse_content_type(ContentType),
    %% Inner parts, parsed from the outer part's body.
    {ok, InH1Unsorted, InRest} = parse_headers(Rest3, InBoundary),
    InH1 = lists:sort(InH1Unsorted),
    {done, InBody1, InRest2} = parse_body(InRest, InBoundary),
    done = parse_body(InRest2, InBoundary),
    {ok, InH2Unsorted, InRest3} = parse_headers(InRest2, InBoundary),
    InH2 = lists:sort(InH2Unsorted),
    {done, InBody2, InRest4} = parse_body(InRest3, InBoundary),
    done = parse_body(InRest4, InBoundary),
    %% The inner epilogue is what remains of the outer stream.
    {done, Rest4} = parse_headers(InRest4, InBoundary),
    {done, <<>>} = parse_headers(Rest4, ?TEST2_BOUNDARY),
    ok.

%% The epilogue is returned untouched, including its leading CRLF.
parse_epilogue_test() ->
    H1 = [{<<"content-type">>, <<"text/plain">>}],
    Body1 = <<"This is the body of the message.">>,
    Epilogue = <<"\r\nThis is the epilogue. Here it includes leading CRLF">>,
    {ok, H1, Rest} = parse_headers(?TEST3_MIME, ?TEST3_BOUNDARY),
    {done, Body1, Rest2} = parse_body(Rest, ?TEST3_BOUNDARY),
    done = parse_body(Rest2, ?TEST3_BOUNDARY),
    {done, Epilogue} = parse_headers(Rest2, ?TEST3_BOUNDARY),
    ok.
%% An epilogue made of a single CRLF is returned as is.
parse_epilogue_crlf_test() ->
    H1 = [{<<"content-type">>, <<"text/plain">>}],
    Body1 = <<"This is the body of the message.">>,
    Epilogue = <<"\r\n">>,
    {ok, H1, Rest} = parse_headers(?TEST4_MIME, ?TEST4_BOUNDARY),
    {done, Body1, Rest2} = parse_body(Rest, ?TEST4_BOUNDARY),
    done = parse_body(Rest2, ?TEST4_BOUNDARY),
    {done, Epilogue} = parse_headers(Rest2, ?TEST4_BOUNDARY),
    ok.

parse_rfc2046_test() ->
    %% The following is an example included in RFC 2046, Section 5.1.1.
    %% Its first part has no headers and its boundary contains a space.
    Body1 = <<"This is implicitly typed plain US-ASCII text.\r\n"
        "It does NOT end with a linebreak.">>,
    Body2 = <<"This is explicitly typed plain US-ASCII text.\r\n"
        "It DOES end with a linebreak.\r\n">>,
    H2 = [{<<"content-type">>, <<"text/plain; charset=us-ascii">>}],
    Epilogue = <<"\r\n\r\nThis is the epilogue. It is also to be ignored.">>,
    {ok, [], Rest} = parse_headers(?TEST5_MIME, ?TEST5_BOUNDARY),
    {done, Body1, Rest2} = parse_body(Rest, ?TEST5_BOUNDARY),
    {ok, H2, Rest3} = parse_headers(Rest2, ?TEST5_BOUNDARY),
    {done, Body2, Rest4} = parse_body(Rest3, ?TEST5_BOUNDARY),
    {done, Epilogue} = parse_headers(Rest4, ?TEST5_BOUNDARY),
    ok.
%% A stream without a complete delimiter must be split right before the
%% first \r that could start one. Each case is {Boundary, Body, Rest} and
%% the parser input is Body followed by Rest.
parse_partial_test() ->
    Padded = <<0:8000, "abcdef">>,
    Cases = [
        {<<"boundary">>, Padded, <<"\rghij">>},
        {<<"boundary">>, <<"abcdef">>, <<"\rghij">>},
        {<<"boundaryboundary">>, <<"abc">>, <<"\rdef">>},
        {<<"boundary">>, Padded, <<"\r\nghij">>},
        {<<"boundary">>, <<"abcdef">>, <<"\r\nghij">>},
        {<<"boundaryboundary">>, <<"abc">>, <<"\r\ndef">>},
        {<<"boundary">>, <<"boundary">>, <<"\r">>},
        {<<"boundary">>, <<"boundary">>, <<"\r\n">>},
        {<<"boundary">>, <<"boundary">>, <<"\r\n-">>},
        {<<"boundary">>, <<"boundary">>, <<"\r\n--">>}
    ],
    lists:foreach(fun({Boundary, Body, Rest}) ->
        {ok, Body, Rest} = parse_body(<< Body/binary, Rest/binary >>, Boundary)
    end, Cases),
    ok.

%% Benchmark helper: parse every part of a complete multipart stream.
perf_parse_multipart(Stream, Boundary) ->
    case parse_headers(Stream, Boundary) of
        {done, _} ->
            ok;
        {ok, _, AfterHeaders} ->
            {_, _, AfterBody} = parse_body(AfterHeaders, Boundary),
            perf_parse_multipart(AfterBody, Boundary)
    end.

horse_parse() ->
    horse:repeat(50000,
        perf_parse_multipart(?TEST1_MIME, ?TEST1_BOUNDARY)
    ).
-endif.

%% Building.

%% @doc Generate a new random boundary.
%%
%% The boundary is built from 48 random bytes, encoded without padding,
%% and so has a low probability of ever appearing in the data.
-spec boundary() -> binary().
boundary() ->
    Random = crypto:strong_rand_bytes(48),
    cow_base64url:encode(Random, #{padding => false}).

%% @doc Return the first part's head.
%%
%% Same output as part/2 minus the leading \r\n. Using it is optional,
%% it only makes the output a little smaller and prettier.
-spec first_part(binary(), headers()) -> iodata().
first_part(Boundary, Headers) ->
    Head = headers_to_iolist(Headers, []),
    [<<"--">>, Boundary, <<"\r\n">>, Head].

%% @doc Return a part's head.
-spec part(binary(), headers()) -> iodata().
part(Boundary, Headers) ->
    Head = headers_to_iolist(Headers, []),
    [<<"\r\n--">>, Boundary, <<"\r\n">>, Head].

%% Render the headers as "Name: Value\r\n" lines followed by the empty
%% line that ends the part's head.
%%
%% The pieces are pushed in reverse order on a single flat accumulator
%% and reversed once at the end, so that no sublist is created.
headers_to_iolist(Headers, Acc0) ->
    Reversed = lists:foldl(fun({Name, Value}, Acc) ->
        [<<"\r\n">>, Value, <<": ">>, Name|Acc]
    end, Acc0, Headers),
    lists:reverse([<<"\r\n">>|Reversed]).

%% @doc Return the closing delimiter of the multipart message.
-spec close(binary()) -> iodata().
close(Boundary) ->
    [<<"\r\n--">>, Boundary, <<"--">>].

-ifdef(TEST).
%% Rebuild the TEST1 fixture with the building functions. The comparison
%% is done on lowercased strings because the headers are given here in
%% lowercase while the fixture capitalizes them.
build_test() ->
    Expected = string:to_lower(binary_to_list(?TEST1_MIME)),
    Built = iolist_to_binary([
        <<"This is a message with multiple parts in MIME format.\r\n">>,
        first_part(?TEST1_BOUNDARY, [{<<"content-type">>, <<"text/plain">>}]),
        <<"This is the body of the message.">>,
        part(?TEST1_BOUNDARY, [
            {<<"content-type">>, <<"application/octet-stream">>},
            {<<"content-transfer-encoding">>, <<"base64">>}]),
        <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
            "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
        close(?TEST1_BOUNDARY)
    ]),
    Expected = string:to_lower(binary_to_list(Built)),
    ok.
%% Build a message with a random boundary, then parse it back and check
%% that the preamble, both parts and the epilogue are recovered.
identity_test() ->
    B = boundary(),
    Preamble = <<"This is a message with multiple parts in MIME format.">>,
    H1 = [{<<"content-type">>, <<"text/plain">>}],
    Body1 = <<"This is the body of the message.">>,
    H2 = lists:sort([{<<"content-type">>, <<"application/octet-stream">>},
        {<<"content-transfer-encoding">>, <<"base64">>}]),
    Body2 = <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
        "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
    Epilogue = <<"Gotta go fast!">>,
    M = iolist_to_binary([
        Preamble,
        part(B, H1), Body1,
        part(B, H2), Body2,
        close(B),
        Epilogue
    ]),
    %% The preamble comes out of parse_body/2, as everything that
    %% precedes the first delimiter.
    {done, Preamble, M2} = parse_body(M, B),
    {ok, H1, M3} = parse_headers(M2, B),
    {done, Body1, M4} = parse_body(M3, B),
    {ok, H2Unsorted, M5} = parse_headers(M4, B),
    H2 = lists:sort(H2Unsorted),
    {done, Body2, M6} = parse_body(M5, B),
    {done, Epilogue} = parse_headers(M6, B),
    ok.

%% Benchmark helper: build a two-part message as an iolist, without
%% flattening it.
perf_build_multipart() ->
    B = boundary(),
    [
        <<"preamble\r\n">>,
        first_part(B, [{<<"content-type">>, <<"text/plain">>}]),
        <<"This is the body of the message.">>,
        part(B, [
            {<<"content-type">>, <<"application/octet-stream">>},
            {<<"content-transfer-encoding">>, <<"base64">>}]),
        <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
            "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
        close(B),
        <<"epilogue">>
    ].

horse_build() ->
    horse:repeat(50000,
        perf_build_multipart()
    ).
-endif.

%% Headers.

%% @doc Convenience function for extracting information from headers
%% when parsing a multipart/form-data stream.
%%
%% The headers may be given as a list of pairs or as a map.
-spec form_data(headers() | #{binary() => binary()})
    -> {data, binary()}
    | {file, binary(), binary(), binary()}.
%% Returns {data, FieldName} for a plain field, or
%% {file, FieldName, Filename, ContentType} when the content-disposition
%% carries a filename parameter.
%%
%% Crashes when there is no content-disposition header, when its type is
%% not form-data or when it has no name parameter.
form_data(Headers) when is_map(Headers) ->
    form_data(maps:to_list(Headers));
form_data(Headers) ->
    {_, Disposition} = lists:keyfind(<<"content-disposition">>, 1, Headers),
    {<<"form-data">>, Params} = parse_content_disposition(Disposition),
    {_, Field} = lists:keyfind(<<"name">>, 1, Params),
    case lists:keyfind(<<"filename">>, 1, Params) of
        {_, Filename} ->
            {file, Field, Filename, form_data_content_type(Headers)};
        false ->
            {data, Field}
    end.

%% Content-type of a file part, text/plain when the header is missing.
form_data_content_type(Headers) ->
    case lists:keyfind(<<"content-type">>, 1, Headers) of
        {_, Type} -> Type;
        false -> <<"text/plain">>
    end.

-ifdef(TEST).
form_data_test_() ->
    Cases = [
        {[{<<"content-disposition">>, <<"form-data; name=\"submit-name\"">>}],
            {data, <<"submit-name">>}},
        {[{<<"content-disposition">>,
                <<"form-data; name=\"files\"; filename=\"file1.txt\"">>},
            {<<"content-type">>, <<"text/x-plain">>}],
            {file, <<"files">>, <<"file1.txt">>, <<"text/x-plain">>}}
    ],
    [begin
        Title = lists:flatten(io_lib:format("~p", [Headers])),
        {Title, fun() -> Expected = form_data(Headers) end}
    end || {Headers, Expected} <- Cases].
-endif.

%% @todo parse_content_description
%% @todo parse_content_id

%% @doc Parse an RFC 2183 content-disposition value.
%%
%% Returns the disposition type and the list of its parameters, in
%% order of appearance.
%% @todo Support RFC 2231.
-spec parse_content_disposition(binary())
    -> {binary(), [{binary(), binary()}]}.
parse_content_disposition(Bin) ->
    parse_cd_type(Bin, <<>>).

%% Accumulate the disposition type until a separator or the end of input.
%% The last clause must keep the byte bound to C: it is how the original
%% invokes the ?LOWER macro from cow_inline.hrl (not visible here).
parse_cd_type(<<>>, Acc) ->
    {Acc, []};
parse_cd_type(<< C, Rest/bits >>, Acc)
        when C =:= $; orelse C =:= $\s orelse C =:= $\t ->
    {Acc, parse_before_param(Rest, [])};
parse_cd_type(<< C, Rest/bits >>, Acc) ->
    ?LOWER(parse_cd_type, Rest, Acc).

-ifdef(TEST).
%% Each entry is {Input, ExpectedResult}. The NAME entry checks that
%% parameter names are lowercased while values are left as they are.
parse_content_disposition_test_() ->
    Tests = [
        {<<"inline">>, {<<"inline">>, []}},
        {<<"attachment">>, {<<"attachment">>, []}},
        {<<"attachment; filename=genome.jpeg;"
            " modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";">>,
            {<<"attachment">>, [
                {<<"filename">>, <<"genome.jpeg">>},
                {<<"modification-date">>, <<"Wed, 12 Feb 1997 16:29:51 -0500">>}
            ]}},
        {<<"form-data; name=\"user\"">>,
            {<<"form-data">>, [{<<"name">>, <<"user">>}]}},
        {<<"form-data; NAME=\"submit-name\"">>,
            {<<"form-data">>, [{<<"name">>, <<"submit-name">>}]}},
        {<<"form-data; name=\"files\"; filename=\"file1.txt\"">>,
            {<<"form-data">>, [
                {<<"name">>, <<"files">>},
                {<<"filename">>, <<"file1.txt">>}
            ]}},
        {<<"file; filename=\"file1.txt\"">>,
            {<<"file">>, [{<<"filename">>, <<"file1.txt">>}]}},
        {<<"file; filename=\"file2.gif\"">>,
            {<<"file">>, [{<<"filename">>, <<"file2.gif">>}]}}
    ],
    %% R is already bound inside the fun, so the match is the assertion.
    [{V, fun() -> R = parse_content_disposition(V) end} || {V, R} <- Tests].

%% Benchmarks. The expression given to horse:repeat/2 is kept inline.
horse_parse_content_disposition_attachment() ->
    horse:repeat(100000,
        parse_content_disposition(<<"attachment; filename=genome.jpeg;"
            " modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";">>)
    ).

horse_parse_content_disposition_form_data() ->
    horse:repeat(100000,
        parse_content_disposition(
            <<"form-data; name=\"files\"; filename=\"file1.txt\"">>)
    ).

horse_parse_content_disposition_inline() ->
    horse:repeat(100000,
        parse_content_disposition(<<"inline">>)
    ).
-endif.

%% @doc Parse an RFC 2045 content-transfer-encoding header.
%%
%% The tests below expect the value back in lowercase.
-spec parse_content_transfer_encoding(binary()) -> binary().
parse_content_transfer_encoding(Bin) ->
    %% NOTE(review): ?LOWER/1 comes from cow_inline.hrl, which is not
    %% visible here; presumably it lowercases the whole binary.
    ?LOWER(Bin).

-ifdef(TEST).
%% Each entry is {Input, ExpectedResult}.
parse_content_transfer_encoding_test_() ->
    Tests = [
        {<<"7bit">>, <<"7bit">>},
        {<<"7BIT">>, <<"7bit">>},
        {<<"8bit">>, <<"8bit">>},
        {<<"binary">>, <<"binary">>},
        {<<"quoted-printable">>, <<"quoted-printable">>},
        {<<"base64">>, <<"base64">>},
        {<<"Base64">>, <<"base64">>},
        {<<"BASE64">>, <<"base64">>},
        {<<"bAsE64">>, <<"base64">>}
    ],
    [{V, fun() -> R = parse_content_transfer_encoding(V) end} || {V, R} <- Tests].
horse_parse_content_transfer_encoding() ->
    horse:repeat(100000,
        parse_content_transfer_encoding(<<"QUOTED-PRINTABLE">>)
    ).
-endif.

%% @doc Parse an RFC 2045 content-type header.
%%
%% Returns the type, the subtype and the list of parameters in order of
%% appearance. Crashes when the value has no "/" or an empty subtype
%% at the end of the input.
-spec parse_content_type(binary())
    -> {binary(), binary(), [{binary(), binary()}]}.
parse_content_type(Bin) ->
    parse_ct_type(Bin, <<>>).

%% Accumulate the type up to the slash.
%% The byte must stay bound to C in the last clause: it is how the
%% original invokes the ?LOWER macro from cow_inline.hrl (not visible here).
parse_ct_type(<< $/, Rest/bits >>, Acc) ->
    parse_ct_subtype(Rest, Acc, <<>>);
parse_ct_type(<< C, Rest/bits >>, Acc) ->
    ?LOWER(parse_ct_type, Rest, Acc).

%% Accumulate the subtype until a separator or the end of input.
parse_ct_subtype(<<>>, Type, Subtype) when Subtype =/= <<>> ->
    {Type, Subtype, []};
parse_ct_subtype(<< C, Rest/bits >>, Type, Acc)
        when C =:= $; orelse C =:= $\s orelse C =:= $\t ->
    {Type, Acc, parse_before_param(Rest, [])};
parse_ct_subtype(<< C, Rest/bits >>, Type, Acc) ->
    ?LOWER(parse_ct_subtype, Rest, Type, Acc).

-ifdef(TEST).
%% Each case is {Input, Expected}; the input doubles as the test title.
parse_content_type_test_() ->
    Cases = [
        {<<"image/gif">>,
            {<<"image">>, <<"gif">>, []}},
        {<<"text/plain">>,
            {<<"text">>, <<"plain">>, []}},
        {<<"text/plain; charset=us-ascii">>,
            {<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}},
        {<<"text/plain; charset=\"us-ascii\"">>,
            {<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}},
        {<<"multipart/form-data; boundary=AaB03x">>,
            {<<"multipart">>, <<"form-data">>,
                [{<<"boundary">>, <<"AaB03x">>}]}},
        {<<"multipart/mixed; boundary=BbC04y">>,
            {<<"multipart">>, <<"mixed">>, [{<<"boundary">>, <<"BbC04y">>}]}},
        {<<"multipart/mixed; boundary=--------">>,
            {<<"multipart">>, <<"mixed">>, [{<<"boundary">>, <<"--------">>}]}},
        {<<"application/x-horse; filename=genome.jpeg;"
            " some-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";"
            " charset=us-ascii; empty=; number=12345">>,
            {<<"application">>, <<"x-horse">>, [
                {<<"filename">>, <<"genome.jpeg">>},
                {<<"some-date">>, <<"Wed, 12 Feb 1997 16:29:51 -0500">>},
                {<<"charset">>, <<"us-ascii">>},
                {<<"empty">>, <<>>},
                {<<"number">>, <<"12345">>}
            ]}}
    ],
    [{Input, fun() -> Expected = parse_content_type(Input) end}
        || {Input, Expected} <- Cases].
horse_parse_content_type_zero() ->
    horse:repeat(100000,
        parse_content_type(<<"text/plain">>)
    ).

horse_parse_content_type_one() ->
    horse:repeat(100000,
        parse_content_type(<<"text/plain; charset=\"us-ascii\"">>)
    ).

horse_parse_content_type_five() ->
    horse:repeat(100000,
        parse_content_type(<<"application/x-horse; filename=genome.jpeg;"
            " some-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";"
            " charset=us-ascii; empty=; number=12345">>)
    ).
-endif.

%% @doc Parse RFC 2045 parameters.
%%
%% Skip the separators found before a parameter, then start its name.
%% The parameters are accumulated in reverse and reversed at the end.
%% Clauses that call ?LOWER (from cow_inline.hrl, not visible here) keep
%% the current byte bound to C, as the original code does.
parse_before_param(<<>>, Params) ->
    lists:reverse(Params);
parse_before_param(<< C, Rest/bits >>, Params)
        when C =:= $; orelse C =:= $\s orelse C =:= $\t ->
    parse_before_param(Rest, Params);
parse_before_param(<< C, Rest/bits >>, Params) ->
    ?LOWER(parse_param_name, Rest, Params, <<>>).

%% Accumulate the parameter name up to the equal sign.
%% A name that runs to the end of the input gets an empty value.
parse_param_name(<<>>, Params, Acc) ->
    lists:reverse([{Acc, <<>>}|Params]);
parse_param_name(<< $=, Rest/bits >>, Params, Acc) ->
    parse_param_value(Rest, Params, Acc);
parse_param_name(<< C, Rest/bits >>, Params, Acc) ->
    ?LOWER(parse_param_name, Rest, Params, Acc).

%% First byte of the value: it decides between a quoted value, an empty
%% value and a token.
parse_param_value(<<>>, Params, Name) ->
    lists:reverse([{Name, <<>>}|Params]);
parse_param_value(<< $", Rest/bits >>, Params, Name) ->
    parse_param_quoted_value(Rest, Params, Name, <<>>);
parse_param_value(<< C, Rest/bits >>, Params, Name)
        when C =:= $; orelse C =:= $\s orelse C =:= $\t ->
    parse_before_param(Rest, [{Name, <<>>}|Params]);
parse_param_value(<< C, Rest/bits >>, Params, Name) ->
    parse_param_value(Rest, Params, Name, << C >>).

%% Rest of a token value, up to a separator or the end of input.
%% The bytes are kept as they are, values are not lowercased.
parse_param_value(<<>>, Params, Name, Acc) ->
    lists:reverse([{Name, Acc}|Params]);
parse_param_value(<< C, Rest/bits >>, Params, Name, Acc)
        when C =:= $; orelse C =:= $\s orelse C =:= $\t ->
    parse_before_param(Rest, [{Name, Acc}|Params]);
parse_param_value(<< C, Rest/bits >>, Params, Name, Acc) ->
    parse_param_value(Rest, Params, Name, << Acc/binary, C >>).

%% We expect a final $" so no need to test for <<>>.
%% Inside a quoted parameter value.
%%
%% The closing quote ends the value. A backslash makes the byte that
%% follows it part of the value, whatever that byte is. Any other byte
%% is taken as is, except a bare \r, which is not accepted and crashes.
parse_param_quoted_value(<< $", Tail/bits >>, Params, Name, Value) ->
    parse_before_param(Tail, [{Name, Value}|Params]);
parse_param_quoted_value(<< $\\, Escaped, Tail/bits >>, Params, Name, Value) ->
    parse_param_quoted_value(Tail, Params, Name, << Value/binary, Escaped >>);
parse_param_quoted_value(<< Byte, Tail/bits >>, Params, Name, Value)
        when Byte =/= $\r ->
    parse_param_quoted_value(Tail, Params, Name, << Value/binary, Byte >>).