diff options
author | Loïc Hoguin <[email protected]> | 2014-02-05 13:13:37 +0100 |
---|---|---|
committer | Loïc Hoguin <[email protected]> | 2014-02-05 13:19:35 +0100 |
commit | 521d7ed582d9b948031896c597170ef101d9f8b0 (patch) | |
tree | 0b15327d7cffee8399b0f2888ef77fcbda310a56 /src | |
parent | 799e6e9b8faebe825b70299521b2e3465fc47c89 (diff) | |
download | cowlib-521d7ed582d9b948031896c597170ef101d9f8b0.tar.gz cowlib-521d7ed582d9b948031896c597170ef101d9f8b0.tar.bz2 cowlib-521d7ed582d9b948031896c597170ef101d9f8b0.zip |
Add the cow_multipart module for multipart message parsing and building
This is a new implementation that has been measured to be up
to 33% faster than the implementation that was in Cowboy.
Diffstat (limited to 'src')
-rw-r--r-- | src/cow_multipart.erl | 683 | ||||
-rw-r--r-- | src/cowlib.app.src | 3 |
2 files changed, 685 insertions, 1 deletions
diff --git a/src/cow_multipart.erl b/src/cow_multipart.erl new file mode 100644 index 0000000..e1acf63 --- /dev/null +++ b/src/cow_multipart.erl @@ -0,0 +1,683 @@ +%% Copyright (c) 2014, Loïc Hoguin <[email protected]> +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. +%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +-module(cow_multipart). + +%% Parsing. +-export([parse_headers/2]). +-export([parse_body/2]). + +%% Building. +-export([boundary/0]). +-export([first_part/2]). +-export([part/2]). +-export([close/1]). + +%% Headers. +-export([form_data/1]). +-export([parse_content_disposition/1]). +-export([parse_content_transfer_encoding/1]). +-export([parse_content_type/1]). + +-type headers() :: [{iodata(), iodata()}]. +-export_type([headers/0]). + +-include("cow_inline.hrl"). + +-define(TEST1_MIME, << + "This is a message with multiple parts in MIME format.\r\n" + "--frontier\r\n" + "Content-Type: text/plain\r\n" + "\r\n" + "This is the body of the message.\r\n" + "--frontier\r\n" + "Content-Type: application/octet-stream\r\n" + "Content-Transfer-Encoding: base64\r\n" + "\r\n" + "PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n" + "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==\r\n" + "--frontier--\r\n" +>>). +-define(TEST1_BOUNDARY, <<"frontier">>). + +-define(TEST2_MIME, << + "--AaB03x\r\n" + "Content-Disposition: form-data; name=\"submit-name\"\r\n" + "\r\n" + "Larry\r\n" + "--AaB03x\r\n" + "Content-Disposition: form-data; name=\"files\"\r\n" + "Content-Type: multipart/mixed; boundary=BbC04y\r\n" + "\r\n" + "--BbC04y\r\n" + "Content-Disposition: file; filename=\"file1.txt\"\r\n" + "Content-Type: text/plain\r\n" + "\r\n" + "... contents of file1.txt ...\r\n" + "--BbC04y\r\n" + "Content-Disposition: file; filename=\"file2.gif\"\r\n" + "Content-Type: image/gif\r\n" + "Content-Transfer-Encoding: binary\r\n" + "\r\n" + "...contents of file2.gif...\r\n" + "--BbC04y--\r\n" + "--AaB03x--\r\n" +>>). +-define(TEST2_BOUNDARY, <<"AaB03x">>). + +%% Parsing. +%% +%% The multipart format is defined in RFC 2045. + +%% @doc Parse the headers for the next multipart part. +%% +%% This function skips any preamble before the boundary. +%% The preamble may be retrieved using parse_body/2. +%% +%% This function will accept input of any size, it is +%% up to the caller to limit it if needed. + +-spec parse_headers(binary(), binary()) + -> more | {more, binary()} + | {ok, headers(), binary()} + | {done, binary()}. +%% If the stream starts with the boundary we can make a few assumptions +%% and quickly figure out if we got the complete list of headers. +parse_headers(<< "--", Stream/bits >>, Boundary) -> + BoundarySize = byte_size(Boundary), + case Stream of + %% Last boundary. Return the epilogue. + << Boundary:BoundarySize/binary, "--\r\n", Stream2/bits >> -> + {done, Stream2}; + << Boundary:BoundarySize/binary, Stream2/bits >> -> + %% We have all the headers only if there is a \r\n\r\n + %% somewhere in the data after the boundary. + case binary:match(Stream2, <<"\r\n\r\n">>) of + nomatch -> + more; + _ -> + before_parse_headers(Stream2) + end; + %% If there isn't enough to represent Boundary \r\n\r\n + %% then we definitely don't have all the headers. + _ when byte_size(Stream) < byte_size(Boundary) + 4 -> + more; + %% Otherwise we have preamble data to skip. + %% We still got rid of the first two misleading bytes. + _ -> + skip_preamble(Stream, Boundary) + end; +%% Otherwise we have preamble data to skip. +parse_headers(Stream, Boundary) -> + skip_preamble(Stream, Boundary). + +%% We need to find the boundary and a \r\n\r\n after that. +%% Since the boundary isn't at the start, it must be right +%% after a \r\n too. +skip_preamble(Stream, Boundary) -> + case binary:match(Stream, <<"\r\n--", Boundary/bits >>) of + %% No boundary, need more data. + nomatch -> + %% We can safely skip the size of the stream + %% minus the last 3 bytes which may be a partial boundary. + SkipSize = byte_size(Stream) - 3, + case SkipSize > 0 of + false -> + more; + true -> + << _:SkipSize/binary, Stream2/bits >> = Stream, + {more, Stream2} + end; + {Start, Length} -> + Start2 = Start + Length, + << _:Start2/binary, Stream2/bits >> = Stream, + case Stream2 of + %% Last boundary. Return the epilogue. + << "--\r\n", Stream3/bits >> -> + {done, Stream3}; + _ -> + case binary:match(Stream, <<"\r\n\r\n">>) of + %% We don't have the full headers. + nomatch -> + {more, Stream2}; + _ -> + before_parse_headers(Stream2) + end + end + end. + +%% There is a line break right after the boundary, skip it. +%% +%% We only skip it now because there might be no headers at all, +%% which means the \r\n\r\n indicating the end of headers also +%% includes this line break. +before_parse_headers(<< "\r\n", Stream/bits >>) -> + parse_hd_name(Stream, [], <<>>). + +parse_hd_name(<< C, Rest/bits >>, H, SoFar) -> + case C of + $: -> parse_hd_before_value(Rest, H, SoFar); + $\s -> parse_hd_name_ws(Rest, H, SoFar); + $\t -> parse_hd_name_ws(Rest, H, SoFar); + ?INLINE_LOWERCASE(parse_hd_name, Rest, H, SoFar) + end. + +parse_hd_name_ws(<< C, Rest/bits >>, H, Name) -> + case C of + $\s -> parse_hd_name_ws(Rest, H, Name); + $\t -> parse_hd_name_ws(Rest, H, Name); + $: -> parse_hd_before_value(Rest, H, Name) + end. + +parse_hd_before_value(<< $\s, Rest/bits >>, H, N) -> + parse_hd_before_value(Rest, H, N); +parse_hd_before_value(<< $\t, Rest/bits >>, H, N) -> + parse_hd_before_value(Rest, H, N); +parse_hd_before_value(Buffer, H, N) -> + parse_hd_value(Buffer, H, N, <<>>). + +parse_hd_value(<< $\r, Rest/bits >>, Headers, Name, SoFar) -> + case Rest of + << "\n\r\n", Rest2/bits >> -> + {ok, [{Name, SoFar}|Headers], Rest2}; + << $\n, C, Rest2/bits >> when C =:= $\s; C =:= $\t -> + parse_hd_value(Rest2, Headers, Name, SoFar); + << $\n, Rest2/bits >> -> + parse_hd_name(Rest2, [{Name, SoFar}|Headers], <<>>) + end; +parse_hd_value(<< C, Rest/bits >>, H, N, SoFar) -> + parse_hd_value(Rest, H, N, << SoFar/binary, C >>). + +%% @doc Parse the body of the current multipart part. +%% +%% The body is everything until the next boundary. + +-spec parse_body(binary(), binary()) + -> {ok, binary()} | {ok, binary(), binary()} + | done | {done, binary()} | {done, binary(), binary()}. +parse_body(Stream, Boundary) -> + BoundarySize = byte_size(Boundary), + case Stream of + << "--", Boundary:BoundarySize/binary, _/bits >> -> + done; + _ -> + case binary:match(Stream, << "\r\n--", Boundary/bits >>) of + %% No boundary, check for a possible partial at the end. + %% Return more or less of the body depending on the result. + nomatch -> + From = byte_size(Stream) - BoundarySize - 3, + case binary:match(Stream, <<"\r">>) of + nomatch -> + {ok, Stream}; + {Pos, _} -> + Pos2 = From + Pos, + case Stream of + << Body:Pos2/binary, "\r\n" >> -> + {ok, Body}; + << Body:Pos2/binary, "\r\n", Rest/bits >> -> + {ok, Body, Rest}; + << Body:Pos2/binary, Rest/bits >> -> + {ok, Body, Rest} + end + end; + %% Boundary found, this is the last chunk of the body. + {Pos, _} -> + case Stream of + << Body:Pos/binary, "\r\n" >> -> + {done, Body}; + << Body:Pos/binary, "\r\n", Rest/bits >> -> + {done, Body, Rest}; + << Body:Pos/binary, Rest/bits >> -> + {done, Body, Rest} + end + end + end. + +-ifdef(TEST). +parse_test() -> + H1 = [{<<"content-type">>, <<"text/plain">>}], + Body1 = <<"This is the body of the message.">>, + H2 = lists:sort([{<<"content-type">>, <<"application/octet-stream">>}, + {<<"content-transfer-encoding">>, <<"base64">>}]), + Body2 = <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n" + "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>, + {ok, H1, Rest} = parse_headers(?TEST1_MIME, ?TEST1_BOUNDARY), + {done, Body1, Rest2} = parse_body(Rest, ?TEST1_BOUNDARY), + done = parse_body(Rest2, ?TEST1_BOUNDARY), + {ok, H2Unsorted, Rest3} = parse_headers(Rest2, ?TEST1_BOUNDARY), + H2 = lists:sort(H2Unsorted), + {done, Body2, Rest4} = parse_body(Rest3, ?TEST1_BOUNDARY), + done = parse_body(Rest4, ?TEST1_BOUNDARY), + {done, <<>>} = parse_headers(Rest4, ?TEST1_BOUNDARY), + ok. + +parse_interleaved_test() -> + H1 = [{<<"content-disposition">>, <<"form-data; name=\"submit-name\"">>}], + Body1 = <<"Larry">>, + H2 = lists:sort([{<<"content-disposition">>, <<"form-data; name=\"files\"">>}, + {<<"content-type">>, <<"multipart/mixed; boundary=BbC04y">>}]), + InH1 = lists:sort([{<<"content-disposition">>, <<"file; filename=\"file1.txt\"">>}, + {<<"content-type">>, <<"text/plain">>}]), + InBody1 = <<"... contents of file1.txt ...">>, + InH2 = lists:sort([{<<"content-disposition">>, <<"file; filename=\"file2.gif\"">>}, + {<<"content-type">>, <<"image/gif">>}, + {<<"content-transfer-encoding">>, <<"binary">>}]), + InBody2 = <<"...contents of file2.gif...">>, + {ok, H1, Rest} = parse_headers(?TEST2_MIME, ?TEST2_BOUNDARY), + {done, Body1, Rest2} = parse_body(Rest, ?TEST2_BOUNDARY), + done = parse_body(Rest2, ?TEST2_BOUNDARY), + {ok, H2Unsorted, Rest3} = parse_headers(Rest2, ?TEST2_BOUNDARY), + H2 = lists:sort(H2Unsorted), + {_, ContentType} = lists:keyfind(<<"content-type">>, 1, H2), + {<<"multipart">>, <<"mixed">>, [{<<"boundary">>, InBoundary}]} + = parse_content_type(ContentType), + {ok, InH1Unsorted, InRest} = parse_headers(Rest3, InBoundary), + InH1 = lists:sort(InH1Unsorted), + {done, InBody1, InRest2} = parse_body(InRest, InBoundary), + done = parse_body(InRest2, InBoundary), + {ok, InH2Unsorted, InRest3} = parse_headers(InRest2, InBoundary), + InH2 = lists:sort(InH2Unsorted), + {done, InBody2, InRest4} = parse_body(InRest3, InBoundary), + done = parse_body(InRest4, InBoundary), + {done, Rest4} = parse_headers(InRest4, InBoundary), + {done, <<>>} = parse_headers(Rest4, ?TEST2_BOUNDARY), + ok. +-endif. + +-ifdef(PERF). +perf_parse_multipart(Stream, Boundary) -> + case parse_headers(Stream, Boundary) of + {ok, _, Rest} -> + {_, _, Rest2} = parse_body(Rest, Boundary), + perf_parse_multipart(Rest2, Boundary); + {done, _} -> + ok + end. + +horse_parse() -> + horse:repeat(50000, + perf_parse_multipart(?TEST1_MIME, ?TEST1_BOUNDARY) + ). +-endif. + +%% Building. + +%% @doc Generate a new random boundary. +%% +%% The boundary generated has a low probability of ever appearing +%% in the data. + +-spec boundary() -> binary(). +boundary() -> + base64:encode(crypto:rand_bytes(48)). + +%% @doc Return the first part's head. +%% +%% This works exactly like the part/2 function except there is +%% no leading \r\n. It's not required to use this function, +%% just makes the output a little smaller and prettier. + +-spec first_part(binary(), headers()) -> iodata(). +first_part(Boundary, Headers) -> + [<<"--">>, Boundary, <<"\r\n">>, headers_to_iolist(Headers, [])]. + +%% @doc Return a part's head. + +-spec part(binary(), headers()) -> iodata(). +part(Boundary, Headers) -> + [<<"\r\n--">>, Boundary, <<"\r\n">>, headers_to_iolist(Headers, [])]. + +headers_to_iolist([], Acc) -> + lists:reverse([<<"\r\n">>|Acc]); +headers_to_iolist([{N, V}|Tail], Acc) -> + %% We don't want to create a sublist so we list the + %% values in reverse order so that it gets reversed properly. + headers_to_iolist(Tail, [<<"\r\n">>, V, <<": ">>, N|Acc]). + +%% @doc Return the closing delimiter of the multipart message. + +-spec close(binary()) -> iodata(). +close(Boundary) -> + [<<"\r\n--">>, Boundary, <<"--\r\n">>]. + +-ifdef(TEST). +build_test() -> + Result = string:to_lower(binary_to_list(?TEST1_MIME)), + Result = string:to_lower(binary_to_list(iolist_to_binary([ + <<"This is a message with multiple parts in MIME format.\r\n">>, + first_part(?TEST1_BOUNDARY, [{<<"content-type">>, <<"text/plain">>}]), + <<"This is the body of the message.">>, + part(?TEST1_BOUNDARY, [ + {<<"content-type">>, <<"application/octet-stream">>}, + {<<"content-transfer-encoding">>, <<"base64">>}]), + <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n" + "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>, + close(?TEST1_BOUNDARY) + ]))), + ok. + +identity_test() -> + B = boundary(), + Preamble = <<"This is a message with multiple parts in MIME format.">>, + H1 = [{<<"content-type">>, <<"text/plain">>}], + Body1 = <<"This is the body of the message.">>, + H2 = lists:sort([{<<"content-type">>, <<"application/octet-stream">>}, + {<<"content-transfer-encoding">>, <<"base64">>}]), + Body2 = <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n" + "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>, + Epilogue = <<"Gotta go fast!">>, + M = iolist_to_binary([ + Preamble, + part(B, H1), Body1, + part(B, H2), Body2, + close(B), + Epilogue + ]), + {done, Preamble, M2} = parse_body(M, B), + {ok, H1, M3} = parse_headers(M2, B), + {done, Body1, M4} = parse_body(M3, B), + {ok, H2Unsorted, M5} = parse_headers(M4, B), + H2 = lists:sort(H2Unsorted), + {done, Body2, M6} = parse_body(M5, B), + {done, Epilogue} = parse_headers(M6, B), + ok. +-endif. + +-ifdef(PERF). +perf_build_multipart() -> + B = boundary(), + [ + <<"preamble\r\n">>, + first_part(B, [{<<"content-type">>, <<"text/plain">>}]), + <<"This is the body of the message.">>, + part(B, [ + {<<"content-type">>, <<"application/octet-stream">>}, + {<<"content-transfer-encoding">>, <<"base64">>}]), + <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n" + "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>, + close(B), + <<"epilogue">> + ]. + +horse_build() -> + horse:repeat(50000, + perf_build_multipart() + ). +-endif. + +%% Headers. + +%% @doc Convenience function for extracting information from headers +%% when parsing a multipart/form-data stream. + +-spec form_data(headers()) + -> {data, binary()} + | {file, binary(), binary(), binary(), binary()}. +form_data(Headers) -> + {_, DispositionBin} = lists:keyfind(<<"content-disposition">>, 1, Headers), + {<<"form-data">>, Params} = parse_content_disposition(DispositionBin), + {_, FieldName} = lists:keyfind(<<"name">>, 1, Params), + case lists:keyfind(<<"filename">>, 1, Params) of + false -> + {data, FieldName}; + {_, Filename} -> + Type = case lists:keyfind(<<"content-type">>, 1, Headers) of + false -> <<"text/plain">>; + {_, T} -> T + end, + TransferEncoding = case lists:keyfind( + <<"content-transfer-encoding">>, 1, Headers) of + false -> <<"7bit">>; + {_, TE} -> TE + end, + {file, FieldName, Filename, Type, TransferEncoding} + end. + +-ifdef(TEST). +form_data_test_() -> + Tests = [ + {[{<<"content-disposition">>, <<"form-data; name=\"submit-name\"">>}], + {data, <<"submit-name">>}}, + {[{<<"content-disposition">>, + <<"form-data; name=\"files\"; filename=\"file1.txt\"">>}, + {<<"content-type">>, <<"text/x-plain">>}], + {file, <<"files">>, <<"file1.txt">>, + <<"text/x-plain">>, <<"7bit">>}} + ], + [{lists:flatten(io_lib:format("~p", [V])), + fun() -> R = form_data(V) end} || {V, R} <- Tests]. +-endif. + +%% @todo parse_content_description +%% @todo parse_content_id + +%% @doc Parse an RFC 2183 content-disposition value. +%% @todo Support RFC 2231. + +-spec parse_content_disposition(binary()) + -> {binary(), [{binary(), binary()}]}. +parse_content_disposition(Bin) -> + parse_cd_type(Bin, <<>>). + +parse_cd_type(<<>>, Acc) -> + {Acc, []}; +parse_cd_type(<< C, Rest/bits >>, Acc) -> + case C of + $; -> {Acc, parse_before_param(Rest, [])}; + $\s -> {Acc, parse_before_param(Rest, [])}; + $\t -> {Acc, parse_before_param(Rest, [])}; + ?INLINE_LOWERCASE(parse_cd_type, Rest, Acc) + end. + +-ifdef(TEST). +parse_content_disposition_test_() -> + Tests = [ + {<<"inline">>, {<<"inline">>, []}}, + {<<"attachment">>, {<<"attachment">>, []}}, + {<<"attachment; filename=genome.jpeg;" + " modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";">>, + {<<"attachment">>, [ + {<<"filename">>, <<"genome.jpeg">>}, + {<<"modification-date">>, <<"Wed, 12 Feb 1997 16:29:51 -0500">>} + ]}}, + {<<"form-data; name=\"user\"">>, + {<<"form-data">>, [{<<"name">>, <<"user">>}]}}, + {<<"form-data; NAME=\"submit-name\"">>, + {<<"form-data">>, [{<<"name">>, <<"submit-name">>}]}}, + {<<"form-data; name=\"files\"; filename=\"file1.txt\"">>, + {<<"form-data">>, [ + {<<"name">>, <<"files">>}, + {<<"filename">>, <<"file1.txt">>} + ]}}, + {<<"file; filename=\"file1.txt\"">>, + {<<"file">>, [{<<"filename">>, <<"file1.txt">>}]}}, + {<<"file; filename=\"file2.gif\"">>, + {<<"file">>, [{<<"filename">>, <<"file2.gif">>}]}} + ], + [{V, fun() -> R = parse_content_disposition(V) end} || {V, R} <- Tests]. +-endif. + +-ifdef(PERF). +horse_parse_content_disposition_attachment() -> + horse:repeat(100000, + parse_content_disposition(<<"attachment; filename=genome.jpeg;" + " modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";">>) + ). + +horse_parse_content_disposition_form_data() -> + horse:repeat(100000, + parse_content_disposition( + <<"form-data; name=\"files\"; filename=\"file1.txt\"">>) + ). + +horse_parse_content_disposition_inline() -> + horse:repeat(100000, + parse_content_disposition(<<"inline">>) + ). +-endif. + +%% @doc Parse an RFC 2045 content-transfer-encoding header. + +-spec parse_content_transfer_encoding(binary()) -> binary(). +parse_content_transfer_encoding(Bin) -> + ?INLINE_LOWERCASE_BC(Bin). + +-ifdef(TEST). +parse_content_transfer_encoding_test_() -> + Tests = [ + {<<"7bit">>, <<"7bit">>}, + {<<"7BIT">>, <<"7bit">>}, + {<<"8bit">>, <<"8bit">>}, + {<<"binary">>, <<"binary">>}, + {<<"quoted-printable">>, <<"quoted-printable">>}, + {<<"base64">>, <<"base64">>}, + {<<"Base64">>, <<"base64">>}, + {<<"BASE64">>, <<"base64">>}, + {<<"bAsE64">>, <<"base64">>} + ], + [{V, fun() -> R = parse_content_transfer_encoding(V) end} + || {V, R} <- Tests]. +-endif. + +-ifdef(PERF). +horse_parse_content_transfer_encoding() -> + horse:repeat(100000, + parse_content_transfer_encoding(<<"QUOTED-PRINTABLE">>) + ). +-endif. + +%% @doc Parse an RFC 2045 content-type header. + +-spec parse_content_type(binary()) + -> {binary(), binary(), [{binary(), binary()}]}. +parse_content_type(Bin) -> + parse_ct_type(Bin, <<>>). + +parse_ct_type(<< C, Rest/bits >>, Acc) -> + case C of + $/ -> parse_ct_subtype(Rest, Acc, <<>>); + ?INLINE_LOWERCASE(parse_ct_type, Rest, Acc) + end. + +parse_ct_subtype(<<>>, Type, Subtype) when Subtype =/= <<>> -> + {Type, Subtype, []}; +parse_ct_subtype(<< C, Rest/bits >>, Type, Acc) -> + case C of + $; -> {Type, Acc, parse_before_param(Rest, [])}; + $\s -> {Type, Acc, parse_before_param(Rest, [])}; + $\t -> {Type, Acc, parse_before_param(Rest, [])}; + ?INLINE_LOWERCASE(parse_ct_subtype, Rest, Type, Acc) + end. + +-ifdef(TEST). +parse_content_type_test_() -> + Tests = [ + {<<"image/gif">>, + {<<"image">>, <<"gif">>, []}}, + {<<"text/plain">>, + {<<"text">>, <<"plain">>, []}}, + {<<"text/plain; charset=us-ascii">>, + {<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}}, + {<<"text/plain; charset=\"us-ascii\"">>, + {<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}}, + {<<"multipart/form-data; boundary=AaB03x">>, + {<<"multipart">>, <<"form-data">>, + [{<<"boundary">>, <<"AaB03x">>}]}}, + {<<"multipart/mixed; boundary=BbC04y">>, + {<<"multipart">>, <<"mixed">>, [{<<"boundary">>, <<"BbC04y">>}]}}, + {<<"multipart/mixed; boundary=--------">>, + {<<"multipart">>, <<"mixed">>, [{<<"boundary">>, <<"--------">>}]}}, + {<<"application/x-horse; filename=genome.jpeg;" + " some-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";" + " charset=us-ascii; empty=; number=12345">>, + {<<"application">>, <<"x-horse">>, [ + {<<"filename">>, <<"genome.jpeg">>}, + {<<"some-date">>, <<"Wed, 12 Feb 1997 16:29:51 -0500">>}, + {<<"charset">>, <<"us-ascii">>}, + {<<"empty">>, <<>>}, + {<<"number">>, <<"12345">>} + ]}} + ], + [{V, fun() -> R = parse_content_type(V) end} + || {V, R} <- Tests]. +-endif. + +-ifdef(PERF). +horse_parse_content_type_zero() -> + horse:repeat(100000, + parse_content_type(<<"text/plain">>) + ). + +horse_parse_content_type_one() -> + horse:repeat(100000, + parse_content_type(<<"text/plain; charset=\"us-ascii\"">>) + ). + +horse_parse_content_type_five() -> + horse:repeat(100000, + parse_content_type(<<"application/x-horse; filename=genome.jpeg;" + " some-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";" + " charset=us-ascii; empty=; number=12345">>) + ). +-endif. + +%% @doc Parse RFC 2045 parameters. + +parse_before_param(<<>>, Params) -> + lists:reverse(Params); +parse_before_param(<< C, Rest/bits >>, Params) -> + case C of + $; -> parse_before_param(Rest, Params); + $\s -> parse_before_param(Rest, Params); + $\t -> parse_before_param(Rest, Params); + ?INLINE_LOWERCASE(parse_param_name, Rest, Params, <<>>) + end. + +parse_param_name(<<>>, Params, Acc) -> + lists:reverse([{Acc, <<>>}|Params]); +parse_param_name(<< C, Rest/bits >>, Params, Acc) -> + case C of + $= -> parse_param_value(Rest, Params, Acc); + ?INLINE_LOWERCASE(parse_param_name, Rest, Params, Acc) + end. + +parse_param_value(<<>>, Params, Name) -> + lists:reverse([{Name, <<>>}|Params]); +parse_param_value(<< C, Rest/bits >>, Params, Name) -> + case C of + $" -> parse_param_quoted_value(Rest, Params, Name, <<>>); + $; -> parse_before_param(Rest, [{Name, <<>>}|Params]); + $\s -> parse_before_param(Rest, [{Name, <<>>}|Params]); + $\t -> parse_before_param(Rest, [{Name, <<>>}|Params]); + C -> parse_param_value(Rest, Params, Name, << C >>) + end. + +parse_param_value(<<>>, Params, Name, Acc) -> + lists:reverse([{Name, Acc}|Params]); +parse_param_value(<< C, Rest/bits >>, Params, Name, Acc) -> + case C of + $; -> parse_before_param(Rest, [{Name, Acc}|Params]); + $\s -> parse_before_param(Rest, [{Name, Acc}|Params]); + $\t -> parse_before_param(Rest, [{Name, Acc}|Params]); + C -> parse_param_value(Rest, Params, Name, << Acc/binary, C >>) + end. + +%% We expect a final $" so no need to test for <<>>. +parse_param_quoted_value(<< $\\, C, Rest/bits >>, Params, Name, Acc) -> + parse_param_quoted_value(Rest, Params, Name, << Acc/binary, C >>); +parse_param_quoted_value(<< $", Rest/bits >>, Params, Name, Acc) -> + parse_before_param(Rest, [{Name, Acc}|Params]); +parse_param_quoted_value(<< C, Rest/bits >>, Params, Name, Acc) + when C =/= $\r -> + parse_param_quoted_value(Rest, Params, Name, << Acc/binary, C >>). diff --git a/src/cowlib.app.src b/src/cowlib.app.src index 568f72b..4981402 100644 --- a/src/cowlib.app.src +++ b/src/cowlib.app.src @@ -19,6 +19,7 @@ {registered, []}, {applications, [ kernel, - stdlib + stdlib, + crypto ]} ]}. |