diff options
Diffstat (limited to 'src/cowboy_multipart.erl')
-rw-r--r-- | src/cowboy_multipart.erl | 313 |
1 files changed, 0 insertions, 313 deletions
diff --git a/src/cowboy_multipart.erl b/src/cowboy_multipart.erl deleted file mode 100644 index 4df5a27..0000000 --- a/src/cowboy_multipart.erl +++ /dev/null @@ -1,313 +0,0 @@
%% Copyright (c) 2011, Anthony Ramine <[email protected]>
%%
%% Permission to use, copy, modify, and/or distribute this software for any
%% purpose with or without fee is hereby granted, provided that the above
%% copyright notice and this permission notice appear in all copies.
%%
%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

%% @doc Multipart parser.
-module(cowboy_multipart).

-export([parser/1]).
-export([content_disposition/1]).

%% A part parser is a fun fed with a binary; it answers with a part result
%% or asks for more data.
-type part_parser() :: parser(more(part_result())).
-type parser(T) :: fun((binary()) -> T).
%% Either the result itself or `{more, Fun}', where `Fun' is the parser to
%% call with the next binary.
-type more(T) :: T | {more, parser(T)}.
-type part_result() :: headers() | eof.
%% Headers of a part together with the continuation producing its body.
-type headers() :: {headers, http_headers(), body_cont()}.
-type http_headers() :: [{binary(), binary()}].
-type body_cont() :: cont(more(body_result())).
%% A continuation is a fun taking no argument.
-type cont(T) :: fun(() -> T).
-type body_result() :: {body, binary(), body_cont()} | end_of_part().
-type end_of_part() :: {end_of_part, cont(more(part_result()))}.
-type disposition() :: {binary(), [{binary(), binary()}]}.

%% API.

%% @doc Build a multipart parser bound to `Boundary'.
%% The returned fun accepts a binary and hands it, together with the
%% boundary, to `parse/2'.
-spec parser(binary()) -> part_parser().
parser(Boundary) when is_binary(Boundary) ->
    fun (Data) when is_binary(Data) ->
        parse(Data, Boundary)
    end.

%% @doc Parse a content disposition.
%% @todo Parse the MIME header instead of the HTTP one.
-spec content_disposition(binary()) -> disposition() | {error, badarg}.
content_disposition(Data) ->
    % Both continuations below can answer `{error, badarg}', which is why the
    % spec lists it next to disposition().
    cowboy_http:token_ci(Data,
        fun (_Rest, <<>>) ->
                % An empty disposition token is rejected.
                {error, badarg};
            (Rest, Disposition) ->
                cowboy_http:params(Rest,
                    fun (<<>>, Params) ->
                            {Disposition, Params};
                        (_Rest2, _) ->
                            % Input left over after the parameters is rejected.
                            {error, badarg}
                    end)
        end).

%% Internal.

%% @doc Entry point of the multipart parser, skips over the preamble if any.
%% Once `Bin' holds at least `byte_size(Boundary) + 2' bytes it is checked for
%% a leading `--Boundary'; shorter input yields a `{more, Fun}' tuple.
-spec parse(binary(), binary()) -> more(part_result()).
parse(Bin, Boundary) when byte_size(Bin) >= byte_size(Boundary) + 2 ->
    BoundarySize = byte_size(Boundary),
    Pattern = pattern(Boundary),
    case Bin of
        <<"--", Boundary:BoundarySize/binary, Rest/binary>> ->
            % Data starts with initial boundary, skip preamble parsing.
            parse_boundary_tail(Rest, Pattern);
        _ ->
            % Parse preamble.
            skip(Bin, Pattern)
    end;
parse(Bin, Boundary) ->
    % Not enough data to know if the data begins with a boundary.
    more(Bin, fun (NewBin) -> parse(NewBin, Boundary) end).

-type pattern() :: {binary:cp(), non_neg_integer()}.
-type patterns() :: {pattern(), pattern()}.

%% @doc Return two compiled binary patterns with their sizes in bytes.
%% The boundary pattern is the boundary prepended with "\r\n--".
%% The boundary suffix pattern matches all prefixes of the boundary.
%% Both tuples carry the byte size of the full "\r\n--Boundary" string.
-spec pattern(binary()) -> patterns().
pattern(Boundary) ->
    MatchPattern = <<"\r\n--", Boundary/binary>>,
    MatchPrefixes = prefixes(MatchPattern),
    {{binary:compile_pattern(MatchPattern), byte_size(MatchPattern)},
     {binary:compile_pattern(MatchPrefixes), byte_size(MatchPattern)}}.

%% @doc Return all prefixes of a binary string.
%% The list of prefixes includes the full string. Only non-empty binaries are
%% accepted; `pattern/1' always passes at least "\r\n--".
-spec prefixes(binary()) -> [binary()].
prefixes(<<C, Rest/binary>>) ->
    prefixes(Rest, <<C>>).

%% Acc is the prefix built so far; it is emitted before being extended by the
%% next byte.
-spec prefixes(binary(), binary()) -> [binary()].
prefixes(<<C, Rest/binary>>, Acc) ->
    [Acc|prefixes(Rest, <<Acc/binary, C>>)];
prefixes(<<>>, Acc) ->
    [Acc].

%% @doc Test if a boundary is a possible suffix.
%% `Patterns' must be the value returned by `pattern/1'. The result is either
%% `nomatch' or the `{Start, Length}' of a boundary prefix that ends exactly
%% at the end of `Bin'.
-spec suffix_match(binary(), patterns()) -> nomatch | {integer(), integer()}.
suffix_match(Bin, {_Boundary, {PrefixPat, MaxLen}}) ->
    BinSize = byte_size(Bin),
    % The scope handed to binary:match/3 starts at the end of Bin with a
    % negative length, covering at most the last MaxLen bytes.
    suffix_match(Bin, PrefixPat, BinSize, max(-BinSize, -MaxLen)).

-spec suffix_match(binary(), binary:cp(), non_neg_integer(), 0|neg_integer()) ->
    nomatch | {integer(), integer()}.
suffix_match(_Bin, _Pat, _Size, 0) ->
    % Empty scope, nothing left to look at.
    nomatch;
suffix_match(Bin, Pat, Size, Scope) when Scope < 0 ->
    case binary:match(Bin, Pat, [{scope, {Size, Scope}}]) of
        nomatch ->
            nomatch;
        {Start, Len} = Found when Start + Len =:= Size ->
            % The match touches the end of the binary.
            Found;
        {_Start, Len} ->
            % Match ends too early, retry with the scope shrunk by its length.
            suffix_match(Bin, Pat, Size, Scope + Len)
    end.

%% @doc Parse remaining characters of a line beginning with the boundary.
%% A boundary followed by "--" ends the parsing with <em>eof</em>. With fewer
%% than two bytes available more data is requested first.
-spec parse_boundary_tail(binary(), patterns()) -> more(part_result()).
parse_boundary_tail(<<"--", _Rest/binary>>, _Pattern) ->
    % Closing delimiter.
    eof;
parse_boundary_tail(Bin, Pattern) when byte_size(Bin) >= 2 ->
    % No dash after the boundary, go on to the end of the line.
    parse_boundary_eol(Bin, Pattern);
parse_boundary_tail(Bin, Pattern) ->
    % Too short to tell whether "--" follows.
    more(Bin, fun (NewBin) -> parse_boundary_tail(NewBin, Pattern) end).

%% @doc Skip whitespace and unknown chars until CRLF.
-spec parse_boundary_eol(binary(), patterns()) -> more(part_result()).
parse_boundary_eol(Bin, Pattern) ->
    case binary:match(Bin, <<"\r\n">>) of
        nomatch ->
            % No CRLF yet: keep only the last byte (it may be the "\r" of a
            % CRLF split across two chunks) and wait for more data.
            KeepFrom = max(byte_size(Bin) - 1, 0),
            {_Dropped, Tail} = split_binary(Bin, KeepFrom),
            more(Tail, fun (NewBin) -> parse_boundary_eol(NewBin, Pattern) end);
        {CrlfPos, _Len} ->
            % Drop everything before the CRLF; cowboy_http:whitespace/2 then
            % passes what it leaves over to parse_boundary_crlf/2.
            {_Skipped, FromCrlf} = split_binary(Bin, CrlfPos),
            Next = fun (Rest) -> parse_boundary_crlf(Rest, Pattern) end,
            cowboy_http:whitespace(FromCrlf, Next)
    end.

%% Continuation used by parse_boundary_eol/2 once a CRLF has been located.
-spec parse_boundary_crlf(binary(), patterns()) -> more(part_result()).
parse_boundary_crlf(<<"\r\n", AfterCrlf/binary>>, Pattern) ->
    % parse_boundary_eol/2 only gets here after finding a CRLF, so the two
    % bytes are available and this clause itself never asks for more data.
    parse_headers(AfterCrlf, Pattern);
parse_boundary_crlf(Other, Pattern) ->
    % Unspecified behaviour here: RFC 2046 doesn't say what to do when LWSP is
    % not followed directly by a new line. In this implementation it is
    % considered part of the boundary so EOL needs to be searched again.
    parse_boundary_eol(Other, Pattern).

%% Parse the headers of a part, starting with an empty accumulator.
-spec parse_headers(binary(), patterns()) -> more(part_result()).
parse_headers(Bin, Pattern) ->
    parse_headers(Bin, Pattern, []).

%% Headers are decoded one at a time with erlang:decode_packet/3 and collected
%% in reverse order in `Acc'.
-spec parse_headers(binary(), patterns(), http_headers()) -> more(part_result()).
parse_headers(Bin, Pattern, Acc) ->
    case erlang:decode_packet(httph_bin, Bin, []) of
        {more, _} ->
            more(Bin, fun (NewBin) -> parse_headers(NewBin, Pattern, Acc) end);
        {ok, http_eoh, Rest} ->
            % End of headers: hand them out in their original order.
            {headers, lists:reverse(Acc), fun () -> parse_body(Rest, Pattern) end};
        {ok, {http_error, _}, _} ->
            % Skip malformed parts.
            skip(Bin, Pattern);
        {ok, {http_header, _, Name, _, Value}, Rest} ->
            parse_headers(Rest, Pattern, [{header_name(Name), Value} | Acc])
    end.

%% Lowercase binary form of a header name; decode_packet/3 hands the name
%% over either as an atom or as a binary.
-spec header_name(atom() | binary()) -> binary().
header_name(Name) when is_atom(Name) ->
    cowboy_bstr:to_lower(atom_to_binary(Name, latin1));
header_name(Name) ->
    cowboy_bstr:to_lower(Name).

-spec parse_body(binary(), patterns()) -> more(body_result()).
parse_body(Bin, {{BoundaryPat, BoundarySize}, _} = Pattern)
        when byte_size(Bin) >= BoundarySize ->
    case binary:match(Bin, BoundaryPat) of
        nomatch ->
            body_without_boundary(Bin, Pattern);
        {0, _Length} ->
            % The boundary comes first, there is no body data in front of it.
            <<_:BoundarySize/binary, Rest/binary>> = Bin,
            end_of_part(Rest, Pattern);
        {Start, _Length} ->
            % Boundary found, this is the latest partial body that will be
            % returned for this part.
            <<Body:Start/binary, _:BoundarySize/binary, Rest/binary>> = Bin,
            PartEnd = end_of_part(Rest, Pattern),
            {body, Body, fun () -> PartEnd end}
    end;
parse_body(Bin, Pattern) ->
    % Less data than one full boundary, ask for more.
    more(Bin, fun (NewBin) -> parse_body(NewBin, Pattern) end).

%% Body chunk for input in which no complete boundary was found.
-spec body_without_boundary(binary(), patterns()) ->
    {body, binary(), body_cont()}.
body_without_boundary(Bin, Pattern) ->
    case suffix_match(Bin, Pattern) of
        nomatch ->
            % No boundary prefix at the end of the input: the whole binary
            % can be returned, which saves copying the next input onto the
            % tail of the current one.
            {body, Bin, fun () -> parse_body(<<>>, Pattern) end};
        {Start, Len} ->
            % The input ends with a possible start of boundary: return what
            % precedes it and keep the rest for the next call.
            Body = binary:part(Bin, 0, Start),
            Held = binary:part(Bin, Start, Len),
            {body, Body, fun () -> parse_body(Held, Pattern) end}
    end.

%% Wrap the data following a boundary in an `end_of_part' tuple whose
%% continuation resumes with parse_boundary_tail/2.
-spec end_of_part(binary(), patterns()) -> end_of_part().
end_of_part(Bin, Pattern) ->
    {end_of_part, fun () -> parse_boundary_tail(Bin, Pattern) end}.

%% Discard data up to and including the next boundary.
-spec skip(binary(), patterns()) -> more(part_result()).
skip(Bin, {{BoundaryPat, BoundarySize}, _} = Pattern) ->
    case binary:match(Bin, BoundaryPat) of
        nomatch ->
            % Boundary not found, need more data. Only the last
            % BoundarySize - 1 bytes are kept, as they may be the start of a
            % boundary.
            KeepFrom = max(byte_size(Bin) - BoundarySize + 1, 0),
            {_Dropped, Tail} = split_binary(Bin, KeepFrom),
            more(Tail, fun (NewBin) -> skip(NewBin, Pattern) end);
        {Start, _Length} ->
            % Boundary found, proceed with parsing of the next part.
            {_Skipped, Rest} = split_binary(Bin, Start + BoundarySize),
            parse_boundary_tail(Rest, Pattern)
    end.

%% Build a `{more, Fun}' tuple. Unless `Bin' is empty, it is prepended to the
%% data given to `Fun' before the inner parser is called.
-spec more(binary(), parser(T)) -> {more, parser(T)}.
more(<<>>, F) ->
    {more, F};
more(Bin, InnerF) ->
    {more, fun (NewData) when is_binary(NewData) ->
        InnerF(<<Bin/binary, NewData/binary>>)
    end}.

%% Tests.

-ifdef(TEST).

%% Each case pairs a complete multipart body with the list of
%% {Headers, Body} parts expected from parsing it with boundary "boundary".
multipart_test_() ->
    Cases = [
        {<<"--boundary--">>, []},
        {<<"preamble\r\n--boundary--">>, []},
        {<<"--boundary--\r\nepilogue">>, []},
        {<<"\r\n--boundary\r\nA:b\r\nC:d\r\n\r\n\r\n--boundary--">>,
            [{[{<<"a">>, <<"b">>}, {<<"c">>, <<"d">>}], <<>>}]},
        {
            <<
                "--boundary\r\nX-Name:answer\r\n\r\n42"
                "\r\n--boundary\r\nServer:Cowboy\r\n\r\nIt rocks!\r\n"
                "\r\n--boundary--"
            >>,
            [
                {[{<<"x-name">>, <<"answer">>}], <<"42">>},
                {[{<<"server">>, <<"Cowboy">>}], <<"It rocks!\r\n">>}
            ]
        }
    ],
    % Expected is already bound in each fun, so the match acts as the
    % assertion.
    [{title(Input), fun () -> Expected = acc_multipart(Input) end}
        || {Input, Expected} <- Cases].

%% Run the parser over a whole body and collect its parts.
acc_multipart(Input) ->
    Parser = parser(<<"boundary">>),
    acc_multipart(Parser(Input), []).

%% Walk the parser results; the part being read sits at the head of the
%% accumulator, with its body chunks collected in reverse order.
acc_multipart(eof, Parts) ->
    lists:reverse(Parts);
acc_multipart({headers, Headers, Cont}, Parts) ->
    acc_multipart(Cont(), [{Headers, []}|Parts]);
acc_multipart({body, Chunk, Cont}, [{Headers, Chunks}|Parts]) ->
    acc_multipart(Cont(), [{Headers, [Chunk|Chunks]}|Parts]);
acc_multipart({end_of_part, Cont}, [{Headers, Chunks}|Parts]) ->
    Body = list_to_binary(lists:reverse(Chunks)),
    acc_multipart(Cont(), [{Headers, Body}|Parts]).

%% Each case pairs a header value with the expected
%% {Disposition, Params} result.
content_disposition_test_() ->
    Cases = [
        {<<"form-data; name=id">>, {<<"form-data">>, [{<<"name">>, <<"id">>}]}},
        {<<"inline">>, {<<"inline">>, []}},
        {<<"attachment; \tfilename=brackets-slides.pdf">>,
            {<<"attachment">>, [{<<"filename">>, <<"brackets-slides.pdf">>}]}}
    ],
    [{title(Input), fun () -> Expected = content_disposition(Input) end}
        || {Input, Expected} <- Cases].

%% Test title for an input: tab, CR and LF are replaced by their escaped
%% textual form.
title(Bin) ->
    Escapes = [{"\t", "\\\\t"}, {"\r", "\\\\r"}, {"\n", "\\\\n"}],
    Escape = fun ({From, To}, Text) -> re:replace(Text, From, To, [global]) end,
    iolist_to_binary(lists:foldl(Escape, Bin, Escapes)).

%% Each case is {ExpectedResult, Packet, Boundary} for
%% suffix_match(Packet, pattern(Boundary)).
suffix_test_() ->
    Cases = [
        {nomatch, <<>>, <<"ABC">>},
        {{0, 1}, <<"\r">>, <<"ABC">>},
        {{0, 2}, <<"\r\n">>, <<"ABC">>},
        {{0, 4}, <<"\r\n--">>, <<"ABC">>},
        {{0, 5}, <<"\r\n--A">>, <<"ABC">>},
        {{0, 6}, <<"\r\n--AB">>, <<"ABC">>},
        {{0, 7}, <<"\r\n--ABC">>, <<"ABC">>},
        {nomatch, <<"\r\n--AB1">>, <<"ABC">>},
        {{1, 1}, <<"1\r">>, <<"ABC">>},
        {{2, 2}, <<"12\r\n">>, <<"ABC">>},
        {{3, 4}, <<"123\r\n--">>, <<"ABC">>}
    ],
    % Expected is already bound in each fun, so the match acts as the
    % assertion.
    [fun () -> Expected = suffix_match(Packet, pattern(Boundary)) end
        || {Expected, Packet, Boundary} <- Cases].

-endif.