diff options
Diffstat (limited to 'src/cowboy_multipart.erl')
-rw-r--r-- | src/cowboy_multipart.erl | 249 |
1 files changed, 249 insertions, 0 deletions
diff --git a/src/cowboy_multipart.erl b/src/cowboy_multipart.erl new file mode 100644 index 0000000..b7aeb54 --- /dev/null +++ b/src/cowboy_multipart.erl @@ -0,0 +1,249 @@ +%% Copyright (c) 2011, Anthony Ramine <[email protected]> +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. +%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +%% @doc Multipart parser. +-module(cowboy_multipart). + +-type part_parser() :: parser(more(part_result())). +-type parser(T) :: fun((binary()) -> T). +-type more(T) :: T | {more, parser(T)}. +-type part_result() :: headers() | eof. +-type headers() :: {headers, http_headers(), body_cont()}. +-type http_headers() :: [{atom() | binary(), binary()}]. +-type body_cont() :: cont(more(body_result())). +-type cont(T) :: fun(() -> T). +-type body_result() :: {body, binary(), body_cont()} | end_of_part(). +-type end_of_part() :: {end_of_part, cont(more(part_result()))}. +-type disposition() :: {binary(), [{binary(), binary()}]}. + +-export([parser/1, content_disposition/1]). + +-include_lib("eunit/include/eunit.hrl"). + +%% API. + +%% @doc Return a multipart parser for the given boundary. +-spec parser(binary()) -> part_parser(). +parser(Boundary) when is_binary(Boundary) -> + fun (Bin) when is_binary(Bin) -> parse(Bin, Boundary) end. + +%% @doc Parse a content disposition. +%% @todo Parse the MIME header instead of the HTTP one. +-spec content_disposition(binary()) -> disposition(). +content_disposition(Data) -> + cowboy_http:token_ci(Data, + fun (_Rest, <<>>) -> {error, badarg}; + (Rest, Disposition) -> + cowboy_http:content_type_params(Rest, + fun (Params) -> {Disposition, Params} end, []) + end). + +%% Internal. + +%% @doc Entry point of the multipart parser, skips over the preamble if any. +-spec parse(binary(), binary()) -> more(part_result()). +parse(Bin, Boundary) when byte_size(Bin) >= byte_size(Boundary) + 2 -> + BoundarySize = byte_size(Boundary), + Pattern = pattern(Boundary), + case Bin of + <<"--", Boundary:BoundarySize/binary, Rest/binary>> -> + % Data starts with initial boundary, skip preamble parsing. + parse_boundary_tail(Rest, Pattern); + _ -> + % Parse preamble. + skip(Bin, Pattern) + end; +parse(Bin, Boundary) -> + % Not enough data to know if the data begins with a boundary. + more(Bin, fun (NewBin) -> parse(NewBin, Boundary) end). + +-type pattern() :: {binary:cp(), non_neg_integer()}. + +%% @doc Return a compiled binary pattern with its size in bytes. +%% The pattern is the boundary prepended with "\r\n--". +-spec pattern(binary()) -> pattern(). +pattern(Boundary) -> + MatchPattern = <<"\r\n--", Boundary/binary>>, + {binary:compile_pattern(MatchPattern), byte_size(MatchPattern)}. + +%% @doc Parse remaining characters of a line beginning with the boundary. +%% If followed by "--", <em>eof</em> is returned and parsing is finished. +-spec parse_boundary_tail(binary(), pattern()) -> more(part_result()). +parse_boundary_tail(Bin, Pattern) when byte_size(Bin) >= 2 -> + case Bin of + <<"--", _Rest/binary>> -> + % Boundary is followed by "--", end parsing. + eof; + _ -> + % No dash after boundary, proceed with unknown chars and lwsp + % removal. + parse_boundary_eol(Bin, Pattern) + end; +parse_boundary_tail(Bin, Pattern) -> + % Boundary may be followed by "--", need more data. + more(Bin, fun (NewBin) -> parse_boundary_tail(NewBin, Pattern) end). + +%% @doc Skip whitespace and unknown chars until CRLF. +-spec parse_boundary_eol(binary(), pattern()) -> more(part_result()). +parse_boundary_eol(Bin, Pattern) -> + case binary:match(Bin, <<"\r\n">>) of + {CrlfStart, _Length} -> + % End of line found, remove optional whitespace. + <<_:CrlfStart/binary, Rest/binary>> = Bin, + Fun = fun (Rest2) -> parse_boundary_crlf(Rest2, Pattern) end, + cowboy_http:whitespace(Rest, Fun); + nomatch -> + % CRLF not found in the given binary. + RestStart = max(byte_size(Bin) - 1, 0), + <<_:RestStart/binary, Rest/binary>> = Bin, + more(Rest, fun (NewBin) -> parse_boundary_eol(NewBin, Pattern) end) + end. + +-spec parse_boundary_crlf(binary(), pattern()) -> more(part_result()). +parse_boundary_crlf(<<"\r\n", Rest/binary>>, Pattern) -> + % The binary is at least 2 bytes long as this function is only called by + % parse_boundary_eol/3 when CRLF has been found so a more tuple will never + % be returned from here. + parse_headers(Rest, Pattern); +parse_boundary_crlf(Bin, Pattern) -> + % Unspecified behaviour here: RFC 2046 doesn't say what to do when LWSP is + % not followed directly by a new line. In this implementation it is + % considered part of the boundary so EOL needs to be searched again. + parse_boundary_eol(Bin, Pattern). + +-spec parse_headers(binary(), pattern()) -> more(part_result()). +parse_headers(Bin, Pattern) -> + parse_headers(Bin, Pattern, []). + +-spec parse_headers(binary(), pattern(), http_headers()) -> more(part_result()). +parse_headers(Bin, Pattern, Acc) -> + case erlang:decode_packet(httph_bin, Bin, []) of + {ok, {http_header, _, Name, _, Value}, Rest} -> + parse_headers(Rest, Pattern, [{Name, Value} | Acc]); + {ok, http_eoh, Rest} -> + Headers = lists:reverse(Acc), + {headers, Headers, fun () -> parse_body(Rest, Pattern) end}; + {ok, {http_error, _}, _} -> + % Skip malformed parts. + skip(Bin, Pattern); + {more, _} -> + more(Bin, fun (NewBin) -> parse_headers(NewBin, Pattern, Acc) end) + end. + +-spec parse_body(binary(), pattern()) -> more(body_result()). +parse_body(Bin, Pattern = {P, PSize}) when byte_size(Bin) >= PSize -> + case binary:match(Bin, P) of + {0, _Length} -> + <<_:PSize/binary, Rest/binary>> = Bin, + end_of_part(Rest, Pattern); + {BoundaryStart, _Length} -> + % Boundary found, this is the latest partial body that will be + % returned for this part. + <<PBody:BoundaryStart/binary, _:PSize/binary, Rest/binary>> = Bin, + FResult = end_of_part(Rest, Pattern), + {body, PBody, fun () -> FResult end}; + nomatch -> + PartialLength = byte_size(Bin) - PSize + 1, + <<PBody:PartialLength/binary, Rest/binary>> = Bin, + {body, PBody, fun () -> parse_body(Rest, Pattern) end} + end; +parse_body(Bin, Pattern) -> + more(Bin, fun (NewBin) -> parse_body(NewBin, Pattern) end). + +-spec end_of_part(binary(), pattern()) -> end_of_part(). +end_of_part(Bin, Pattern) -> + {end_of_part, fun () -> parse_boundary_tail(Bin, Pattern) end}. + +-spec skip(binary(), pattern()) -> more(part_result()). +skip(Bin, Pattern = {P, PSize}) -> + case binary:match(Bin, P) of + {BoundaryStart, _Length} -> + % Boundary found, proceed with parsing of the next part. + RestStart = BoundaryStart + PSize, + <<_:RestStart/binary, Rest/binary>> = Bin, + parse_boundary_tail(Rest, Pattern); + nomatch -> + % Boundary not found, need more data. + RestStart = max(byte_size(Bin) - PSize + 1, 0), + <<_:RestStart/binary, Rest/binary>> = Bin, + more(Rest, fun (NewBin) -> skip(NewBin, Pattern) end) + end. + +-spec more(binary(), parser(T)) -> {more, parser(T)}. +more(<<>>, F) -> + {more, F}; +more(Bin, InnerF) -> + F = fun (NewData) when is_binary(NewData) -> + InnerF(<<Bin/binary, NewData/binary>>) + end, + {more, F}. + +%% Tests. + +-ifdef(TEST). + +multipart_test_() -> + %% {Body, Result} + Tests = [ + {<<"--boundary--">>, []}, + {<<"preamble\r\n--boundary--">>, []}, + {<<"--boundary--\r\nepilogue">>, []}, + {<<"\r\n--boundary\r\nA:b\r\nC:d\r\n\r\n\r\n--boundary--">>, + [{[{<<"A">>, <<"b">>}, {<<"C">>, <<"d">>}], <<>>}]}, + { + << + "--boundary\r\nX-Name:answer\r\n\r\n42" + "\r\n--boundary\r\nServer:Cowboy\r\n\r\nIt rocks!\r\n" + "\r\n--boundary--" + >>, + [ + {[{<<"X-Name">>, <<"answer">>}], <<"42">>}, + {[{'Server', <<"Cowboy">>}], <<"It rocks!\r\n">>} + ] + } + ], + [{title(V), fun () -> R = acc_multipart(V) end} || {V, R} <- Tests]. + +acc_multipart(V) -> + acc_multipart((parser(<<"boundary">>))(V), []). + +acc_multipart({headers, Headers, Cont}, Acc) -> + acc_multipart(Cont(), [{Headers, []}|Acc]); +acc_multipart({body, Body, Cont}, [{Headers, BodyAcc}|Acc]) -> + acc_multipart(Cont(), [{Headers, [Body|BodyAcc]}|Acc]); +acc_multipart({end_of_part, Cont}, [{Headers, BodyAcc}|Acc]) -> + Body = list_to_binary(lists:reverse(BodyAcc)), + acc_multipart(Cont(), [{Headers, Body}|Acc]); +acc_multipart(eof, Acc) -> + lists:reverse(Acc). + +content_disposition_test_() -> + %% {Disposition, Result} + Tests = [ + {<<"form-data; name=id">>, {<<"form-data">>, [{<<"name">>, <<"id">>}]}}, + {<<"inline">>, {<<"inline">>, []}}, + {<<"attachment; \tfilename=brackets-slides.pdf">>, + {<<"attachment">>, [{<<"filename">>, <<"brackets-slides.pdf">>}]}} + ], + [{title(V), fun () -> R = content_disposition(V) end} || {V, R} <- Tests]. + +title(Bin) -> + Title = lists:foldl( + fun ({T, R}, V) -> re:replace(V, T, R, [global]) end, + Bin, + [{"\t", "\\\\t"}, {"\r", "\\\\r"}, {"\n", "\\\\n"}] + ), + iolist_to_binary(Title). + +-endif. |