From 917cf99e10c41676183d501b86af6e47c95afb89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Thu, 6 Feb 2014 19:36:25 +0100 Subject: Add and document the new multipart code The old undocumented API is removed entirely. While a documentation exists for the new API, it will not be considered set in stone until further testing has been performed, and a file upload example has been added. The new API should be a little more efficient than the old API, especially with smaller messages. --- src/cowboy_multipart.erl | 313 ----------------------------------------------- src/cowboy_req.erl | 128 +++++++++++-------- 2 files changed, 74 insertions(+), 367 deletions(-) delete mode 100644 src/cowboy_multipart.erl (limited to 'src') diff --git a/src/cowboy_multipart.erl b/src/cowboy_multipart.erl deleted file mode 100644 index 4df5a27..0000000 --- a/src/cowboy_multipart.erl +++ /dev/null @@ -1,313 +0,0 @@ -%% Copyright (c) 2011, Anthony Ramine -%% -%% Permission to use, copy, modify, and/or distribute this software for any -%% purpose with or without fee is hereby granted, provided that the above -%% copyright notice and this permission notice appear in all copies. -%% -%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -%% @doc Multipart parser. --module(cowboy_multipart). - --export([parser/1]). --export([content_disposition/1]). - --type part_parser() :: parser(more(part_result())). --type parser(T) :: fun((binary()) -> T). --type more(T) :: T | {more, parser(T)}. 
--type part_result() :: headers() | eof. --type headers() :: {headers, http_headers(), body_cont()}. --type http_headers() :: [{binary(), binary()}]. --type body_cont() :: cont(more(body_result())). --type cont(T) :: fun(() -> T). --type body_result() :: {body, binary(), body_cont()} | end_of_part(). --type end_of_part() :: {end_of_part, cont(more(part_result()))}. --type disposition() :: {binary(), [{binary(), binary()}]}. - -%% API. - -%% @doc Return a multipart parser for the given boundary. --spec parser(binary()) -> part_parser(). -parser(Boundary) when is_binary(Boundary) -> - fun (Bin) when is_binary(Bin) -> parse(Bin, Boundary) end. - -%% @doc Parse a content disposition. -%% @todo Parse the MIME header instead of the HTTP one. --spec content_disposition(binary()) -> disposition(). -content_disposition(Data) -> - cowboy_http:token_ci(Data, - fun (_Rest, <<>>) -> {error, badarg}; - (Rest, Disposition) -> - cowboy_http:params(Rest, - fun (<<>>, Params) -> {Disposition, Params}; - (_Rest2, _) -> {error, badarg} - end) - end). - -%% Internal. - -%% @doc Entry point of the multipart parser, skips over the preamble if any. --spec parse(binary(), binary()) -> more(part_result()). -parse(Bin, Boundary) when byte_size(Bin) >= byte_size(Boundary) + 2 -> - BoundarySize = byte_size(Boundary), - Pattern = pattern(Boundary), - case Bin of - <<"--", Boundary:BoundarySize/binary, Rest/binary>> -> - % Data starts with initial boundary, skip preamble parsing. - parse_boundary_tail(Rest, Pattern); - _ -> - % Parse preamble. - skip(Bin, Pattern) - end; -parse(Bin, Boundary) -> - % Not enough data to know if the data begins with a boundary. - more(Bin, fun (NewBin) -> parse(NewBin, Boundary) end). - --type pattern() :: {binary:cp(), non_neg_integer()}. --type patterns() :: {pattern(), pattern()}. - -%% @doc Return two compiled binary patterns with their sizes in bytes. -%% The boundary pattern is the boundary prepended with "\r\n--". 
-%% The boundary suffix pattern matches all prefixes of the boundary. --spec pattern(binary()) -> patterns(). -pattern(Boundary) -> - MatchPattern = <<"\r\n--", Boundary/binary>>, - MatchPrefixes = prefixes(MatchPattern), - {{binary:compile_pattern(MatchPattern), byte_size(MatchPattern)}, - {binary:compile_pattern(MatchPrefixes), byte_size(MatchPattern)}}. - -%% @doc Return all prefixes of a binary string. -%% The list of prefixes includes the full string. --spec prefixes(binary()) -> [binary()]. -prefixes(<<C, Rest/binary>>) -> - prefixes(Rest, <<C>>). - --spec prefixes(binary(), binary()) -> [binary()]. -prefixes(<<C, Rest/binary>>, Acc) -> - [Acc|prefixes(Rest, <<Acc/binary, C>>)]; -prefixes(<<>>, Acc) -> - [Acc]. - -%% @doc Test if a boundary is a possble suffix. -%% The patterns are expected to have been returned from `pattern/1'. --spec suffix_match(binary(), patterns()) -> nomatch | {integer(), integer()}. -suffix_match(Bin, {_Boundary, {Pat, Len}}) -> - Size = byte_size(Bin), - suffix_match(Bin, Pat, Size, max(-Size, -Len)). - --spec suffix_match(binary(), binary:cp(), non_neg_integer(), 0|neg_integer()) -> - nomatch | {integer(), integer()}. -suffix_match(_Bin, _Pat, _Size, _Match=0) -> - nomatch; -suffix_match(Bin, Pat, Size, Match) when Match < 0 -> - case binary:match(Bin, Pat, [{scope, {Size, Match}}]) of - {Pos, Len}=Part when Pos + Len =:= Size -> Part; - {_, Len} -> suffix_match(Bin, Pat, Size, Match + Len); - nomatch -> nomatch - end. - -%% @doc Parse remaining characters of a line beginning with the boundary. -%% If followed by "--", eof is returned and parsing is finished. --spec parse_boundary_tail(binary(), patterns()) -> more(part_result()). -parse_boundary_tail(Bin, Pattern) when byte_size(Bin) >= 2 -> - case Bin of - <<"--", _Rest/binary>> -> - % Boundary is followed by "--", end parsing. - eof; - _ -> - % No dash after boundary, proceed with unknown chars and lwsp - % removal.
- parse_boundary_eol(Bin, Pattern) - end; -parse_boundary_tail(Bin, Pattern) -> - % Boundary may be followed by "--", need more data. - more(Bin, fun (NewBin) -> parse_boundary_tail(NewBin, Pattern) end). - -%% @doc Skip whitespace and unknown chars until CRLF. --spec parse_boundary_eol(binary(), patterns()) -> more(part_result()). -parse_boundary_eol(Bin, Pattern) -> - case binary:match(Bin, <<"\r\n">>) of - {CrlfStart, _Length} -> - % End of line found, remove optional whitespace. - <<_:CrlfStart/binary, Rest/binary>> = Bin, - Fun = fun (Rest2) -> parse_boundary_crlf(Rest2, Pattern) end, - cowboy_http:whitespace(Rest, Fun); - nomatch -> - % CRLF not found in the given binary. - RestStart = max(byte_size(Bin) - 1, 0), - <<_:RestStart/binary, Rest/binary>> = Bin, - more(Rest, fun (NewBin) -> parse_boundary_eol(NewBin, Pattern) end) - end. - --spec parse_boundary_crlf(binary(), patterns()) -> more(part_result()). -parse_boundary_crlf(<<"\r\n", Rest/binary>>, Pattern) -> - % The binary is at least 2 bytes long as this function is only called by - % parse_boundary_eol/3 when CRLF has been found so a more tuple will never - % be returned from here. - parse_headers(Rest, Pattern); -parse_boundary_crlf(Bin, Pattern) -> - % Unspecified behaviour here: RFC 2046 doesn't say what to do when LWSP is - % not followed directly by a new line. In this implementation it is - % considered part of the boundary so EOL needs to be searched again. - parse_boundary_eol(Bin, Pattern). - --spec parse_headers(binary(), patterns()) -> more(part_result()). -parse_headers(Bin, Pattern) -> - parse_headers(Bin, Pattern, []). - --spec parse_headers(binary(), patterns(), http_headers()) -> more(part_result()). 
-parse_headers(Bin, Pattern, Acc) -> - case erlang:decode_packet(httph_bin, Bin, []) of - {ok, {http_header, _, Name, _, Value}, Rest} -> - Name2 = case is_atom(Name) of - true -> cowboy_bstr:to_lower(atom_to_binary(Name, latin1)); - false -> cowboy_bstr:to_lower(Name) - end, - parse_headers(Rest, Pattern, [{Name2, Value} | Acc]); - {ok, http_eoh, Rest} -> - Headers = lists:reverse(Acc), - {headers, Headers, fun () -> parse_body(Rest, Pattern) end}; - {ok, {http_error, _}, _} -> - % Skip malformed parts. - skip(Bin, Pattern); - {more, _} -> - more(Bin, fun (NewBin) -> parse_headers(NewBin, Pattern, Acc) end) - end. - --spec parse_body(binary(), patterns()) -> more(body_result()). -parse_body(Bin, Pattern = {{P, PSize}, _}) when byte_size(Bin) >= PSize -> - case binary:match(Bin, P) of - {0, _Length} -> - <<_:PSize/binary, Rest/binary>> = Bin, - end_of_part(Rest, Pattern); - {BoundaryStart, _Length} -> - % Boundary found, this is the latest partial body that will be - % returned for this part. - <<PBody:BoundaryStart/binary, _:PSize/binary, Rest/binary>> = Bin, - FResult = end_of_part(Rest, Pattern), - {body, PBody, fun () -> FResult end}; - nomatch -> - case suffix_match(Bin, Pattern) of - nomatch -> - %% Prefix of boundary not found at end of input. it's - %% safe to return the whole binary. Saves copying of - %% next input onto tail of current input binary. - {body, Bin, fun () -> parse_body(<<>>, Pattern) end}; - {BoundaryStart, Len} -> - PBody = binary:part(Bin, 0, BoundaryStart), - Rest = binary:part(Bin, BoundaryStart, Len), - {body, PBody, fun () -> parse_body(Rest, Pattern) end} - end - end; -parse_body(Bin, Pattern) -> - more(Bin, fun (NewBin) -> parse_body(NewBin, Pattern) end). - --spec end_of_part(binary(), patterns()) -> end_of_part(). -end_of_part(Bin, Pattern) -> - {end_of_part, fun () -> parse_boundary_tail(Bin, Pattern) end}. - --spec skip(binary(), patterns()) -> more(part_result()).
-skip(Bin, Pattern = {{P, PSize}, _}) -> - case binary:match(Bin, P) of - {BoundaryStart, _Length} -> - % Boundary found, proceed with parsing of the next part. - RestStart = BoundaryStart + PSize, - <<_:RestStart/binary, Rest/binary>> = Bin, - parse_boundary_tail(Rest, Pattern); - nomatch -> - % Boundary not found, need more data. - RestStart = max(byte_size(Bin) - PSize + 1, 0), - <<_:RestStart/binary, Rest/binary>> = Bin, - more(Rest, fun (NewBin) -> skip(NewBin, Pattern) end) - end. - --spec more(binary(), parser(T)) -> {more, parser(T)}. -more(<<>>, F) -> - {more, F}; -more(Bin, InnerF) -> - F = fun (NewData) when is_binary(NewData) -> - InnerF(<<Bin/binary, NewData/binary>>) - end, - {more, F}. - -%% Tests. - --ifdef(TEST). - -multipart_test_() -> - %% {Body, Result} - Tests = [ - {<<"--boundary--">>, []}, - {<<"preamble\r\n--boundary--">>, []}, - {<<"--boundary--\r\nepilogue">>, []}, - {<<"\r\n--boundary\r\nA:b\r\nC:d\r\n\r\n\r\n--boundary--">>, - [{[{<<"a">>, <<"b">>}, {<<"c">>, <<"d">>}], <<>>}]}, - { - << - "--boundary\r\nX-Name:answer\r\n\r\n42" - "\r\n--boundary\r\nServer:Cowboy\r\n\r\nIt rocks!\r\n" - "\r\n--boundary--" - >>, - [ - {[{<<"x-name">>, <<"answer">>}], <<"42">>}, - {[{<<"server">>, <<"Cowboy">>}], <<"It rocks!\r\n">>} - ] - } - ], - [{title(V), fun () -> R = acc_multipart(V) end} || {V, R} <- Tests]. - -acc_multipart(V) -> - acc_multipart((parser(<<"boundary">>))(V), []). - -acc_multipart({headers, Headers, Cont}, Acc) -> - acc_multipart(Cont(), [{Headers, []}|Acc]); -acc_multipart({body, Body, Cont}, [{Headers, BodyAcc}|Acc]) -> - acc_multipart(Cont(), [{Headers, [Body|BodyAcc]}|Acc]); -acc_multipart({end_of_part, Cont}, [{Headers, BodyAcc}|Acc]) -> - Body = list_to_binary(lists:reverse(BodyAcc)), - acc_multipart(Cont(), [{Headers, Body}|Acc]); -acc_multipart(eof, Acc) -> - lists:reverse(Acc).
- -content_disposition_test_() -> - %% {Disposition, Result} - Tests = [ - {<<"form-data; name=id">>, {<<"form-data">>, [{<<"name">>, <<"id">>}]}}, - {<<"inline">>, {<<"inline">>, []}}, - {<<"attachment; \tfilename=brackets-slides.pdf">>, - {<<"attachment">>, [{<<"filename">>, <<"brackets-slides.pdf">>}]}} - ], - [{title(V), fun () -> R = content_disposition(V) end} || {V, R} <- Tests]. - -title(Bin) -> - Title = lists:foldl( - fun ({T, R}, V) -> re:replace(V, T, R, [global]) end, - Bin, - [{"\t", "\\\\t"}, {"\r", "\\\\r"}, {"\n", "\\\\n"}] - ), - iolist_to_binary(Title). - -suffix_test_() -> - Tests = [ - {nomatch, <<>>, <<"ABC">>}, - {{0, 1}, <<"\r">>, <<"ABC">>}, - {{0, 2}, <<"\r\n">>, <<"ABC">>}, - {{0, 4}, <<"\r\n--">>, <<"ABC">>}, - {{0, 5}, <<"\r\n--A">>, <<"ABC">>}, - {{0, 6}, <<"\r\n--AB">>, <<"ABC">>}, - {{0, 7}, <<"\r\n--ABC">>, <<"ABC">>}, - {nomatch, <<"\r\n--AB1">>, <<"ABC">>}, - {{1, 1}, <<"1\r">>, <<"ABC">>}, - {{2, 2}, <<"12\r\n">>, <<"ABC">>}, - {{3, 4}, <<"123\r\n--">>, <<"ABC">>} - ], - [fun() -> Part = suffix_match(Packet, pattern(Boundary)) end || - {Part, Packet, Boundary} <- Tests]. - --endif. diff --git a/src/cowboy_req.erl b/src/cowboy_req.erl index d98e395..815e4ca 100644 --- a/src/cowboy_req.erl +++ b/src/cowboy_req.erl @@ -1,4 +1,4 @@ -%% Copyright (c) 2011-2013, Loïc Hoguin +%% Copyright (c) 2011-2014, Loïc Hoguin %% Copyright (c) 2011, Anthony Ramine %% %% Permission to use, copy, modify, and/or distribute this software for any @@ -82,8 +82,11 @@ -export([body/2]). -export([body_qs/1]). -export([body_qs/2]). --export([multipart_data/1]). --export([multipart_skip/1]). + +%% Multipart API. +-export([part/1]). +-export([part_body/1]). +-export([part_body/2]). %% Response API. -export([set_resp_cookie/4]). @@ -159,8 +162,8 @@ %% Request body. 
body_state = waiting :: waiting | done | {stream, non_neg_integer(), transfer_decode_fun(), any(), content_decode_fun()}, - multipart = undefined :: undefined | {non_neg_integer(), fun()}, buffer = <<>> :: binary(), + multipart = undefined :: undefined | {binary(), binary()}, %% Response. resp_compress = false :: boolean(), @@ -775,61 +778,78 @@ body_qs(MaxBodyLength, Req) -> {error, Reason} end. -%% Multipart Request API. +%% Multipart API. -%% @doc Return data from the multipart parser. -%% -%% Use this function for multipart streaming. For each part in the request, -%% this function returns {headers, Headers, Req} followed by a sequence of -%% {body, Data, Req} tuples and finally {end_of_part, Req}. When there -%% is no part to parse anymore, {eof, Req} is returned. --spec multipart_data(Req) - -> {headers, cowboy:http_headers(), Req} | {body, binary(), Req} - | {end_of_part | eof, Req} when Req::req(). -multipart_data(Req=#http_req{body_state=waiting}) -> - {ok, {<<"multipart">>, _SubType, Params}, Req2} = - parse_header(<<"content-type">>, Req), - {_, Boundary} = lists:keyfind(<<"boundary">>, 1, Params), - {ok, Length, Req3} = parse_header(<<"content-length">>, Req2), - multipart_data(Req3, Length, {more, cowboy_multipart:parser(Boundary)}); -multipart_data(Req=#http_req{multipart={Length, Cont}}) -> - multipart_data(Req, Length, Cont()); -multipart_data(Req=#http_req{body_state=done}) -> - {eof, Req}. 
- -multipart_data(Req, Length, {headers, Headers, Cont}) -> - {headers, Headers, Req#http_req{multipart={Length, Cont}}}; -multipart_data(Req, Length, {body, Data, Cont}) -> - {body, Data, Req#http_req{multipart={Length, Cont}}}; -multipart_data(Req, Length, {end_of_part, Cont}) -> - {end_of_part, Req#http_req{multipart={Length, Cont}}}; -multipart_data(Req, 0, eof) -> - {eof, Req#http_req{body_state=done, multipart=undefined}}; -multipart_data(Req=#http_req{socket=Socket, transport=Transport}, - Length, eof) -> - %% We just want to skip so no need to stream data here. - {ok, _Data} = Transport:recv(Socket, Length, 5000), - {eof, Req#http_req{body_state=done, multipart=undefined}}; -multipart_data(Req, Length, {more, Parser}) when Length > 0 -> - case stream_body(Req) of - {ok, << Data:Length/binary, Buffer/binary >>, Req2} -> - multipart_data(Req2#http_req{buffer=Buffer}, 0, Parser(Data)); - {ok, Data, Req2} -> - multipart_data(Req2, Length - byte_size(Data), Parser(Data)) +%% @doc Return the next part's headers. +-spec part(Req) + -> {ok, cow_multipart:headers(), Req} | {done, Req} + when Req::req(). +part(Req=#http_req{multipart=undefined}) -> + part(init_multipart(Req)); +part(Req) -> + {ok, Data, Req2} = stream_multipart(Req), + part(Data, Req2). + +part(Buffer, Req=#http_req{multipart={Boundary, _}}) -> + case cow_multipart:parse_headers(Buffer, Boundary) of + more -> + {ok, Data, Req2} = stream_multipart(Req), + part(<< Buffer/binary, Data/binary >>, Req2); + {more, Buffer2} -> + {ok, Data, Req2} = stream_multipart(Req), + part(<< Buffer2/binary, Data/binary >>, Req2); + {ok, Headers, Rest} -> + {ok, Headers, Req#http_req{multipart={Boundary, Rest}}}; + %% Ignore epilogue. + {done, _} -> + {done, Req#http_req{multipart=undefined}} end. -%% @doc Skip a part returned by the multipart parser. -%% -%% This function repeatedly calls multipart_data/1 until -%% {end_of_part, Req} or {eof, Req} is parsed. --spec multipart_skip(Req) -> {ok, Req} when Req::req(). 
-multipart_skip(Req) -> - case multipart_data(Req) of - {end_of_part, Req2} -> {ok, Req2}; - {eof, Req2} -> {ok, Req2}; - {_, _, Req2} -> multipart_skip(Req2) +%% @doc Return the current part's body. +-spec part_body(Req) + -> {ok, binary(), Req} | {more, binary(), Req} + when Req::req(). +part_body(Req) -> + part_body(8000000, Req). + +-spec part_body(non_neg_integer(), Req) + -> {ok, binary(), Req} | {more, binary(), Req} + when Req::req(). +part_body(MaxLength, Req=#http_req{multipart=undefined}) -> + part_body(MaxLength, init_multipart(Req)); +part_body(MaxLength, Req) -> + part_body(<<>>, MaxLength, Req, <<>>). + +part_body(Buffer, MaxLength, Req=#http_req{multipart={Boundary, _}}, Acc) + when byte_size(Acc) > MaxLength -> + {more, Acc, Req#http_req{multipart={Boundary, Buffer}}}; +part_body(Buffer, MaxLength, Req=#http_req{multipart={Boundary, _}}, Acc) -> + {ok, Data, Req2} = stream_multipart(Req), + case cow_multipart:parse_body(<< Buffer/binary, Data/binary >>, Boundary) of + {ok, Body} -> + part_body(<<>>, MaxLength, Req2, << Acc/binary, Body/binary >>); + {ok, Body, Rest} -> + part_body(Rest, MaxLength, Req2, << Acc/binary, Body/binary >>); + done -> + {ok, Acc, Req2}; + {done, Body} -> + {ok, << Acc/binary, Body/binary >>, Req2}; + {done, Body, Rest} -> + {ok, << Acc/binary, Body/binary >>, + Req2#http_req{multipart={Boundary, Rest}}} end. +init_multipart(Req) -> + {ok, {<<"multipart">>, _, Params}, Req2} + = parse_header(<<"content-type">>, Req), + {_, Boundary} = lists:keyfind(<<"boundary">>, 1, Params), + Req2#http_req{multipart={Boundary, <<>>}}. + +stream_multipart(Req=#http_req{multipart={_, <<>>}}) -> + stream_body(Req); +stream_multipart(Req=#http_req{multipart={Boundary, Buffer}}) -> + {ok, Buffer, Req#http_req{multipart={Boundary, <<>>}}}. + %% Response API. %% @doc Add a cookie header to the response. -- cgit v1.2.3