From 917cf99e10c41676183d501b86af6e47c95afb89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Thu, 6 Feb 2014 19:36:25 +0100 Subject: Add and document the new multipart code The old undocumented API is removed entirely. While a documentation exists for the new API, it will not be considered set in stone until further testing has been performed, and a file upload example has been added. The new API should be a little more efficient than the old API, especially with smaller messages. --- Makefile | 2 +- guide/multipart_req.md | 112 +++++++++ guide/toc.md | 2 +- manual/cowboy_req.md | 45 ++++ rebar.config | 2 +- src/cowboy_multipart.erl | 313 ------------------------- src/cowboy_req.erl | 128 +++++----- test/http_SUITE.erl | 24 +- test/http_SUITE_data/http_multipart.erl | 22 +- test/http_SUITE_data/http_multipart_stream.erl | 34 +++ 10 files changed, 298 insertions(+), 386 deletions(-) create mode 100644 guide/multipart_req.md delete mode 100644 src/cowboy_multipart.erl create mode 100644 test/http_SUITE_data/http_multipart_stream.erl diff --git a/Makefile b/Makefile index 6d98abb..c29165f 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ PLT_APPS = crypto public_key ssl # Dependencies. DEPS = cowlib ranch -dep_cowlib = pkg://cowlib 0.4.0 +dep_cowlib = pkg://cowlib 0.5.0 dep_ranch = pkg://ranch 0.9.0 TEST_DEPS = ct_helper gun diff --git a/guide/multipart_req.md b/guide/multipart_req.md new file mode 100644 index 0000000..a4d0137 --- /dev/null +++ b/guide/multipart_req.md @@ -0,0 +1,112 @@ +Multipart requests +================== + +You can read and parse multipart messages using the +Req object directly. + +Cowboy defines two functions that allow you to get +information about each part and read their contents. + +Checking the content-type +------------------------- + +While there is a variety of multipart messages, the +most common on the Web is `multipart/form-data`. 
It's +the type of message being sent when an HTML form +allows uploading files. + +You can quickly figure out if a multipart message +has been sent by parsing the `content-type` header. + +``` erlang +{ok, {<<"multipart">>, <<"form-data">>, _}, Req2} + = cowboy_req:parse_header(<<"content-type">>, Req). +``` + +Reading a multipart message +--------------------------- + +To read a message you have to iterate over all its +parts. Then, for each part, you can inspect its headers +and read its body. + +``` erlang +multipart(Req) -> + case cowboy_req:part(Req) of + {ok, _Headers, Req2} -> + {ok, _Body, Req3} = cowboy_req:part_body(Req2), + multipart(Req3); + {done, Req2} -> + Req2 + end. +``` + +Parts do not have a size limit. When a part body is +too big, Cowboy will return what it read so far and +allow you to continue if you wish to do so. + +The function `cow_multipart:form_data/1` can be used +to quickly obtain information about a part from a +`multipart/form-data` message. This function will +tell you if the part is for a normal field or if it +is a file being uploaded. + +This can be used for example to allow large part bodies +for files but crash when a normal field is too large. + +``` erlang +multipart(Req) -> + case cowboy_req:part(Req) of + {ok, Headers, Req2} -> + Req4 = case cow_multipart:form_data(Headers) of + {data, _FieldName} -> + {ok, _Body, Req3} = cowboy_req:part_body(Req2), + Req3; + {file, _FieldName, _Filename, _CType, _CTransferEncoding} -> + stream_file(Req2) + end, + multipart(Req4); + {done, Req2} -> + Req2 + end. + +stream_file(Req) -> + case cowboy_req:part_body(Req) of + {ok, _Body, Req2} -> + Req2; + {more, _Body, Req2} -> + stream_file(Req2) + end. +``` + +By default the body chunk Cowboy will return is limited +to 8MB. This can of course be overridden. + +Skipping unwanted parts +----------------------- + +If you do not want to read a part's body, you can skip it. +Skipping is easy. 
If you do not call the function to read +the part's body, Cowboy will automatically skip it when +you request the next part. + +The following snippet reads all part headers and skips +all bodies: + +``` erlang +multipart(Req) -> + case cowboy_req:part(Req) of + {ok, _Headers, Req2} -> + multipart(Req2); + {done, Req2} -> + Req2 + end. +``` + +Similarly, if you start reading the body and it ends up +being too big, you can simply continue with the next part, +Cowboy will automatically skip what remains. + +And if you started reading the message but decide that you +do not need the remaining parts, you can simply stop reading +entirely and Cowboy will automatically figure out what to do. diff --git a/guide/toc.md b/guide/toc.md index a0c9a8c..5229167 100644 --- a/guide/toc.md +++ b/guide/toc.md @@ -48,7 +48,7 @@ Multipart --------- * Understanding multipart - * Multipart requests + * [Multipart requests](multipart_req.md) * Multipart responses Server push technologies diff --git a/manual/cowboy_req.md b/manual/cowboy_req.md index 1d3841d..b943ea0 100644 --- a/manual/cowboy_req.md +++ b/manual/cowboy_req.md @@ -408,6 +408,51 @@ Request body related exports > will perform all the required initialization when it is > called the first time. +### part(Req) -> {ok, Headers, Req2} | {done, Req2} + +> Types: +> * Headers = cow_multipart:headers() +> +> Read the headers for the next part of the multipart message. +> +> Cowboy will skip any data remaining until the beginning of +> the next part. This includes the preamble to the multipart +> message but also the body of a previous part if it hasn't +> been read. Both are skipped automatically when calling this +> function. +> +> The headers returned are MIME headers, NOT HTTP headers. +> They can be parsed using the functions from the `cow_multipart` +> module. In addition, the `cow_multipart:form_data/1` function +> can be used to quickly figure out `multipart/form-data` messages. 
+> It takes the list of headers and returns whether this part is +> a simple form field or a file being uploaded. +> +> Note that once a part has been read, or skipped, it cannot +> be read again. + +### part_body(Req) -> part_body(8000000, Req) +### part_body(MaxReadSize, Req) -> {ok, Data, Req2} | {more, Data, Req2} + +> Types: +> * MaxReadSize = non_neg_integer() +> * Data = binary() +> +> Read the body of the current part of the multipart message. +> +> This function will read the body up to `MaxReadSize` bytes. +> This is a soft limit. If there are more data to be read +> from the socket for this part, the function will return +> what it could read inside a `more` tuple. Otherwise, it +> will return an `ok` tuple. +> +> Calling this function again after receiving a `more` tuple +> will return another chunk of body. The last chunk will be +> returned inside an `ok` tuple. +> +> Note that once the body has been read, fully or partially, +> it cannot be read again. + ### skip_body(Req) -> {ok, Req2} | {error, Reason} > Types: diff --git a/rebar.config b/rebar.config index d234ae2..662c8c2 100644 --- a/rebar.config +++ b/rebar.config @@ -1,4 +1,4 @@ {deps, [ - {cowlib, ".*", {git, "git://github.com/extend/cowlib.git", "0.4.0"}}, + {cowlib, ".*", {git, "git://github.com/extend/cowlib.git", "0.5.0"}}, {ranch, ".*", {git, "git://github.com/extend/ranch.git", "0.9.0"}} ]}. diff --git a/src/cowboy_multipart.erl b/src/cowboy_multipart.erl deleted file mode 100644 index 4df5a27..0000000 --- a/src/cowboy_multipart.erl +++ /dev/null @@ -1,313 +0,0 @@ -%% Copyright (c) 2011, Anthony Ramine -%% -%% Permission to use, copy, modify, and/or distribute this software for any -%% purpose with or without fee is hereby granted, provided that the above -%% copyright notice and this permission notice appear in all copies. 
-%% -%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -%% @doc Multipart parser. --module(cowboy_multipart). - --export([parser/1]). --export([content_disposition/1]). - --type part_parser() :: parser(more(part_result())). --type parser(T) :: fun((binary()) -> T). --type more(T) :: T | {more, parser(T)}. --type part_result() :: headers() | eof. --type headers() :: {headers, http_headers(), body_cont()}. --type http_headers() :: [{binary(), binary()}]. --type body_cont() :: cont(more(body_result())). --type cont(T) :: fun(() -> T). --type body_result() :: {body, binary(), body_cont()} | end_of_part(). --type end_of_part() :: {end_of_part, cont(more(part_result()))}. --type disposition() :: {binary(), [{binary(), binary()}]}. - -%% API. - -%% @doc Return a multipart parser for the given boundary. --spec parser(binary()) -> part_parser(). -parser(Boundary) when is_binary(Boundary) -> - fun (Bin) when is_binary(Bin) -> parse(Bin, Boundary) end. - -%% @doc Parse a content disposition. -%% @todo Parse the MIME header instead of the HTTP one. --spec content_disposition(binary()) -> disposition(). -content_disposition(Data) -> - cowboy_http:token_ci(Data, - fun (_Rest, <<>>) -> {error, badarg}; - (Rest, Disposition) -> - cowboy_http:params(Rest, - fun (<<>>, Params) -> {Disposition, Params}; - (_Rest2, _) -> {error, badarg} - end) - end). - -%% Internal. - -%% @doc Entry point of the multipart parser, skips over the preamble if any. --spec parse(binary(), binary()) -> more(part_result()). 
-parse(Bin, Boundary) when byte_size(Bin) >= byte_size(Boundary) + 2 -> - BoundarySize = byte_size(Boundary), - Pattern = pattern(Boundary), - case Bin of - <<"--", Boundary:BoundarySize/binary, Rest/binary>> -> - % Data starts with initial boundary, skip preamble parsing. - parse_boundary_tail(Rest, Pattern); - _ -> - % Parse preamble. - skip(Bin, Pattern) - end; -parse(Bin, Boundary) -> - % Not enough data to know if the data begins with a boundary. - more(Bin, fun (NewBin) -> parse(NewBin, Boundary) end). - --type pattern() :: {binary:cp(), non_neg_integer()}. --type patterns() :: {pattern(), pattern()}. - -%% @doc Return two compiled binary patterns with their sizes in bytes. -%% The boundary pattern is the boundary prepended with "\r\n--". -%% The boundary suffix pattern matches all prefixes of the boundary. --spec pattern(binary()) -> patterns(). -pattern(Boundary) -> - MatchPattern = <<"\r\n--", Boundary/binary>>, - MatchPrefixes = prefixes(MatchPattern), - {{binary:compile_pattern(MatchPattern), byte_size(MatchPattern)}, - {binary:compile_pattern(MatchPrefixes), byte_size(MatchPattern)}}. - -%% @doc Return all prefixes of a binary string. -%% The list of prefixes includes the full string. --spec prefixes(binary()) -> [binary()]. -prefixes(<>) -> - prefixes(Rest, <>). - --spec prefixes(binary(), binary()) -> [binary()]. -prefixes(<>, Acc) -> - [Acc|prefixes(Rest, <>)]; -prefixes(<<>>, Acc) -> - [Acc]. - -%% @doc Test if a boundary is a possble suffix. -%% The patterns are expected to have been returned from `pattern/1'. --spec suffix_match(binary(), patterns()) -> nomatch | {integer(), integer()}. -suffix_match(Bin, {_Boundary, {Pat, Len}}) -> - Size = byte_size(Bin), - suffix_match(Bin, Pat, Size, max(-Size, -Len)). - --spec suffix_match(binary(), binary:cp(), non_neg_integer(), 0|neg_integer()) -> - nomatch | {integer(), integer()}. 
-suffix_match(_Bin, _Pat, _Size, _Match=0) -> - nomatch; -suffix_match(Bin, Pat, Size, Match) when Match < 0 -> - case binary:match(Bin, Pat, [{scope, {Size, Match}}]) of - {Pos, Len}=Part when Pos + Len =:= Size -> Part; - {_, Len} -> suffix_match(Bin, Pat, Size, Match + Len); - nomatch -> nomatch - end. - -%% @doc Parse remaining characters of a line beginning with the boundary. -%% If followed by "--", eof is returned and parsing is finished. --spec parse_boundary_tail(binary(), patterns()) -> more(part_result()). -parse_boundary_tail(Bin, Pattern) when byte_size(Bin) >= 2 -> - case Bin of - <<"--", _Rest/binary>> -> - % Boundary is followed by "--", end parsing. - eof; - _ -> - % No dash after boundary, proceed with unknown chars and lwsp - % removal. - parse_boundary_eol(Bin, Pattern) - end; -parse_boundary_tail(Bin, Pattern) -> - % Boundary may be followed by "--", need more data. - more(Bin, fun (NewBin) -> parse_boundary_tail(NewBin, Pattern) end). - -%% @doc Skip whitespace and unknown chars until CRLF. --spec parse_boundary_eol(binary(), patterns()) -> more(part_result()). -parse_boundary_eol(Bin, Pattern) -> - case binary:match(Bin, <<"\r\n">>) of - {CrlfStart, _Length} -> - % End of line found, remove optional whitespace. - <<_:CrlfStart/binary, Rest/binary>> = Bin, - Fun = fun (Rest2) -> parse_boundary_crlf(Rest2, Pattern) end, - cowboy_http:whitespace(Rest, Fun); - nomatch -> - % CRLF not found in the given binary. - RestStart = max(byte_size(Bin) - 1, 0), - <<_:RestStart/binary, Rest/binary>> = Bin, - more(Rest, fun (NewBin) -> parse_boundary_eol(NewBin, Pattern) end) - end. - --spec parse_boundary_crlf(binary(), patterns()) -> more(part_result()). -parse_boundary_crlf(<<"\r\n", Rest/binary>>, Pattern) -> - % The binary is at least 2 bytes long as this function is only called by - % parse_boundary_eol/3 when CRLF has been found so a more tuple will never - % be returned from here. 
- parse_headers(Rest, Pattern); -parse_boundary_crlf(Bin, Pattern) -> - % Unspecified behaviour here: RFC 2046 doesn't say what to do when LWSP is - % not followed directly by a new line. In this implementation it is - % considered part of the boundary so EOL needs to be searched again. - parse_boundary_eol(Bin, Pattern). - --spec parse_headers(binary(), patterns()) -> more(part_result()). -parse_headers(Bin, Pattern) -> - parse_headers(Bin, Pattern, []). - --spec parse_headers(binary(), patterns(), http_headers()) -> more(part_result()). -parse_headers(Bin, Pattern, Acc) -> - case erlang:decode_packet(httph_bin, Bin, []) of - {ok, {http_header, _, Name, _, Value}, Rest} -> - Name2 = case is_atom(Name) of - true -> cowboy_bstr:to_lower(atom_to_binary(Name, latin1)); - false -> cowboy_bstr:to_lower(Name) - end, - parse_headers(Rest, Pattern, [{Name2, Value} | Acc]); - {ok, http_eoh, Rest} -> - Headers = lists:reverse(Acc), - {headers, Headers, fun () -> parse_body(Rest, Pattern) end}; - {ok, {http_error, _}, _} -> - % Skip malformed parts. - skip(Bin, Pattern); - {more, _} -> - more(Bin, fun (NewBin) -> parse_headers(NewBin, Pattern, Acc) end) - end. - --spec parse_body(binary(), patterns()) -> more(body_result()). -parse_body(Bin, Pattern = {{P, PSize}, _}) when byte_size(Bin) >= PSize -> - case binary:match(Bin, P) of - {0, _Length} -> - <<_:PSize/binary, Rest/binary>> = Bin, - end_of_part(Rest, Pattern); - {BoundaryStart, _Length} -> - % Boundary found, this is the latest partial body that will be - % returned for this part. - <> = Bin, - FResult = end_of_part(Rest, Pattern), - {body, PBody, fun () -> FResult end}; - nomatch -> - case suffix_match(Bin, Pattern) of - nomatch -> - %% Prefix of boundary not found at end of input. it's - %% safe to return the whole binary. Saves copying of - %% next input onto tail of current input binary. 
- {body, Bin, fun () -> parse_body(<<>>, Pattern) end}; - {BoundaryStart, Len} -> - PBody = binary:part(Bin, 0, BoundaryStart), - Rest = binary:part(Bin, BoundaryStart, Len), - {body, PBody, fun () -> parse_body(Rest, Pattern) end} - end - end; -parse_body(Bin, Pattern) -> - more(Bin, fun (NewBin) -> parse_body(NewBin, Pattern) end). - --spec end_of_part(binary(), patterns()) -> end_of_part(). -end_of_part(Bin, Pattern) -> - {end_of_part, fun () -> parse_boundary_tail(Bin, Pattern) end}. - --spec skip(binary(), patterns()) -> more(part_result()). -skip(Bin, Pattern = {{P, PSize}, _}) -> - case binary:match(Bin, P) of - {BoundaryStart, _Length} -> - % Boundary found, proceed with parsing of the next part. - RestStart = BoundaryStart + PSize, - <<_:RestStart/binary, Rest/binary>> = Bin, - parse_boundary_tail(Rest, Pattern); - nomatch -> - % Boundary not found, need more data. - RestStart = max(byte_size(Bin) - PSize + 1, 0), - <<_:RestStart/binary, Rest/binary>> = Bin, - more(Rest, fun (NewBin) -> skip(NewBin, Pattern) end) - end. - --spec more(binary(), parser(T)) -> {more, parser(T)}. -more(<<>>, F) -> - {more, F}; -more(Bin, InnerF) -> - F = fun (NewData) when is_binary(NewData) -> - InnerF(<>) - end, - {more, F}. - -%% Tests. - --ifdef(TEST). - -multipart_test_() -> - %% {Body, Result} - Tests = [ - {<<"--boundary--">>, []}, - {<<"preamble\r\n--boundary--">>, []}, - {<<"--boundary--\r\nepilogue">>, []}, - {<<"\r\n--boundary\r\nA:b\r\nC:d\r\n\r\n\r\n--boundary--">>, - [{[{<<"a">>, <<"b">>}, {<<"c">>, <<"d">>}], <<>>}]}, - { - << - "--boundary\r\nX-Name:answer\r\n\r\n42" - "\r\n--boundary\r\nServer:Cowboy\r\n\r\nIt rocks!\r\n" - "\r\n--boundary--" - >>, - [ - {[{<<"x-name">>, <<"answer">>}], <<"42">>}, - {[{<<"server">>, <<"Cowboy">>}], <<"It rocks!\r\n">>} - ] - } - ], - [{title(V), fun () -> R = acc_multipart(V) end} || {V, R} <- Tests]. - -acc_multipart(V) -> - acc_multipart((parser(<<"boundary">>))(V), []). 
- -acc_multipart({headers, Headers, Cont}, Acc) -> - acc_multipart(Cont(), [{Headers, []}|Acc]); -acc_multipart({body, Body, Cont}, [{Headers, BodyAcc}|Acc]) -> - acc_multipart(Cont(), [{Headers, [Body|BodyAcc]}|Acc]); -acc_multipart({end_of_part, Cont}, [{Headers, BodyAcc}|Acc]) -> - Body = list_to_binary(lists:reverse(BodyAcc)), - acc_multipart(Cont(), [{Headers, Body}|Acc]); -acc_multipart(eof, Acc) -> - lists:reverse(Acc). - -content_disposition_test_() -> - %% {Disposition, Result} - Tests = [ - {<<"form-data; name=id">>, {<<"form-data">>, [{<<"name">>, <<"id">>}]}}, - {<<"inline">>, {<<"inline">>, []}}, - {<<"attachment; \tfilename=brackets-slides.pdf">>, - {<<"attachment">>, [{<<"filename">>, <<"brackets-slides.pdf">>}]}} - ], - [{title(V), fun () -> R = content_disposition(V) end} || {V, R} <- Tests]. - -title(Bin) -> - Title = lists:foldl( - fun ({T, R}, V) -> re:replace(V, T, R, [global]) end, - Bin, - [{"\t", "\\\\t"}, {"\r", "\\\\r"}, {"\n", "\\\\n"}] - ), - iolist_to_binary(Title). - -suffix_test_() -> - Tests = [ - {nomatch, <<>>, <<"ABC">>}, - {{0, 1}, <<"\r">>, <<"ABC">>}, - {{0, 2}, <<"\r\n">>, <<"ABC">>}, - {{0, 4}, <<"\r\n--">>, <<"ABC">>}, - {{0, 5}, <<"\r\n--A">>, <<"ABC">>}, - {{0, 6}, <<"\r\n--AB">>, <<"ABC">>}, - {{0, 7}, <<"\r\n--ABC">>, <<"ABC">>}, - {nomatch, <<"\r\n--AB1">>, <<"ABC">>}, - {{1, 1}, <<"1\r">>, <<"ABC">>}, - {{2, 2}, <<"12\r\n">>, <<"ABC">>}, - {{3, 4}, <<"123\r\n--">>, <<"ABC">>} - ], - [fun() -> Part = suffix_match(Packet, pattern(Boundary)) end || - {Part, Packet, Boundary} <- Tests]. - --endif. diff --git a/src/cowboy_req.erl b/src/cowboy_req.erl index d98e395..815e4ca 100644 --- a/src/cowboy_req.erl +++ b/src/cowboy_req.erl @@ -1,4 +1,4 @@ -%% Copyright (c) 2011-2013, Loïc Hoguin +%% Copyright (c) 2011-2014, Loïc Hoguin %% Copyright (c) 2011, Anthony Ramine %% %% Permission to use, copy, modify, and/or distribute this software for any @@ -82,8 +82,11 @@ -export([body/2]). -export([body_qs/1]). -export([body_qs/2]). 
--export([multipart_data/1]). --export([multipart_skip/1]). + +%% Multipart API. +-export([part/1]). +-export([part_body/1]). +-export([part_body/2]). %% Response API. -export([set_resp_cookie/4]). @@ -159,8 +162,8 @@ %% Request body. body_state = waiting :: waiting | done | {stream, non_neg_integer(), transfer_decode_fun(), any(), content_decode_fun()}, - multipart = undefined :: undefined | {non_neg_integer(), fun()}, buffer = <<>> :: binary(), + multipart = undefined :: undefined | {binary(), binary()}, %% Response. resp_compress = false :: boolean(), @@ -775,61 +778,78 @@ body_qs(MaxBodyLength, Req) -> {error, Reason} end. -%% Multipart Request API. +%% Multipart API. -%% @doc Return data from the multipart parser. -%% -%% Use this function for multipart streaming. For each part in the request, -%% this function returns {headers, Headers, Req} followed by a sequence of -%% {body, Data, Req} tuples and finally {end_of_part, Req}. When there -%% is no part to parse anymore, {eof, Req} is returned. --spec multipart_data(Req) - -> {headers, cowboy:http_headers(), Req} | {body, binary(), Req} - | {end_of_part | eof, Req} when Req::req(). -multipart_data(Req=#http_req{body_state=waiting}) -> - {ok, {<<"multipart">>, _SubType, Params}, Req2} = - parse_header(<<"content-type">>, Req), - {_, Boundary} = lists:keyfind(<<"boundary">>, 1, Params), - {ok, Length, Req3} = parse_header(<<"content-length">>, Req2), - multipart_data(Req3, Length, {more, cowboy_multipart:parser(Boundary)}); -multipart_data(Req=#http_req{multipart={Length, Cont}}) -> - multipart_data(Req, Length, Cont()); -multipart_data(Req=#http_req{body_state=done}) -> - {eof, Req}. 
- -multipart_data(Req, Length, {headers, Headers, Cont}) -> - {headers, Headers, Req#http_req{multipart={Length, Cont}}}; -multipart_data(Req, Length, {body, Data, Cont}) -> - {body, Data, Req#http_req{multipart={Length, Cont}}}; -multipart_data(Req, Length, {end_of_part, Cont}) -> - {end_of_part, Req#http_req{multipart={Length, Cont}}}; -multipart_data(Req, 0, eof) -> - {eof, Req#http_req{body_state=done, multipart=undefined}}; -multipart_data(Req=#http_req{socket=Socket, transport=Transport}, - Length, eof) -> - %% We just want to skip so no need to stream data here. - {ok, _Data} = Transport:recv(Socket, Length, 5000), - {eof, Req#http_req{body_state=done, multipart=undefined}}; -multipart_data(Req, Length, {more, Parser}) when Length > 0 -> - case stream_body(Req) of - {ok, << Data:Length/binary, Buffer/binary >>, Req2} -> - multipart_data(Req2#http_req{buffer=Buffer}, 0, Parser(Data)); - {ok, Data, Req2} -> - multipart_data(Req2, Length - byte_size(Data), Parser(Data)) +%% @doc Return the next part's headers. +-spec part(Req) + -> {ok, cow_multipart:headers(), Req} | {done, Req} + when Req::req(). +part(Req=#http_req{multipart=undefined}) -> + part(init_multipart(Req)); +part(Req) -> + {ok, Data, Req2} = stream_multipart(Req), + part(Data, Req2). + +part(Buffer, Req=#http_req{multipart={Boundary, _}}) -> + case cow_multipart:parse_headers(Buffer, Boundary) of + more -> + {ok, Data, Req2} = stream_multipart(Req), + part(<< Buffer/binary, Data/binary >>, Req2); + {more, Buffer2} -> + {ok, Data, Req2} = stream_multipart(Req), + part(<< Buffer2/binary, Data/binary >>, Req2); + {ok, Headers, Rest} -> + {ok, Headers, Req#http_req{multipart={Boundary, Rest}}}; + %% Ignore epilogue. + {done, _} -> + {done, Req#http_req{multipart=undefined}} end. -%% @doc Skip a part returned by the multipart parser. -%% -%% This function repeatedly calls multipart_data/1 until -%% {end_of_part, Req} or {eof, Req} is parsed. --spec multipart_skip(Req) -> {ok, Req} when Req::req(). 
-multipart_skip(Req) -> - case multipart_data(Req) of - {end_of_part, Req2} -> {ok, Req2}; - {eof, Req2} -> {ok, Req2}; - {_, _, Req2} -> multipart_skip(Req2) +%% @doc Return the current part's body. +-spec part_body(Req) + -> {ok, binary(), Req} | {more, binary(), Req} + when Req::req(). +part_body(Req) -> + part_body(8000000, Req). + +-spec part_body(non_neg_integer(), Req) + -> {ok, binary(), Req} | {more, binary(), Req} + when Req::req(). +part_body(MaxLength, Req=#http_req{multipart=undefined}) -> + part_body(MaxLength, init_multipart(Req)); +part_body(MaxLength, Req) -> + part_body(<<>>, MaxLength, Req, <<>>). + +part_body(Buffer, MaxLength, Req=#http_req{multipart={Boundary, _}}, Acc) + when byte_size(Acc) > MaxLength -> + {more, Acc, Req#http_req{multipart={Boundary, Buffer}}}; +part_body(Buffer, MaxLength, Req=#http_req{multipart={Boundary, _}}, Acc) -> + {ok, Data, Req2} = stream_multipart(Req), + case cow_multipart:parse_body(<< Buffer/binary, Data/binary >>, Boundary) of + {ok, Body} -> + part_body(<<>>, MaxLength, Req2, << Acc/binary, Body/binary >>); + {ok, Body, Rest} -> + part_body(Rest, MaxLength, Req2, << Acc/binary, Body/binary >>); + done -> + {ok, Acc, Req2}; + {done, Body} -> + {ok, << Acc/binary, Body/binary >>, Req2}; + {done, Body, Rest} -> + {ok, << Acc/binary, Body/binary >>, + Req2#http_req{multipart={Boundary, Rest}}} end. +init_multipart(Req) -> + {ok, {<<"multipart">>, _, Params}, Req2} + = parse_header(<<"content-type">>, Req), + {_, Boundary} = lists:keyfind(<<"boundary">>, 1, Params), + Req2#http_req{multipart={Boundary, <<>>}}. + +stream_multipart(Req=#http_req{multipart={_, <<>>}}) -> + stream_body(Req); +stream_multipart(Req=#http_req{multipart={Boundary, Buffer}}) -> + {ok, Buffer, Req#http_req{multipart={Boundary, <<>>}}}. + %% Response API. %% @doc Add a cookie header to the response. 
diff --git a/test/http_SUITE.erl b/test/http_SUITE.erl index 13e7b0b..20c65c8 100644 --- a/test/http_SUITE.erl +++ b/test/http_SUITE.erl @@ -1,4 +1,4 @@ -%% Copyright (c) 2011-2013, Loïc Hoguin +%% Copyright (c) 2011-2014, Loïc Hoguin %% Copyright (c) 2011, Anthony Ramine %% %% Permission to use, copy, modify, and/or distribute this software for any @@ -49,6 +49,7 @@ -export([keepalive_nl/1]). -export([keepalive_stream_loop/1]). -export([multipart/1]). +-export([multipart_large/1]). -export([nc_rand/1]). -export([nc_zero/1]). -export([onrequest/1]). @@ -135,6 +136,7 @@ groups() -> keepalive_nl, keepalive_stream_loop, multipart, + multipart_large, nc_rand, nc_zero, pipeline, @@ -391,6 +393,7 @@ init_dispatch(Config) -> {"/static_specify_file/[...]", cowboy_static, {file, ?config(static_dir, Config) ++ "/style.css"}}, {"/multipart", http_multipart, []}, + {"/multipart/large", http_multipart_stream, []}, {"/echo/body", http_echo_body, []}, {"/echo/body_qs", http_body_qs, []}, {"/param_all", rest_param_all, []}, @@ -755,8 +758,8 @@ multipart(Config) -> "This is a preamble." "\r\n--OHai\r\nX-Name:answer\r\n\r\n42" "\r\n--OHai\r\nServer:Cowboy\r\n\r\nIt rocks!\r\n" - "\r\n--OHai--" - "This is an epiloque." + "\r\n--OHai--\r\n" + "This is an epilogue." >>, {ok, Client2} = cowboy_client:request(<<"POST">>, build_url("/multipart", Config), @@ -770,6 +773,21 @@ multipart(Config) -> {[{<<"server">>, <<"Cowboy">>}], <<"It rocks!\r\n">>} ]. 
+multipart_large(Config) -> + Client = ?config(client, Config), + Boundary = "----------", + Big = << 0:9000000/unit:8 >>, + Bigger = << 0:9999999/unit:8 >>, + Body = ["--", Boundary, "\r\ncontent-length: 9000000\r\n\r\n", Big, "\r\n", + "--", Boundary, "\r\ncontent-length: 9999999\r\n\r\n", Bigger, "\r\n", + "--", Boundary, "--\r\n"], + {ok, Client2} = cowboy_client:request(<<"POST">>, + build_url("/multipart/large", Config), + [{<<"content-type">>, ["multipart/x-large; boundary=", Boundary]}], + Body, Client), + {ok, 200, _, _} = cowboy_client:response(Client2), + ok. + nc_reqs(Config, Input) -> Cat = os:find_executable("cat"), Nc = os:find_executable("nc"), diff --git a/test/http_SUITE_data/http_multipart.erl b/test/http_SUITE_data/http_multipart.erl index c94739f..79bfeb8 100644 --- a/test/http_SUITE_data/http_multipart.erl +++ b/test/http_SUITE_data/http_multipart.erl @@ -8,22 +8,18 @@ init({_Transport, http}, Req, []) -> {ok, Req, {}}. handle(Req, State) -> - {Result, Req2} = acc_multipart(Req), + {Result, Req2} = acc_multipart(Req, []), {ok, Req3} = cowboy_req:reply(200, [], term_to_binary(Result), Req2), {ok, Req3, State}. terminate(_, _, _) -> ok. -acc_multipart(Req) -> - acc_multipart(cowboy_req:multipart_data(Req), []). - -acc_multipart({headers, Headers, Req}, Acc) -> - acc_multipart(cowboy_req:multipart_data(Req), [{Headers, []}|Acc]); -acc_multipart({body, Data, Req}, [{Headers, BodyAcc}|Acc]) -> - acc_multipart(cowboy_req:multipart_data(Req), [{Headers, [Data|BodyAcc]}|Acc]); -acc_multipart({end_of_part, Req}, [{Headers, BodyAcc}|Acc]) -> - acc_multipart(cowboy_req:multipart_data(Req), - [{Headers, list_to_binary(lists:reverse(BodyAcc))}|Acc]); -acc_multipart({eof, Req}, Acc) -> - {lists:reverse(Acc), Req}. +acc_multipart(Req, Acc) -> + case cowboy_req:part(Req) of + {ok, Headers, Req2} -> + {ok, Body, Req3} = cowboy_req:part_body(Req2), + acc_multipart(Req3, [{Headers, Body}|Acc]); + {done, Req2} -> + {lists:reverse(Acc), Req2} + end. 
diff --git a/test/http_SUITE_data/http_multipart_stream.erl b/test/http_SUITE_data/http_multipart_stream.erl new file mode 100644 index 0000000..926d150 --- /dev/null +++ b/test/http_SUITE_data/http_multipart_stream.erl @@ -0,0 +1,34 @@ +%% Feel free to use, reuse and abuse the code in this file. + +-module(http_multipart_stream). +-behaviour(cowboy_http_handler). +-export([init/3, handle/2, terminate/3]). + +init(_, Req, []) -> + {ok, Req, undefined}. + +handle(Req, State) -> + Req2 = multipart(Req), + {ok, Req3} = cowboy_req:reply(200, Req2), + {ok, Req3, State}. + +terminate(_, _, _) -> + ok. + +multipart(Req) -> + case cowboy_req:part(Req) of + {ok, [{<<"content-length">>, BinLength}], Req2} -> + Length = list_to_integer(binary_to_list(BinLength)), + {Length, Req3} = stream_body(Req2, 0), + multipart(Req3); + {done, Req2} -> + Req2 + end. + +stream_body(Req, N) -> + case cowboy_req:part_body(Req) of + {ok, Data, Req2} -> + {N + byte_size(Data), Req2}; + {more, Data, Req2} -> + stream_body(Req2, N + byte_size(Data)) + end. -- cgit v1.2.3