diff options
-rw-r--r-- | include/http.hrl | 3 | ||||
-rw-r--r-- | src/cowboy_http.erl | 4 | ||||
-rw-r--r-- | src/cowboy_http_protocol.erl | 5 | ||||
-rw-r--r-- | src/cowboy_http_req.erl | 69 | ||||
-rw-r--r-- | src/cowboy_multipart.erl | 249 | ||||
-rw-r--r-- | test/http_SUITE.erl | 24 | ||||
-rw-r--r-- | test/http_handler_multipart.erl | 29 |
7 files changed, 376 insertions, 7 deletions
diff --git a/include/http.hrl b/include/http.hrl index a7355f4..a10b120 100644 --- a/include/http.hrl +++ b/include/http.hrl @@ -66,7 +66,8 @@ meta = [] :: [{atom(), any()}], %% Request body. - body_state = waiting :: waiting | done, + body_state = waiting :: waiting | done | + {multipart, non_neg_integer(), fun()}, buffer = <<>> :: binary(), %% Response. diff --git a/src/cowboy_http.erl b/src/cowboy_http.erl index 99536a3..7c1a2d3 100644 --- a/src/cowboy_http.erl +++ b/src/cowboy_http.erl @@ -17,10 +17,10 @@ -module(cowboy_http). %% Parsing. --export([list/2, nonempty_list/2, content_type/1, +-export([list/2, nonempty_list/2, content_type/1, content_type_params/3, media_range/2, conneg/2, language_range/2, entity_tag_match/1, http_date/1, rfc1123_date/1, rfc850_date/1, asctime_date/1, - digits/1, token/2, token_ci/2, quoted_string/2]). + whitespace/2, digits/1, token/2, token_ci/2, quoted_string/2]). %% Interpretation. -export([connection_to_atom/1, urldecode/1, urldecode/2, urlencode/1, diff --git a/src/cowboy_http_protocol.erl b/src/cowboy_http_protocol.erl index ea59799..cd951d1 100644 --- a/src/cowboy_http_protocol.erl +++ b/src/cowboy_http_protocol.erl @@ -380,7 +380,10 @@ ensure_body_processed(Req=#http_req{body_state=waiting}) -> {error, badarg} -> ok; %% No body. {error, _Reason} -> close; _Any -> ok - end. + end; +ensure_body_processed(Req=#http_req{body_state={multipart, _, _}}) -> + {ok, Req2} = cowboy_http_req:multipart_skip(Req), + ensure_body_processed(Req2). -spec ensure_response(#http_req{}) -> ok. %% The handler has already fully replied to the client. diff --git a/src/cowboy_http_req.erl b/src/cowboy_http_req.erl index b0a0232..aa30d2c 100644 --- a/src/cowboy_http_req.erl +++ b/src/cowboy_http_req.erl @@ -34,7 +34,8 @@ ]). %% Request API. -export([ - body/1, body/2, body_qs/1 + body/1, body/2, body_qs/1, + multipart_data/1, multipart_skip/1 ]). %% Request Body API. 
-export([ @@ -363,6 +364,7 @@ meta(Name, Req, Default) -> %% @doc Return the full body sent with the request, or <em>{error, badarg}</em> %% if no <em>Content-Length</em> is available. %% @todo We probably want to allow a max length. +%% @todo Add multipart support to this function. -spec body(#http_req{}) -> {ok, binary(), #http_req{}} | {error, atom()}. body(Req) -> {Length, Req2} = cowboy_http_req:parse_header('Content-Length', Req), @@ -400,6 +402,71 @@ body_qs(Req=#http_req{urldecode={URLDecFun, URLDecArg}}) -> {ok, Body, Req2} = body(Req), {parse_qs(Body, fun(Bin) -> URLDecFun(Bin, URLDecArg) end), Req2}. +%% Multipart Request API. + +%% @doc Return data from the multipart parser. +%% +%% Use this function for multipart streaming. For each part in the request, +%% this function returns <em>{headers, Headers}</em> followed by a sequence of +%% <em>{body, Data}</em> tuples and finally <em>end_of_part</em>. When there +%% is no part to parse anymore, <em>eof</em> is returned. +%% +%% If the request Content-Type is not a multipart one, the match on it fails +%% and this function crashes instead of returning an error tuple. +-spec multipart_data(#http_req{}) + -> {{headers, http_headers()} | {data, binary()} | end_of_part | eof, + #http_req{}}. +multipart_data(Req=#http_req{body_state=waiting}) -> + {{<<"multipart">>, _SubType, Params}, Req2} = + parse_header('Content-Type', Req), + {_, Boundary} = lists:keyfind(<<"boundary">>, 1, Params), + {Length, Req3=#http_req{buffer=Buffer}} = + parse_header('Content-Length', Req2), + multipart_data(Req3, Length, cowboy_multipart:parser(Boundary), Buffer); +multipart_data(Req=#http_req{body_state={multipart, Length, Cont}}) -> + multipart_data(Req, Length, Cont()); +multipart_data(Req=#http_req{body_state=done}) -> + {eof, Req}. 
+ +multipart_data(Req, Length, Parser, Buffer) when byte_size(Buffer) >= Length -> + << Data:Length/binary, Rest/binary >> = Buffer, + multipart_data(Req#http_req{buffer=Rest}, 0, Parser(Data)); +multipart_data(Req, Length, Parser, Buffer) -> + NewLength = Length - byte_size(Buffer), + multipart_data(Req#http_req{buffer= <<>>}, NewLength, Parser(Buffer)). + +multipart_data(Req, Length, {headers, Headers, Cont}) -> + {{headers, Headers}, Req#http_req{body_state={multipart, Length, Cont}}}; +multipart_data(Req, Length, {body, Data, Cont}) -> + {{body, Data}, Req#http_req{body_state={multipart, Length, Cont}}}; +multipart_data(Req, Length, {end_of_part, Cont}) -> + {end_of_part, Req#http_req{body_state={multipart, Length, Cont}}}; +multipart_data(Req, 0, eof) -> + {eof, Req#http_req{body_state=done}}; +multipart_data(Req=#http_req{socket=Socket, transport=Transport}, + Length, eof) -> + {ok, _Data} = Transport:recv(Socket, Length, 5000), + {eof, Req#http_req{body_state=done}}; +multipart_data(Req=#http_req{socket=Socket, transport=Transport}, + Length, {more, Parser}) when Length > 0 -> + case Transport:recv(Socket, 0, 5000) of + {ok, << Data:Length/binary, Buffer/binary >>} -> + multipart_data(Req#http_req{buffer=Buffer}, 0, Parser(Data)); + {ok, Data} -> + multipart_data(Req, Length - byte_size(Data), Parser(Data)) + end. + +%% @doc Skip a part returned by the multipart parser. +%% +%% This function repeatedly calls <em>multipart_data/1</em> until +%% <em>end_of_part</em> or <em>eof</em> is parsed. +multipart_skip(Req) -> + case multipart_data(Req) of + {end_of_part, Req2} -> {ok, Req2}; + {eof, Req2} -> {ok, Req2}; + {_Other, Req2} -> multipart_skip(Req2) + end. + %% Response API. %% @doc Add a cookie header to the response. 
diff --git a/src/cowboy_multipart.erl b/src/cowboy_multipart.erl new file mode 100644 index 0000000..b7aeb54 --- /dev/null +++ b/src/cowboy_multipart.erl @@ -0,0 +1,249 @@ +%% Copyright (c) 2011, Anthony Ramine <[email protected]> +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. +%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +%% @doc Multipart parser. +-module(cowboy_multipart). + +-type part_parser() :: parser(more(part_result())). +-type parser(T) :: fun((binary()) -> T). +-type more(T) :: T | {more, parser(T)}. +-type part_result() :: headers() | eof. +-type headers() :: {headers, http_headers(), body_cont()}. +-type http_headers() :: [{atom() | binary(), binary()}]. +-type body_cont() :: cont(more(body_result())). +-type cont(T) :: fun(() -> T). +-type body_result() :: {body, binary(), body_cont()} | end_of_part(). +-type end_of_part() :: {end_of_part, cont(more(part_result()))}. +-type disposition() :: {binary(), [{binary(), binary()}]}. + +-export([parser/1, content_disposition/1]). + +-include_lib("eunit/include/eunit.hrl"). + +%% API. + +%% @doc Return a multipart parser for the given boundary. +-spec parser(binary()) -> part_parser(). +parser(Boundary) when is_binary(Boundary) -> + fun (Bin) when is_binary(Bin) -> parse(Bin, Boundary) end. + +%% @doc Parse a content disposition. 
+%% @todo Parse the MIME header instead of the HTTP one. +-spec content_disposition(binary()) -> disposition(). +content_disposition(Data) -> + cowboy_http:token_ci(Data, + fun (_Rest, <<>>) -> {error, badarg}; + (Rest, Disposition) -> + cowboy_http:content_type_params(Rest, + fun (Params) -> {Disposition, Params} end, []) + end). + +%% Internal. + +%% @doc Entry point of the multipart parser, skips over the preamble if any. +-spec parse(binary(), binary()) -> more(part_result()). +parse(Bin, Boundary) when byte_size(Bin) >= byte_size(Boundary) + 2 -> + BoundarySize = byte_size(Boundary), + Pattern = pattern(Boundary), + case Bin of + <<"--", Boundary:BoundarySize/binary, Rest/binary>> -> + % Data starts with initial boundary, skip preamble parsing. + parse_boundary_tail(Rest, Pattern); + _ -> + % Parse preamble. + skip(Bin, Pattern) + end; +parse(Bin, Boundary) -> + % Not enough data to know if the data begins with a boundary. + more(Bin, fun (NewBin) -> parse(NewBin, Boundary) end). + +-type pattern() :: {binary:cp(), non_neg_integer()}. + +%% @doc Return a compiled binary pattern with its size in bytes. +%% The pattern is the boundary prepended with "\r\n--". +-spec pattern(binary()) -> pattern(). +pattern(Boundary) -> + MatchPattern = <<"\r\n--", Boundary/binary>>, + {binary:compile_pattern(MatchPattern), byte_size(MatchPattern)}. + +%% @doc Parse remaining characters of a line beginning with the boundary. +%% If followed by "--", <em>eof</em> is returned and parsing is finished. +-spec parse_boundary_tail(binary(), pattern()) -> more(part_result()). +parse_boundary_tail(Bin, Pattern) when byte_size(Bin) >= 2 -> + case Bin of + <<"--", _Rest/binary>> -> + % Boundary is followed by "--", end parsing. + eof; + _ -> + % No dash after boundary, proceed with unknown chars and lwsp + % removal. + parse_boundary_eol(Bin, Pattern) + end; +parse_boundary_tail(Bin, Pattern) -> + % Boundary may be followed by "--", need more data. 
+ more(Bin, fun (NewBin) -> parse_boundary_tail(NewBin, Pattern) end). + +%% @doc Skip whitespace and unknown chars until CRLF. +-spec parse_boundary_eol(binary(), pattern()) -> more(part_result()). +parse_boundary_eol(Bin, Pattern) -> + case binary:match(Bin, <<"\r\n">>) of + {CrlfStart, _Length} -> + % End of line found, remove optional whitespace. + <<_:CrlfStart/binary, Rest/binary>> = Bin, + Fun = fun (Rest2) -> parse_boundary_crlf(Rest2, Pattern) end, + cowboy_http:whitespace(Rest, Fun); + nomatch -> + % CRLF not found in the given binary. + RestStart = max(byte_size(Bin) - 1, 0), + <<_:RestStart/binary, Rest/binary>> = Bin, + more(Rest, fun (NewBin) -> parse_boundary_eol(NewBin, Pattern) end) + end. + +-spec parse_boundary_crlf(binary(), pattern()) -> more(part_result()). +parse_boundary_crlf(<<"\r\n", Rest/binary>>, Pattern) -> + % The binary is at least 2 bytes long as this function is only called by + % parse_boundary_eol/2 when CRLF has been found so a more tuple will never + % be returned from here. + parse_headers(Rest, Pattern); +parse_boundary_crlf(Bin, Pattern) -> + % Unspecified behaviour here: RFC 2046 doesn't say what to do when LWSP is + % not followed directly by a new line. In this implementation it is + % considered part of the boundary so EOL needs to be searched again. + parse_boundary_eol(Bin, Pattern). + +-spec parse_headers(binary(), pattern()) -> more(part_result()). +parse_headers(Bin, Pattern) -> + parse_headers(Bin, Pattern, []). + +-spec parse_headers(binary(), pattern(), http_headers()) -> more(part_result()). +parse_headers(Bin, Pattern, Acc) -> + case erlang:decode_packet(httph_bin, Bin, []) of + {ok, {http_header, _, Name, _, Value}, Rest} -> + parse_headers(Rest, Pattern, [{Name, Value} | Acc]); + {ok, http_eoh, Rest} -> + Headers = lists:reverse(Acc), + {headers, Headers, fun () -> parse_body(Rest, Pattern) end}; + {ok, {http_error, _}, _} -> + % Skip malformed parts. 
+ skip(Bin, Pattern); + {more, _} -> + more(Bin, fun (NewBin) -> parse_headers(NewBin, Pattern, Acc) end) + end. + +-spec parse_body(binary(), pattern()) -> more(body_result()). +parse_body(Bin, Pattern = {P, PSize}) when byte_size(Bin) >= PSize -> + case binary:match(Bin, P) of + {0, _Length} -> + <<_:PSize/binary, Rest/binary>> = Bin, + end_of_part(Rest, Pattern); + {BoundaryStart, _Length} -> + % Boundary found, this is the latest partial body that will be + % returned for this part. + <<PBody:BoundaryStart/binary, _:PSize/binary, Rest/binary>> = Bin, + FResult = end_of_part(Rest, Pattern), + {body, PBody, fun () -> FResult end}; + nomatch -> + PartialLength = byte_size(Bin) - PSize + 1, + <<PBody:PartialLength/binary, Rest/binary>> = Bin, + {body, PBody, fun () -> parse_body(Rest, Pattern) end} + end; +parse_body(Bin, Pattern) -> + more(Bin, fun (NewBin) -> parse_body(NewBin, Pattern) end). + +-spec end_of_part(binary(), pattern()) -> end_of_part(). +end_of_part(Bin, Pattern) -> + {end_of_part, fun () -> parse_boundary_tail(Bin, Pattern) end}. + +-spec skip(binary(), pattern()) -> more(part_result()). +skip(Bin, Pattern = {P, PSize}) -> + case binary:match(Bin, P) of + {BoundaryStart, _Length} -> + % Boundary found, proceed with parsing of the next part. + RestStart = BoundaryStart + PSize, + <<_:RestStart/binary, Rest/binary>> = Bin, + parse_boundary_tail(Rest, Pattern); + nomatch -> + % Boundary not found, need more data. + RestStart = max(byte_size(Bin) - PSize + 1, 0), + <<_:RestStart/binary, Rest/binary>> = Bin, + more(Rest, fun (NewBin) -> skip(NewBin, Pattern) end) + end. + +-spec more(binary(), parser(T)) -> {more, parser(T)}. +more(<<>>, F) -> + {more, F}; +more(Bin, InnerF) -> + F = fun (NewData) when is_binary(NewData) -> + InnerF(<<Bin/binary, NewData/binary>>) + end, + {more, F}. + +%% Tests. + +-ifdef(TEST). 
+ +multipart_test_() -> + %% {Body, Result} + Tests = [ + {<<"--boundary--">>, []}, + {<<"preamble\r\n--boundary--">>, []}, + {<<"--boundary--\r\nepilogue">>, []}, + {<<"\r\n--boundary\r\nA:b\r\nC:d\r\n\r\n\r\n--boundary--">>, + [{[{<<"A">>, <<"b">>}, {<<"C">>, <<"d">>}], <<>>}]}, + { + << + "--boundary\r\nX-Name:answer\r\n\r\n42" + "\r\n--boundary\r\nServer:Cowboy\r\n\r\nIt rocks!\r\n" + "\r\n--boundary--" + >>, + [ + {[{<<"X-Name">>, <<"answer">>}], <<"42">>}, + {[{'Server', <<"Cowboy">>}], <<"It rocks!\r\n">>} + ] + } + ], + [{title(V), fun () -> R = acc_multipart(V) end} || {V, R} <- Tests]. + +acc_multipart(V) -> + acc_multipart((parser(<<"boundary">>))(V), []). + +acc_multipart({headers, Headers, Cont}, Acc) -> + acc_multipart(Cont(), [{Headers, []}|Acc]); +acc_multipart({body, Body, Cont}, [{Headers, BodyAcc}|Acc]) -> + acc_multipart(Cont(), [{Headers, [Body|BodyAcc]}|Acc]); +acc_multipart({end_of_part, Cont}, [{Headers, BodyAcc}|Acc]) -> + Body = list_to_binary(lists:reverse(BodyAcc)), + acc_multipart(Cont(), [{Headers, Body}|Acc]); +acc_multipart(eof, Acc) -> + lists:reverse(Acc). + +content_disposition_test_() -> + %% {Disposition, Result} + Tests = [ + {<<"form-data; name=id">>, {<<"form-data">>, [{<<"name">>, <<"id">>}]}}, + {<<"inline">>, {<<"inline">>, []}}, + {<<"attachment; \tfilename=brackets-slides.pdf">>, + {<<"attachment">>, [{<<"filename">>, <<"brackets-slides.pdf">>}]}} + ], + [{title(V), fun () -> R = content_disposition(V) end} || {V, R} <- Tests]. + +title(Bin) -> + Title = lists:foldl( + fun ({T, R}, V) -> re:replace(V, T, R, [global]) end, + Bin, + [{"\t", "\\\\t"}, {"\r", "\\\\r"}, {"\n", "\\\\n"}] + ), + iolist_to_binary(Title). + +-endif. 
diff --git a/test/http_SUITE.erl b/test/http_SUITE.erl index 002ac4d..22ebb51 100644 --- a/test/http_SUITE.erl +++ b/test/http_SUITE.erl @@ -1,4 +1,5 @@ %% Copyright (c) 2011, Loïc Hoguin <[email protected]> +%% Copyright (c) 2011, Anthony Ramine <[email protected]> %% %% Permission to use, copy, modify, and/or distribute this software for any %% purpose with or without fee is hereby granted, provided that the above @@ -23,7 +24,7 @@ pipeline/1, raw/1, set_resp_header/1, set_resp_overwrite/1, set_resp_body/1, stream_body_set_resp/1, response_as_req/1, static_mimetypes_function/1, static_attribute_etag/1, - static_function_etag/1]). %% http. + static_function_etag/1, multipart/1]). %% http. -export([http_200/1, http_404/1, handler_errors/1, file_200/1, file_403/1, dir_403/1, file_404/1, file_400/1]). %% http and https. @@ -43,7 +44,7 @@ groups() -> set_resp_header, set_resp_overwrite, set_resp_body, response_as_req, stream_body_set_resp, static_mimetypes_function, static_attribute_etag, - static_function_etag] ++ BaseTests}, + static_function_etag, multipart] ++ BaseTests}, {https, [], BaseTests}, {misc, [], [http_10_hostless]}, {rest, [], [rest_simple, rest_keepalive]}]. @@ -144,6 +145,7 @@ init_http_dispatch(Config) -> {[<<"static_function_etag">>, '...'], cowboy_http_static, [{directory, ?config(static_dir, Config)}, {etag, {fun static_function_etag/2, etag_data}}]}, + {[<<"multipart">>], http_handler_multipart, []}, {[], http_handler, []} ]} ]. @@ -236,6 +238,24 @@ max_keepalive_loop(Socket, N) -> end, keepalive_nl_loop(Socket, N - 1). +multipart(Config) -> + Url = build_url("/multipart", Config), + Body = << + "This is a preamble." + "\r\n--OHai\r\nX-Name:answer\r\n\r\n42" + "\r\n--OHai\r\nServer:Cowboy\r\n\r\nIt rocks!\r\n" + "\r\n--OHai--" + "This is an epiloque." 
+ >>, + Request = {Url, [], "multipart/x-makes-no-sense; boundary=OHai", Body}, + {ok, {{"HTTP/1.1", 200, "OK"}, _Headers, Response}} = + httpc:request(post, Request, [], [{body_format, binary}]), + Parts = binary_to_term(Response), + Parts = [ + {[{<<"X-Name">>, <<"answer">>}], <<"42">>}, + {[{'Server', <<"Cowboy">>}], <<"It rocks!\r\n">>} + ]. + nc_rand(Config) -> nc_reqs(Config, "/dev/urandom"). diff --git a/test/http_handler_multipart.erl b/test/http_handler_multipart.erl new file mode 100644 index 0000000..f5f7919 --- /dev/null +++ b/test/http_handler_multipart.erl @@ -0,0 +1,29 @@ +%% Feel free to use, reuse and abuse the code in this file. + +-module(http_handler_multipart). +-behaviour(cowboy_http_handler). +-export([init/3, handle/2, terminate/2]). + +init({_Transport, http}, Req, []) -> + {ok, Req, {}}. + +handle(Req, State) -> + {Result, Req2} = acc_multipart(Req, []), + {ok, Req3} = cowboy_http_req:reply(200, [], term_to_binary(Result), Req2), + {ok, Req3, State}. + +terminate(_Req, _State) -> + ok. + +acc_multipart(Req, Acc) -> + {Result, Req2} = cowboy_http_req:multipart_data(Req), + acc_multipart(Req2, Acc, Result). + +acc_multipart(Req, Acc, {headers, Headers}) -> + acc_multipart(Req, [{Headers, []}|Acc]); +acc_multipart(Req, [{Headers, BodyAcc}|Acc], {body, Data}) -> + acc_multipart(Req, [{Headers, [Data|BodyAcc]}|Acc]); +acc_multipart(Req, [{Headers, BodyAcc}|Acc], end_of_part) -> + acc_multipart(Req, [{Headers, list_to_binary(lists:reverse(BodyAcc))}|Acc]); +acc_multipart(Req, Acc, eof) -> + {lists:reverse(Acc), Req}. |