From 528507c7decb2bf2fcbb55a47256011c2ce4bd4b Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Tue, 8 Nov 2011 00:51:49 +0100 Subject: Add multipart support --- src/cowboy_http.erl | 4 +- src/cowboy_http_protocol.erl | 5 +- src/cowboy_http_req.erl | 67 ++++++++++++ src/cowboy_multipart.erl | 249 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 322 insertions(+), 3 deletions(-) create mode 100644 src/cowboy_multipart.erl (limited to 'src') diff --git a/src/cowboy_http.erl b/src/cowboy_http.erl index 6454521..6404379 100644 --- a/src/cowboy_http.erl +++ b/src/cowboy_http.erl @@ -17,10 +17,10 @@ -module(cowboy_http). %% Parsing. --export([list/2, nonempty_list/2, content_type/1, +-export([list/2, nonempty_list/2, content_type/1, content_type_params/3, media_range/2, conneg/2, language_range/2, entity_tag_match/1, http_date/1, rfc1123_date/1, rfc850_date/1, asctime_date/1, - digits/1, token/2, token_ci/2, quoted_string/2]). + whitespace/2, digits/1, token/2, token_ci/2, quoted_string/2]). %% Interpretation. -export([connection_to_atom/1]). diff --git a/src/cowboy_http_protocol.erl b/src/cowboy_http_protocol.erl index b0ee590..c76c607 100644 --- a/src/cowboy_http_protocol.erl +++ b/src/cowboy_http_protocol.erl @@ -350,7 +350,10 @@ ensure_body_processed(Req=#http_req{body_state=waiting}) -> {error, badarg} -> ok; %% No body. {error, _Reason} -> close; _Any -> ok - end. + end; +ensure_body_processed(Req=#http_req{body_state={multipart, _, _}}) -> + {ok, Req2} = cowboy_http_req:multipart_skip(Req), + ensure_body_processed(Req2). -spec ensure_response(#http_req{}) -> ok. %% The handler has already fully replied to the client. diff --git a/src/cowboy_http_req.erl b/src/cowboy_http_req.erl index fa4e620..f850e52 100644 --- a/src/cowboy_http_req.erl +++ b/src/cowboy_http_req.erl @@ -36,6 +36,10 @@ body/1, body/2, body_qs/1 ]). %% Request Body API. +-export([ + multipart_data/1, multipart_skip/1 +]). %% Request Multipart API. 
+
 -export([
 	reply/2, reply/3, reply/4,
 	chunked_reply/2, chunked_reply/3, chunk/2,
@@ -358,6 +362,69 @@ body_qs(Req) ->
 	{ok, Body, Req2} = body(Req),
 	{parse_qs(Body), Req2}.
 
+%% Multipart Request API.
+
+%% @doc Return data from the multipart parser.
+%%
+%% Use this function for multipart streaming. For each part in the request,
+%% this function returns {headers, Headers} followed by a sequence of
+%% {body, Data} tuples and finally end_of_part. When there
+%% is no part to parse anymore, eof is returned.
+%%
+%% The request must carry a multipart Content-Type with a boundary
+%% parameter. The Content-Type and boundary are asserted by pattern
+%% matching, so any other request makes this function crash with badmatch
+%% rather than return an error tuple.
+-spec multipart_data(#http_req{}) -> {multipart_data(), #http_req{}}.
+multipart_data(Req=#http_req{body_state=waiting}) ->
+	% First call: build the parser from the boundary and feed it whatever
+	% part of the body is already sitting in the request buffer.
+	{{<<"multipart">>, _SubType, Params}, Req2} =
+		parse_header('Content-Type', Req),
+	{_, Boundary} = lists:keyfind(<<"boundary">>, 1, Params),
+	{Length, Req3=#http_req{buffer=Buffer}} =
+		parse_header('Content-Length', Req2),
+	multipart_data(Req3, Length, cowboy_multipart:parser(Boundary), Buffer);
+multipart_data(Req=#http_req{body_state={multipart, Length, Cont}}) ->
+	% Subsequent calls: resume the continuation stored by the previous call.
+	multipart_data(Req, Length, Cont());
+multipart_data(Req=#http_req{body_state=done}) ->
+	{eof, Req}.
+
+% Feed the initial buffer to the parser. Length is the number of body bytes
+% not yet handed to the parser; bytes beyond it stay in the request buffer.
+multipart_data(Req, Length, Parser, Buffer) when byte_size(Buffer) >= Length ->
+	<< Data:Length/binary, Rest/binary >> = Buffer,
+	multipart_data(Req#http_req{buffer=Rest}, 0, Parser(Data));
+multipart_data(Req, Length, Parser, Buffer) ->
+	NewLength = Length - byte_size(Buffer),
+	multipart_data(Req#http_req{buffer= <<>>}, NewLength, Parser(Buffer)).
+
+% Translate a parser result into the public return value, storing the
+% continuation and the remaining body length in body_state.
+multipart_data(Req, Length, {headers, Headers, Cont}) ->
+	{{headers, Headers}, Req#http_req{body_state={multipart, Length, Cont}}};
+multipart_data(Req, Length, {body, Data, Cont}) ->
+	{{body, Data}, Req#http_req{body_state={multipart, Length, Cont}}};
+multipart_data(Req, Length, {end_of_part, Cont}) ->
+	{end_of_part, Req#http_req{body_state={multipart, Length, Cont}}};
+multipart_data(Req, 0, eof) ->
+	{eof, Req#http_req{body_state=done}};
+multipart_data(Req=#http_req{socket=Socket, transport=Transport},
+		Length, eof) ->
+	% The parser is finished but Length body bytes are still unread:
+	% read and discard them. Anything but {ok, _} crashes with badmatch.
+	{ok, _Data} = Transport:recv(Socket, Length, 5000),
+	{eof, Req#http_req{body_state=done}};
+multipart_data(Req=#http_req{socket=Socket, transport=Transport},
+		Length, {more, Parser}) when Length > 0 ->
+	% The parser needs more input. A recv length of 0 presumably returns
+	% whatever is available, as gen_tcp:recv/3 does - confirm for Transport.
+	% Receive errors are not handled and crash with case_clause.
+	case Transport:recv(Socket, 0, 5000) of
+		{ok, << Data:Length/binary, Buffer/binary >>} ->
+			% Received at least the rest of the body; keep the surplus
+			% bytes in the request buffer.
+			multipart_data(Req#http_req{buffer=Buffer}, 0, Parser(Data));
+		{ok, Data} ->
+			multipart_data(Req, Length - byte_size(Data), Parser(Data))
+	end.
+
+%% @doc Skip a part returned by the multipart parser.
+%%
+%% This function repeatedly calls multipart_data/1 until
+%% end_of_part or eof is parsed.
+-spec multipart_skip(#http_req{}) -> {ok, #http_req{}}.
+multipart_skip(Req) ->
+	case multipart_data(Req) of
+		{end_of_part, Req2} -> {ok, Req2};
+		{eof, Req2} -> {ok, Req2};
+		{_Other, Req2} -> multipart_skip(Req2)
+	end.
+
 %% Response API.
 
 %% @equiv reply(Status, [], [], Req)
diff --git a/src/cowboy_multipart.erl b/src/cowboy_multipart.erl
new file mode 100644
index 0000000..b7aeb54
--- /dev/null
+++ b/src/cowboy_multipart.erl
@@ -0,0 +1,249 @@
+%% Copyright (c) 2011, Anthony Ramine
+%%
+%% Permission to use, copy, modify, and/or distribute this software for any
+%% purpose with or without fee is hereby granted, provided that the above
+%% copyright notice and this permission notice appear in all copies.
+%%
+%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+%% MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+%% @doc Multipart parser.
+-module(cowboy_multipart).
+
+-type part_parser() :: parser(more(part_result())).
+-type parser(T) :: fun((binary()) -> T).
+-type more(T) :: T | {more, parser(T)}.
+-type part_result() :: headers() | eof.
+-type headers() :: {headers, http_headers(), body_cont()}.
+-type http_headers() :: [{atom() | binary(), binary()}].
+-type body_cont() :: cont(more(body_result())).
+-type cont(T) :: fun(() -> T).
+-type body_result() :: {body, binary(), body_cont()} | end_of_part().
+-type end_of_part() :: {end_of_part, cont(more(part_result()))}.
+-type disposition() :: {binary(), [{binary(), binary()}]}.
+
+-export([parser/1, content_disposition/1]).
+
+-include_lib("eunit/include/eunit.hrl").
+
+%% API.
+
+%% @doc Return a multipart parser for the given boundary.
+%%
+%% The returned fun takes the first chunk of the body. Its result is either
+%% a parse result or {more, Fun}, where Fun expects the next chunk.
+-spec parser(binary()) -> part_parser().
+parser(Boundary) when is_binary(Boundary) ->
+	fun (Bin) when is_binary(Bin) -> parse(Bin, Boundary) end.
+
+%% @doc Parse a content disposition.
+%%
+%% Returns {Disposition, Params} on success, or {error, badarg} when the
+%% value does not start with a token.
+%% @todo Parse the MIME header instead of the HTTP one.
+-spec content_disposition(binary()) -> disposition() | {error, badarg}.
+content_disposition(Data) ->
+	cowboy_http:token_ci(Data,
+		fun (_Rest, <<>>) -> {error, badarg};
+			(Rest, Disposition) ->
+				cowboy_http:content_type_params(Rest,
+					fun (Params) -> {Disposition, Params} end, [])
+		end).
+
+%% Internal.
+
+%% @doc Entry point of the multipart parser, skips over the preamble if any.
+-spec parse(binary(), binary()) -> more(part_result()).
+parse(Data, Boundary) when byte_size(Data) >= byte_size(Boundary) + 2 ->
+	Size = byte_size(Boundary),
+	Compiled = pattern(Boundary),
+	case Data of
+		<<"--", Boundary:Size/binary, Tail/binary>> ->
+			% No preamble: the body opens directly with the first boundary.
+			parse_boundary_tail(Tail, Compiled);
+		_ ->
+			% Anything before the first boundary is preamble; drop it.
+			skip(Data, Compiled)
+	end;
+parse(Data, Boundary) ->
+	% Too short to tell whether the body opens with the boundary.
+	more(Data, fun (Next) -> parse(Next, Boundary) end).
+
+-type pattern() :: {binary:cp(), non_neg_integer()}.
+
+%% @doc Build the search pattern for a boundary.
+%% The pattern is "\r\n--" followed by the boundary; it is returned in
+%% compiled form together with its size in bytes.
+-spec pattern(binary()) -> pattern().
+pattern(Boundary) ->
+	Delimiter = <<"\r\n--", Boundary/binary>>,
+	{binary:compile_pattern(Delimiter), byte_size(Delimiter)}.
+
+%% @doc Parse what follows a boundary on its line.
+%% A boundary followed by "--" ends the parsing and yields eof.
+-spec parse_boundary_tail(binary(), pattern()) -> more(part_result()).
+parse_boundary_tail(<<"--", _/binary>>, _Pattern) ->
+	% Closing boundary: whatever follows is ignored.
+	eof;
+parse_boundary_tail(Data, Pattern) when byte_size(Data) >= 2 ->
+	% Not a closing boundary, consume the rest of the boundary line.
+	parse_boundary_eol(Data, Pattern);
+parse_boundary_tail(Data, Pattern) ->
+	% Fewer than two bytes: cannot yet rule out a closing "--".
+	more(Data, fun (Next) -> parse_boundary_tail(Next, Pattern) end).
+
+%% @doc Discard the rest of the boundary line up to its CRLF.
+-spec parse_boundary_eol(binary(), pattern()) -> more(part_result()).
+parse_boundary_eol(Data, Pattern) ->
+	case binary:match(Data, <<"\r\n">>) of
+		nomatch ->
+			% No CRLF yet. Only the last byte is kept, presumably so that
+			% a trailing "\r" can pair with a "\n" in the next chunk.
+			KeepFrom = max(byte_size(Data) - 1, 0),
+			<<_:KeepFrom/binary, Kept/binary>> = Data,
+			more(Kept, fun (Next) -> parse_boundary_eol(Next, Pattern) end);
+		{EolPos, _MatchLength} ->
+			% Drop everything before the CRLF, then let
+			% cowboy_http:whitespace/2 remove optional whitespace before
+			% the line ending is checked.
+			<<_:EolPos/binary, FromEol/binary>> = Data,
+			cowboy_http:whitespace(FromEol,
+				fun (Trimmed) -> parse_boundary_crlf(Trimmed, Pattern) end)
+	end.
+
+-spec parse_boundary_crlf(binary(), pattern()) -> more(part_result()).
+parse_boundary_crlf(<<"\r\n", AfterEol/binary>>, Pattern) ->
+	% Called from parse_boundary_eol/2 once a CRLF has been located, so the
+	% line ending is expected right here; the part headers follow it.
+	parse_headers(AfterEol, Pattern);
+parse_boundary_crlf(Data, Pattern) ->
+	% RFC 2046 does not specify what to do when LWSP is not directly
+	% followed by a new line. Here it is treated as part of the boundary
+	% line, so the search for the end of line starts over.
+	parse_boundary_eol(Data, Pattern).
+
+-spec parse_headers(binary(), pattern()) -> more(part_result()).
+parse_headers(Data, Pattern) ->
+	parse_headers(Data, Pattern, []).
+
+-spec parse_headers(binary(), pattern(), http_headers()) -> more(part_result()).
+parse_headers(Data, Pattern, Seen) ->
+	case erlang:decode_packet(httph_bin, Data, []) of
+		{more, _} ->
+			% Incomplete header line, wait for the next chunk.
+			more(Data, fun (Next) -> parse_headers(Next, Pattern, Seen) end);
+		{ok, http_eoh, Body} ->
+			% Empty line reached: hand out the headers in arrival order
+			% along with a continuation that parses the part body.
+			{headers, lists:reverse(Seen),
+				fun () -> parse_body(Body, Pattern) end};
+		{ok, {http_header, _, Name, _, Value}, Remaining} ->
+			parse_headers(Remaining, Pattern, [{Name, Value} | Seen]);
+		{ok, {http_error, _}, _} ->
+			% Malformed part: ignore it and resume at the next boundary.
+			skip(Data, Pattern)
+	end.
+
+-spec parse_body(binary(), pattern()) -> more(body_result()).
+parse_body(Bin, Pattern = {P, PSize}) when byte_size(Bin) >= PSize ->
+	case binary:match(Bin, P) of
+		{0, _Length} ->
+			% The boundary is right at the start: the part has no more body.
+			<<_:PSize/binary, Rest/binary>> = Bin,
+			end_of_part(Rest, Pattern);
+		{BoundaryStart, _Length} ->
+			% Boundary found, this is the latest partial body that will be
+			% returned for this part.
+			<<PBody:BoundaryStart/binary, _:PSize/binary, Rest/binary>> = Bin,
+			FResult = end_of_part(Rest, Pattern),
+			{body, PBody, fun () -> FResult end};
+		nomatch ->
+			% No boundary in this chunk. Emit everything except the last
+			% PSize - 1 bytes, which may be the beginning of a boundary
+			% completed by the next chunk.
+			PartialLength = byte_size(Bin) - PSize + 1,
+			<<PBody:PartialLength/binary, Rest/binary>> = Bin,
+			{body, PBody, fun () -> parse_body(Rest, Pattern) end}
+	end;
+parse_body(Bin, Pattern) ->
+	% Less data than a full boundary pattern, nothing can be decided yet.
+	more(Bin, fun (NewBin) -> parse_body(NewBin, Pattern) end).
+
+%% @doc Return the end_of_part result; its continuation resumes parsing
+%% right after the boundary that closed the part.
+-spec end_of_part(binary(), pattern()) -> end_of_part().
+end_of_part(Bin, Pattern) ->
+	{end_of_part, fun () -> parse_boundary_tail(Bin, Pattern) end}.
+
+%% @doc Discard data up to and including the next boundary.
+-spec skip(binary(), pattern()) -> more(part_result()).
+skip(Bin, Pattern = {P, PSize}) ->
+	case binary:match(Bin, P) of
+		{BoundaryStart, _Length} ->
+			% Boundary found, proceed with parsing of the next part.
+			RestStart = BoundaryStart + PSize,
+			<<_:RestStart/binary, Rest/binary>> = Bin,
+			parse_boundary_tail(Rest, Pattern);
+		nomatch ->
+			% Boundary not found, need more data. Only the last PSize - 1
+			% bytes are kept since they may begin a boundary.
+			RestStart = max(byte_size(Bin) - PSize + 1, 0),
+			<<_:RestStart/binary, Rest/binary>> = Bin,
+			more(Rest, fun (NewBin) -> skip(NewBin, Pattern) end)
+	end.
+
+%% @doc Build a {more, Fun} result. When leftover bytes are given, Fun
+%% prepends them to the next chunk before calling the inner parser.
+-spec more(binary(), parser(T)) -> {more, parser(T)}.
+more(<<>>, F) ->
+	% Nothing is buffered, the parser can take the next chunk directly.
+	{more, F};
+more(Bin, InnerF) ->
+	F = fun (NewData) when is_binary(NewData) ->
+			InnerF(<<Bin/binary, NewData/binary>>)
+		end,
+	{more, F}.
+
+%% Tests.
+
+-ifdef(TEST).
+
+%% Each case is a complete multipart body paired with the list of
+%% {Headers, Body} parts the parser must produce for it.
+multipart_test_() ->
+	%% {Body, Result}
+	Tests = [
+		{<<"--boundary--">>, []},
+		{<<"preamble\r\n--boundary--">>, []},
+		{<<"--boundary--\r\nepilogue">>, []},
+		{<<"\r\n--boundary\r\nA:b\r\nC:d\r\n\r\n\r\n--boundary--">>,
+			[{[{<<"A">>, <<"b">>}, {<<"C">>, <<"d">>}], <<>>}]},
+		{
+			<<
+				"--boundary\r\nX-Name:answer\r\n\r\n42"
+				"\r\n--boundary\r\nServer:Cowboy\r\n\r\nIt rocks!\r\n"
+				"\r\n--boundary--"
+			>>,
+			[
+				{[{<<"X-Name">>, <<"answer">>}], <<"42">>},
+				{[{'Server', <<"Cowboy">>}], <<"It rocks!\r\n">>}
+			]
+		}
+	],
+	% ?_assertEqual reports the expected and actual values on failure.
+	[{title(V), ?_assertEqual(R, acc_multipart(V))} || {V, R} <- Tests].
+
+%% Run the parser over a complete body and collect its parts.
+acc_multipart(V) ->
+	acc_multipart((parser(<<"boundary">>))(V), []).
+
+%% Body chunks of the current part are accumulated in reverse order and
+%% joined into a single binary once end_of_part is reached.
+acc_multipart({headers, Headers, Cont}, Acc) ->
+	acc_multipart(Cont(), [{Headers, []}|Acc]);
+acc_multipart({body, Body, Cont}, [{Headers, BodyAcc}|Acc]) ->
+	acc_multipart(Cont(), [{Headers, [Body|BodyAcc]}|Acc]);
+acc_multipart({end_of_part, Cont}, [{Headers, BodyAcc}|Acc]) ->
+	Body = list_to_binary(lists:reverse(BodyAcc)),
+	acc_multipart(Cont(), [{Headers, Body}|Acc]);
+acc_multipart(eof, Acc) ->
+	lists:reverse(Acc).
+
+content_disposition_test_() ->
+	%% {Disposition, Result}
+	Tests = [
+		{<<"form-data; name=id">>, {<<"form-data">>, [{<<"name">>, <<"id">>}]}},
+		{<<"inline">>, {<<"inline">>, []}},
+		{<<"attachment; \tfilename=brackets-slides.pdf">>,
+			{<<"attachment">>, [{<<"filename">>, <<"brackets-slides.pdf">>}]}}
+	],
+	[{title(V), ?_assertEqual(R, content_disposition(V))} || {V, R} <- Tests].
+
+%% Build a test title from the input, with tab, CR and LF shown as
+%% backslash escapes.
+title(Bin) ->
+	Title = lists:foldl(
+		fun ({T, R}, V) -> re:replace(V, T, R, [global]) end,
+		Bin,
+		[{"\t", "\\\\t"}, {"\r", "\\\\r"}, {"\n", "\\\\n"}]
+	),
+	iolist_to_binary(Title).
+
+-endif.
-- 
cgit v1.2.3