From c747efbd7533c3b4dd7caa267070c36608c4c0d2 Mon Sep 17 00:00:00 2001 From: Magnus Klaar Date: Mon, 5 Dec 2011 01:08:38 +0100 Subject: replace quoted:from_url with cowboy_http:urldecode This change makes the dependency on quoted optional by adding a minimal urldecode function to cowboy. A protocol option for setting the urldecoding function has been added to the cowboy_http_protocol module. The default value for this option is set to be equivalent to the default settings for quoted. {fun cowboy_http:urldecode/2, crash} A note has been added in the README to document how to use quoted instead of this function. A field to store this option value has been added to the state record in the cowboy_http_protocol module and the http_req record in include/http.hrl. Functions that previously used quoted:from_url/1 have been updated to require an equivalent function in addition to the previously required arguments. This change removes a C compiler from the build requirements of cowboy. It also removes the requirement to cross compile the code if the target arch/OS is different from the arch/OS used to build it. --- README.md | 8 +++++++ include/http.hrl | 5 ++++- rebar.config | 2 -- src/cowboy_dispatcher.erl | 20 +++++++++-------- src/cowboy_http.erl | 51 +++++++++++++++++++++++++++++++++++++++++++- src/cowboy_http_protocol.erl | 20 +++++++++++------ src/cowboy_http_req.erl | 29 ++++++++++++++----------- 7 files changed, 103 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 2e5edcb..c297790 100644 --- a/README.md +++ b/README.md @@ -107,6 +107,14 @@ handle(Req, State) -> terminate(Req, State) -> ok. ``` +**Note**: versions prior to `0.4.0` used the +[quoted](https://github.com/klaar/quoted.erl) library instead of the built in +`cowboy_http:urldecode/2` function. 
If you want to retain this you must add it +as a dependency to your application and add the following cowboy_http_protocol +option: + + {urldecode, {fun quoted:from_url/2, quoted:make([])}} + Continue reading to learn how to dispatch rules and handle requests. diff --git a/include/http.hrl b/include/http.hrl index d1ca0aa..fd0eecc 100644 --- a/include/http.hrl +++ b/include/http.hrl @@ -68,5 +68,8 @@ %% Response. resp_state = waiting :: locked | waiting | chunks | done, resp_headers = [] :: http_headers(), - resp_body = <<>> :: binary() + resp_body = <<>> :: binary(), + + %% Functions. + urldecode :: {fun((binary(), T) -> binary()), T} }). diff --git a/rebar.config b/rebar.config index fe95b2c..82d1fca 100644 --- a/rebar.config +++ b/rebar.config @@ -1,7 +1,5 @@ {cover_enabled, true}. {deps, [ - {quoted, "1.2.*", - {git, "git://github.com/klaar/quoted.erl.git", {tag, "1.2.0"}}}, {proper, "1.0", {git, "git://github.com/manopapad/proper.git", {tag, "v1.0"}}} ]}. diff --git a/src/cowboy_dispatcher.erl b/src/cowboy_dispatcher.erl index 67ea34b..22f6e1e 100644 --- a/src/cowboy_dispatcher.erl +++ b/src/cowboy_dispatcher.erl @@ -16,7 +16,7 @@ %% @doc Dispatch requests according to a hostname and path. -module(cowboy_dispatcher). --export([split_host/1, split_path/1, match/3]). %% API. +-export([split_host/1, split_path/2, match/3]). %% API. -type bindings() :: list({atom(), binary()}). -type tokens() :: list(binary()). @@ -50,21 +50,22 @@ split_host(Host) -> %% Following RFC2396, this function may return path segments containing any %% character, including / if, and only if, a / was escaped %% and part of a path segment. --spec split_path(binary()) -> {tokens(), binary(), binary()}. -split_path(Path) -> +-spec split_path(binary(), fun((binary()) -> binary())) -> + {tokens(), binary(), binary()}. 
+split_path(Path, URLDec) -> case binary:split(Path, <<"?">>) of - [Path] -> {do_split_path(Path, <<"/">>), Path, <<>>}; + [Path] -> {do_split_path(Path, <<"/">>, URLDec), Path, <<>>}; [<<>>, Qs] -> {[], <<>>, Qs}; - [Path2, Qs] -> {do_split_path(Path2, <<"/">>), Path2, Qs} + [Path2, Qs] -> {do_split_path(Path2, <<"/">>, URLDec), Path2, Qs} end. --spec do_split_path(binary(), <<_:8>>) -> tokens(). -do_split_path(RawPath, Separator) -> +-spec do_split_path(binary(), <<_:8>>, fun((binary()) -> binary())) -> tokens(). +do_split_path(RawPath, Separator, URLDec) -> EncodedPath = case binary:split(RawPath, Separator, [global, trim]) of [<<>>|Path] -> Path; Path -> Path end, - [quoted:from_url(Token) || Token <- EncodedPath]. + [URLDec(Token) || Token <- EncodedPath]. %% @doc Match hostname tokens and path tokens against dispatch rules. %% @@ -224,7 +225,8 @@ split_path_test_() -> [<<"users">>, <<"a b">>, <<"c!d">>], <<"/users/a+b/c%21d">>, <<"e+f=g+h">>} ], - [{P, fun() -> {R, RawP, Qs} = split_path(P) end} + URLDecode = fun(Bin) -> cowboy_http:urldecode(Bin, crash) end, + [{P, fun() -> {R, RawP, Qs} = split_path(P, URLDecode) end} || {P, R, RawP, Qs} <- Tests]. match_test_() -> diff --git a/src/cowboy_http.erl b/src/cowboy_http.erl index 4bb7c2c..fd0d142 100644 --- a/src/cowboy_http.erl +++ b/src/cowboy_http.erl @@ -23,7 +23,7 @@ digits/1, token/2, token_ci/2, quoted_string/2]). %% Interpretation. --export([connection_to_atom/1]). +-export([connection_to_atom/1, urldecode/1, urldecode/2]). -include("include/http.hrl"). -include_lib("eunit/include/eunit.hrl"). @@ -670,6 +670,46 @@ connection_to_atom([<<"close">>|_Tail]) -> connection_to_atom([_Any|Tail]) -> connection_to_atom(Tail). +%% @doc Decode a URL encoded binary. +%% @equiv urldecode(Bin, crash) +-spec urldecode(binary()) -> binary(). +urldecode(Bin) when is_binary(Bin) -> + urldecode(Bin, <<>>, crash). + +%% @doc Decode a URL encoded binary. 
+%% The second argument specifies how to handle percent characters that are not +%% followed by two valid hex characters. Use `skip' to ignore such errors, +%% if `crash' is used the function will fail with the reason `badarg'. +-spec urldecode(binary(), crash | skip) -> binary(). +urldecode(Bin, OnError) when is_binary(Bin) -> + urldecode(Bin, <<>>, OnError). + +-spec urldecode(binary(), binary(), crash | skip) -> binary(). +urldecode(<<$%, H, L, Rest/binary>>, Acc, OnError) -> + G = unhex(H), + M = unhex(L), + if G =:= error; M =:= error -> + case OnError of skip -> ok; crash -> erlang:error(badarg) end, + urldecode(<<H, L, Rest/binary>>, <<Acc/binary, $%>>, OnError); + true -> + urldecode(Rest, <<Acc/binary, (G bsl 4 bor M)>>, OnError) + end; +urldecode(<<$%, Rest/binary>>, Acc, OnError) -> + case OnError of skip -> ok; crash -> erlang:error(badarg) end, + urldecode(Rest, <<Acc/binary, $%>>, OnError); +urldecode(<<$+, Rest/binary>>, Acc, OnError) -> + urldecode(Rest, <<Acc/binary, $ >>, OnError); +urldecode(<<C, Rest/binary>>, Acc, OnError) -> + urldecode(Rest, <<Acc/binary, C>>, OnError); +urldecode(<<>>, Acc, _OnError) -> + Acc. + +-spec unhex(byte()) -> byte() | error. +unhex(C) when C >= $0, C =< $9 -> C - $0; +unhex(C) when C >= $A, C =< $F -> C - $A + 10; +unhex(C) when C >= $a, C =< $f -> C - $a + 10; +unhex(_) -> error. + %% Tests. -ifdef(TEST). @@ -836,4 +876,13 @@ digits_test_() -> ], [{V, fun() -> R = digits(V) end} || {V, R} <- Tests]. +urldecode_test_() -> + Tests = [ + {<<" ">>, <<"%20">>}, + {<<" ">>, <<"+">>}, + {<<0>>, <<"%00">>}, + {<<255>>, <<"%fF">>} + ], + [{I, ?_assertEqual(E, urldecode(I))} || {E, I} <- Tests]. + -endif. diff --git a/src/cowboy_http_protocol.erl b/src/cowboy_http_protocol.erl index 83c5513..54df29f 100644 --- a/src/cowboy_http_protocol.erl +++ b/src/cowboy_http_protocol.erl @@ -22,6 +22,9 @@ %% Defaults to 5. %%
<dt>timeout</dt>
<dd>Time in milliseconds before an idle %% connection is closed. Defaults to 5000 milliseconds.</dd>
+%%
<dt>urldecode</dt>
<dd>Function and options argument to use when decoding +%% URL encoded strings. Defaults to `{fun cowboy_http:urldecode/2, crash}'. +%% </dd>
%% %% %% Note that there is no need to monitor these processes when using Cowboy as @@ -44,6 +47,7 @@ transport :: module(), dispatch :: cowboy_dispatcher:dispatch_rules(), handler :: {module(), any()}, + urldecode :: {fun((binary(), T) -> binary()), T}, req_empty_lines = 0 :: integer(), max_empty_lines :: integer(), max_line_length :: integer(), @@ -71,10 +75,12 @@ init(ListenerPid, Socket, Transport, Opts) -> MaxEmptyLines = proplists:get_value(max_empty_lines, Opts, 5), MaxLineLength = proplists:get_value(max_line_length, Opts, 4096), Timeout = proplists:get_value(timeout, Opts, 5000), + URLDecDefault = {fun cowboy_http:urldecode/2, crash}, + URLDec = proplists:get_value(urldecode, Opts, URLDecDefault), receive shoot -> ok end, wait_request(#state{listener=ListenerPid, socket=Socket, transport=Transport, dispatch=Dispatch, max_empty_lines=MaxEmptyLines, - max_line_length=MaxLineLength, timeout=Timeout}). + max_line_length=MaxLineLength, timeout=Timeout, urldecode=URLDec}). %% @private -spec parse_request(#state{}) -> ok | none(). @@ -106,18 +112,20 @@ request({http_request, _Method, _URI, Version}, State) error_terminate(505, State); %% @todo We need to cleanup the URI properly. 
request({http_request, Method, {abs_path, AbsPath}, Version}, - State=#state{socket=Socket, transport=Transport}) -> - {Path, RawPath, Qs} = cowboy_dispatcher:split_path(AbsPath), + State=#state{socket=Socket, transport=Transport, + urldecode={URLDecFun, URLDecArg}=URLDec}) -> + URLDecode = fun(Bin) -> URLDecFun(Bin, URLDecArg) end, + {Path, RawPath, Qs} = cowboy_dispatcher:split_path(AbsPath, URLDecode), ConnAtom = version_to_connection(Version), parse_header(#http_req{socket=Socket, transport=Transport, connection=ConnAtom, method=Method, version=Version, - path=Path, raw_path=RawPath, raw_qs=Qs}, State); + path=Path, raw_path=RawPath, raw_qs=Qs, urldecode=URLDec}, State); request({http_request, Method, '*', Version}, - State=#state{socket=Socket, transport=Transport}) -> + State=#state{socket=Socket, transport=Transport, urldecode=URLDec}) -> ConnAtom = version_to_connection(Version), parse_header(#http_req{socket=Socket, transport=Transport, connection=ConnAtom, method=Method, version=Version, - path='*', raw_path= <<"*">>, raw_qs= <<>>}, State); + path='*', raw_path= <<"*">>, raw_qs= <<>>, urldecode=URLDec}, State); request({http_request, _Method, _URI, _Version}, State) -> error_terminate(501, State); request({http_error, <<"\r\n">>}, diff --git a/src/cowboy_http_req.erl b/src/cowboy_http_req.erl index d5ee3fa..f3a3367 100644 --- a/src/cowboy_http_req.erl +++ b/src/cowboy_http_req.erl @@ -124,9 +124,9 @@ qs_val(Name, Req) when is_binary(Name) -> %% missing. -spec qs_val(binary(), #http_req{}, Default) -> {binary() | true | Default, #http_req{}} when Default::any(). 
-qs_val(Name, Req=#http_req{raw_qs=RawQs, qs_vals=undefined}, Default) - when is_binary(Name) -> - QsVals = parse_qs(RawQs), +qs_val(Name, Req=#http_req{raw_qs=RawQs, qs_vals=undefined, + urldecode={URLDecFun, URLDecArg}}, Default) when is_binary(Name) -> + QsVals = parse_qs(RawQs, fun(Bin) -> URLDecFun(Bin, URLDecArg) end), qs_val(Name, Req#http_req{qs_vals=QsVals}, Default); qs_val(Name, Req, Default) -> case lists:keyfind(Name, 1, Req#http_req.qs_vals) of @@ -136,8 +136,9 @@ qs_val(Name, Req, Default) -> %% @doc Return the full list of query string values. -spec qs_vals(#http_req{}) -> {list({binary(), binary() | true}), #http_req{}}. -qs_vals(Req=#http_req{raw_qs=RawQs, qs_vals=undefined}) -> - QsVals = parse_qs(RawQs), +qs_vals(Req=#http_req{raw_qs=RawQs, qs_vals=undefined, + urldecode={URLDecFun, URLDecArg}}) -> + QsVals = parse_qs(RawQs, fun(Bin) -> URLDecFun(Bin, URLDecArg) end), qs_vals(Req#http_req{qs_vals=QsVals}); qs_vals(Req=#http_req{qs_vals=QsVals}) -> {QsVals, Req}. @@ -355,9 +356,9 @@ body(Length, Req=#http_req{socket=Socket, transport=Transport, %% @doc Return the full body sent with the reqest, parsed as an %% application/x-www-form-urlencoded string. Essentially a POST query string. -spec body_qs(#http_req{}) -> {list({binary(), binary() | true}), #http_req{}}. -body_qs(Req) -> +body_qs(Req=#http_req{urldecode={URLDecFun, URLDecArg}}) -> {ok, Body, Req2} = body(Req), - {parse_qs(Body), Req2}. + {parse_qs(Body, fun(Bin) -> URLDecFun(Bin, URLDecArg) end), Req2}. %% Response API. @@ -476,14 +477,15 @@ compact(Req) -> %% Internal. --spec parse_qs(binary()) -> list({binary(), binary() | true}). -parse_qs(<<>>) -> +-spec parse_qs(binary(), fun((binary()) -> binary())) -> + list({binary(), binary() | true}). 
+parse_qs(<<>>, _URLDecode) -> []; -parse_qs(Qs) -> +parse_qs(Qs, URLDecode) -> Tokens = binary:split(Qs, <<"&">>, [global, trim]), [case binary:split(Token, <<"=">>) of - [Token] -> {quoted:from_url(Token), true}; - [Name, Value] -> {quoted:from_url(Name), quoted:from_url(Value)} + [Token] -> {URLDecode(Token), true}; + [Name, Value] -> {URLDecode(Name), URLDecode(Value)} end || Token <- Tokens]. -spec response_connection(http_headers(), keepalive | close) @@ -663,6 +665,7 @@ parse_qs_test_() -> {<<"a=b=c=d=e&f=g">>, [{<<"a">>, <<"b=c=d=e">>}, {<<"f">>, <<"g">>}]}, {<<"a+b=c+d">>, [{<<"a b">>, <<"c d">>}]} ], - [{Qs, fun() -> R = parse_qs(Qs) end} || {Qs, R} <- Tests]. + URLDecode = fun cowboy_http:urldecode/1, + [{Qs, fun() -> R = parse_qs(Qs, URLDecode) end} || {Qs, R} <- Tests]. -endif. -- cgit v1.2.3