From b2243aa54438dbea4137960c9dc6f54ac746f429 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Wed, 26 Sep 2012 14:11:53 +0200 Subject: Optimize cowboy_protocol * #state{} changes are avoided where possible * #state{} is now smaller and use less memory * the Req object is created only after the whole request is parsed * parsing makes use of a single binary match context * external calls are avoided in the critical path * URL fragment is now extracted properly (retrieval API next commit) * argument orders to local functions modified to avoid extra operations * dispatching waits as long as possible before tokenizing host/path * handler opts are no longer shown in the error messages except in init The code may not look as beautiful as it was before. But it really is, for parsing code. The parsing section of the file may be skipped if your eyes start to burn. --- src/cowboy_dispatcher.erl | 275 ++++++++++++++++++++-------------------------- 1 file changed, 117 insertions(+), 158 deletions(-) (limited to 'src/cowboy_dispatcher.erl') diff --git a/src/cowboy_dispatcher.erl b/src/cowboy_dispatcher.erl index c52aba9..f5dfcd9 100644 --- a/src/cowboy_dispatcher.erl +++ b/src/cowboy_dispatcher.erl @@ -17,16 +17,14 @@ -module(cowboy_dispatcher). %% API. --export([split_host/1]). --export([split_path/2]). --export([match/3]). +-export([match/4]). --type bindings() :: list({atom(), binary()}). --type tokens() :: list(binary()). --type match_rule() :: '_' | '*' | list(binary() | '_' | '...' | atom()). --type dispatch_path() :: list({match_rule(), module(), any()}). +-type bindings() :: [{atom(), binary()}]. +-type tokens() :: [binary()]. +-type match_rule() :: '_' | '*' | [binary() | '_' | '...' | atom()]. +-type dispatch_path() :: [{match_rule(), module(), any()}]. -type dispatch_rule() :: {Host::match_rule(), Path::dispatch_path()}. --type dispatch_rules() :: list(dispatch_rule()). +-type dispatch_rules() :: [dispatch_rule()]. -export_type([bindings/0]). -export_type([tokens/0]). @@ -38,60 +36,6 @@ %% API. -%% @doc Split a hostname into a list of tokens. --spec split_host(binary()) - -> {tokens(), binary(), undefined | inet:port_number()}. -split_host(Host) -> - case binary:match(Host, <<":">>) of - nomatch -> - {do_split_host(Host, []), Host, undefined}; - {Pos, _} -> - << Host2:Pos/binary, _:8, Port/bits >> = Host, - {do_split_host(Host2, []), Host2, - list_to_integer(binary_to_list(Port))} - end. - -do_split_host(Host, Acc) -> - case binary:match(Host, <<".">>) of - nomatch when Host =:= <<>> -> - Acc; - nomatch -> - [Host|Acc]; - {Pos, _} -> - << Segment:Pos/binary, _:8, Rest/bits >> = Host, - false = byte_size(Segment) == 0, - do_split_host(Rest, [Segment|Acc]) - end. - -%% @doc Split a path into a list of path segments. -%% -%% Following RFC2396, this function may return path segments containing any -%% character, including / if, and only if, a / was escaped -%% and part of a path segment. --spec split_path(binary(), fun((binary()) -> binary())) -> - {tokens(), binary(), binary()}. -split_path(Path, URLDec) -> - case binary:match(Path, <<"?">>) of - nomatch -> - {do_split_path(Path, URLDec), Path, <<>>}; - {Pos, _} -> - << Path2:Pos/binary, _:8, Qs/bits >> = Path, - {do_split_path(Path2, URLDec), Path2, Qs} - end. - -do_split_path(<< "/", Path/bits >>, URLDec) -> - do_split_path(Path, URLDec, []). -do_split_path(Path, URLDec, Acc) -> - case binary:match(Path, <<"/">>) of - nomatch when Path =:= <<>> -> - lists:reverse([URLDec(S) || S <- Acc]); - nomatch -> - lists:reverse([URLDec(S) || S <- [Path|Acc]]); - {Pos, _} -> - << Segment:Pos/binary, _:8, Rest/bits >> = Path, - do_split_path(Rest, URLDec, [Segment|Acc]) - end. - %% @doc Match hostname tokens and path tokens against dispatch rules. %% %% It is typically used for matching tokens for the hostname and path of @@ -119,47 +63,93 @@ do_split_path(Path, URLDec, Acc) -> %% options found in the dispatch list, a key-value list of bindings and %% the tokens that were matched by the '...' atom for both the %% hostname and path. --spec match(Host::tokens(), Path::tokens(), dispatch_rules()) +-spec match(dispatch_rules(), fun((binary()) -> binary()), + Host::binary() | tokens(), Path::binary()) -> {ok, module(), any(), bindings(), HostInfo::undefined | tokens(), PathInfo::undefined | tokens()} | {error, notfound, host} | {error, notfound, path}. -match(_Host, _Path, []) -> +match([], _, _, _) -> {error, notfound, host}; -match(_Host, Path, [{'_', PathMatchs}|_Tail]) -> - match_path(Path, PathMatchs, [], undefined); -match(Host, Path, [{HostMatch, PathMatchs}|Tail]) -> - case list_match(Host, lists:reverse(HostMatch), []) of +match([{'_', PathMatchs}|_Tail], URLDecode, _, Path) -> + match_path(PathMatchs, URLDecode, undefined, Path, []); +match([{HostMatch, PathMatchs}|Tail], URLDecode, Tokens, Path) + when is_list(Tokens) -> + case list_match(Tokens, lists:reverse(HostMatch), []) of false -> - match(Host, Path, Tail); - {true, HostBinds, undefined} -> - match_path(Path, PathMatchs, HostBinds, undefined); - {true, HostBinds, HostInfo} -> - match_path(Path, PathMatchs, HostBinds, lists:reverse(HostInfo)) - end. + match(Tail, URLDecode, Tokens, Path); + {true, Bindings, undefined} -> + match_path(PathMatchs, URLDecode, undefined, Path, Bindings); + {true, Bindings, HostInfo} -> + match_path(PathMatchs, URLDecode, lists:reverse(HostInfo), + Path, Bindings) + end; +match(Dispatch, URLDecode, Host, Path) -> + match(Dispatch, URLDecode, split_host(Host), Path). --spec match_path(tokens(), dispatch_path(), bindings(), - HostInfo::undefined | tokens()) +-spec match_path(dispatch_path(), fun((binary()) -> binary()), + HostInfo::undefined | tokens(), binary() | tokens(), bindings()) -> {ok, module(), any(), bindings(), HostInfo::undefined | tokens(), PathInfo::undefined | tokens()} | {error, notfound, path}. -match_path(_Path, [], _HostBinds, _HostInfo) -> +match_path([], _, _, _, _) -> {error, notfound, path}; -match_path(_Path, [{'_', Handler, Opts}|_Tail], HostBinds, HostInfo) -> - {ok, Handler, Opts, HostBinds, HostInfo, undefined}; -match_path('*', [{'*', Handler, Opts}|_Tail], HostBinds, HostInfo) -> - {ok, Handler, Opts, HostBinds, HostInfo, undefined}; -match_path(Path, [{PathMatch, Handler, Opts}|Tail], HostBinds, HostInfo) -> - case list_match(Path, PathMatch, []) of +match_path([{'_', Handler, Opts}|_Tail], _, HostInfo, _, Bindings) -> + {ok, Handler, Opts, Bindings, HostInfo, undefined}; +match_path([{'*', Handler, Opts}|_Tail], _, HostInfo, '*', Bindings) -> + {ok, Handler, Opts, Bindings, HostInfo, undefined}; +match_path([{PathMatch, Handler, Opts}|Tail], URLDecode, HostInfo, Tokens, + Bindings) when is_list(Tokens) -> + case list_match(Tokens, PathMatch, []) of false -> - match_path(Path, Tail, HostBinds, HostInfo); + match_path(Tail, URLDecode, HostInfo, Tokens, Bindings); {true, PathBinds, PathInfo} -> - {ok, Handler, Opts, HostBinds ++ PathBinds, HostInfo, PathInfo} - end. + {ok, Handler, Opts, Bindings ++ PathBinds, HostInfo, PathInfo} + end; +match_path(Dispatch, URLDecode, HostInfo, Path, Bindings) -> + match_path(Dispatch, URLDecode, HostInfo, + split_path(Path, URLDecode), Bindings). %% Internal. +%% @doc Split a hostname into a list of tokens. +-spec split_host(binary()) -> tokens(). +split_host(Host) -> + split_host(Host, []). + +split_host(Host, Acc) -> + case binary:match(Host, <<".">>) of + nomatch when Host =:= <<>> -> + Acc; + nomatch -> + [Host|Acc]; + {Pos, _} -> + << Segment:Pos/binary, _:8, Rest/bits >> = Host, + false = byte_size(Segment) == 0, + split_host(Rest, [Segment|Acc]) + end. + +%% @doc Split a path into a list of path segments. +%% +%% Following RFC2396, this function may return path segments containing any +%% character, including / if, and only if, a / was escaped +%% and part of a path segment. +-spec split_path(binary(), fun((binary()) -> binary())) -> tokens(). +split_path(<< $/, Path/bits >>, URLDec) -> + split_path(Path, URLDec, []). + +split_path(Path, URLDec, Acc) -> + case binary:match(Path, <<"/">>) of + nomatch when Path =:= <<>> -> + lists:reverse([URLDec(S) || S <- Acc]); + nomatch -> + lists:reverse([URLDec(S) || S <- [Path|Acc]]); + {Pos, _} -> + << Segment:Pos/binary, _:8, Rest/bits >> = Path, + split_path(Rest, URLDec, [Segment|Acc]) + end. + -spec list_match(tokens(), match_rule(), bindings()) -> {true, bindings(), undefined | tokens()} | false. %% Atom '...' matches any trailing path, stop right now. @@ -188,64 +178,32 @@ list_match(_List, _Match, _Binds) -> split_host_test_() -> %% {Host, Result} Tests = [ - {<<"">>, {[], <<"">>, undefined}}, - {<<"*">>, {[<<"*">>], <<"*">>, undefined}}, + {<<"">>, []}, + {<<"*">>, [<<"*">>]}, {<<"cowboy.ninenines.eu">>, - {[<<"eu">>, <<"ninenines">>, <<"cowboy">>], - <<"cowboy.ninenines.eu">>, undefined}}, + [<<"eu">>, <<"ninenines">>, <<"cowboy">>]}, {<<"ninenines.eu">>, - {[<<"eu">>, <<"ninenines">>], <<"ninenines.eu">>, undefined}}, - {<<"ninenines.eu:8080">>, - {[<<"eu">>, <<"ninenines">>], <<"ninenines.eu">>, 8080}}, + [<<"eu">>, <<"ninenines">>]}, {<<"a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z">>, - {[<<"z">>, <<"y">>, <<"x">>, <<"w">>, <<"v">>, <<"u">>, <<"t">>, - <<"s">>, <<"r">>, <<"q">>, <<"p">>, <<"o">>, <<"n">>, <<"m">>, - <<"l">>, <<"k">>, <<"j">>, <<"i">>, <<"h">>, <<"g">>, <<"f">>, - <<"e">>, <<"d">>, <<"c">>, <<"b">>, <<"a">>], - <<"a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z">>, - undefined}} + [<<"z">>, <<"y">>, <<"x">>, <<"w">>, <<"v">>, <<"u">>, <<"t">>, + <<"s">>, <<"r">>, <<"q">>, <<"p">>, <<"o">>, <<"n">>, <<"m">>, + <<"l">>, <<"k">>, <<"j">>, <<"i">>, <<"h">>, <<"g">>, <<"f">>, + <<"e">>, <<"d">>, <<"c">>, <<"b">>, <<"a">>]} ], [{H, fun() -> R = split_host(H) end} || {H, R} <- Tests]. -split_host_fail_test_() -> - Tests = [ - <<".........">>, - <<"ninenines..eu">>, - <<"ninenines.eu:owns">>, - <<"ninenines.eu: owns">>, - <<"ninenines.eu:42fun">>, - <<"ninenines.eu: 42fun">>, - <<"ninenines.eu:42 fun">>, - <<"ninenines.eu:fun 42">>, - <<"ninenines.eu: 42">>, - <<":owns">>, - <<":42 fun">> - ], - [{H, fun() -> case catch split_host(H) of - {'EXIT', _Reason} -> ok - end end} || H <- Tests]. - split_path_test_() -> %% {Path, Result, QueryString} Tests = [ - {<<"/?">>, [], <<"/">>, <<"">>}, - {<<"/???">>, [], <<"/">>, <<"??">>}, - {<<"/">>, [], <<"/">>, <<"">>}, - {<<"/extend//cowboy">>, [<<"extend">>, <<>>, <<"cowboy">>], - <<"/extend//cowboy">>, <<>>}, - {<<"/users">>, [<<"users">>], <<"/users">>, <<"">>}, - {<<"/users?">>, [<<"users">>], <<"/users">>, <<"">>}, - {<<"/users?a">>, [<<"users">>], <<"/users">>, <<"a">>}, - {<<"/users/42/friends?a=b&c=d&e=notsure?whatever">>, - [<<"users">>, <<"42">>, <<"friends">>], - <<"/users/42/friends">>, <<"a=b&c=d&e=notsure?whatever">>}, - {<<"/users/a+b/c%21d?e+f=g+h">>, - [<<"users">>, <<"a b">>, <<"c!d">>], - <<"/users/a+b/c%21d">>, <<"e+f=g+h">>} + {<<"/">>, []}, + {<<"/extend//cowboy">>, [<<"extend">>, <<>>, <<"cowboy">>]}, + {<<"/users">>, [<<"users">>]}, + {<<"/users/42/friends">>, [<<"users">>, <<"42">>, <<"friends">>]}, + {<<"/users/a+b/c%21d">>, [<<"users">>, <<"a b">>, <<"c!d">>]} ], URLDecode = fun(Bin) -> cowboy_http:urldecode(Bin, crash) end, - [{P, fun() -> {R, RawP, Qs} = split_path(P, URLDecode) end} - || {P, R, RawP, Qs} <- Tests]. + [{P, fun() -> R = split_path(P, URLDecode) end} + || {P, R} <- Tests]. match_test_() -> Dispatch = [ @@ -270,28 +228,31 @@ match_test_() -> ], %% {Host, Path, Result} Tests = [ - {[<<"any">>], [], {ok, match_any, [], []}}, - {[<<"eu">>, <<"ninenines">>, <<"any">>, <<"www">>], - [<<"users">>, <<"42">>, <<"mails">>], + {<<"any">>, <<"/">>, {ok, match_any, [], []}}, + {<<"www.any.ninenines.eu">>, <<"/users/42/mails">>, {ok, match_any_subdomain_users, [], []}}, - {[<<"eu">>, <<"ninenines">>, <<"www">>], - [<<"users">>, <<"42">>, <<"mails">>], {ok, match_any, [], []}}, - {[<<"eu">>, <<"ninenines">>, <<"www">>], [], {ok, match_any, [], []}}, - {[<<"eu">>, <<"ninenines">>, <<"any">>, <<"www">>], - [<<"not_users">>, <<"42">>, <<"mails">>], {error, notfound, path}}, - {[<<"eu">>, <<"ninenines">>], [], {ok, match_extend, [], []}}, - {[<<"eu">>, <<"ninenines">>], [<<"users">>, <<"42">>, <<"friends">>], + {<<"www.ninenines.eu">>, <<"/users/42/mails">>, + {ok, match_any, [], []}}, + {<<"www.ninenines.eu">>, <<"/">>, + {ok, match_any, [], []}}, + {<<"www.any.ninenines.eu">>, <<"/not_users/42/mails">>, + {error, notfound, path}}, + {<<"ninenines.eu">>, <<"/">>, + {ok, match_extend, [], []}}, + {<<"ninenines.eu">>, <<"/users/42/friends">>, {ok, match_extend_users_friends, [], [{id, <<"42">>}]}}, - {[<<"fr">>, <<"erlang">>], '_', + {<<"erlang.fr">>, '_', {ok, match_erlang_ext, [], [{ext, <<"fr">>}]}}, - {[<<"any">>], [<<"users">>, <<"444">>, <<"friends">>], + {<<"any">>, <<"/users/444/friends">>, {ok, match_users_friends, [], [{id, <<"444">>}]}}, - {[<<"fr">>, <<"ninenines">>], [<<"threads">>, <<"987">>], + {<<"ninenines.fr">>, <<"/threads/987">>, {ok, match_duplicate_vars, [we, {expect, two}, var, here], - [{var, <<"fr">>}, {var, <<"987">>}]}} + [{var, <<"fr">>}, {var, <<"987">>}]}} ], + URLDecode = fun(Bin) -> cowboy_http:urldecode(Bin, crash) end, [{lists:flatten(io_lib:format("~p, ~p", [H, P])), fun() -> - {ok, Handler, Opts, Binds, undefined, undefined} = match(H, P, Dispatch) + {ok, Handler, Opts, Binds, undefined, undefined} + = match(Dispatch, URLDecode, H, P) end} || {H, P, {ok, Handler, Opts, Binds}} <- Tests]. match_info_test_() -> @@ -304,24 +265,22 @@ match_info_test_() -> ]} ], Tests = [ - {[<<"eu">>, <<"ninenines">>], [], + {<<"ninenines.eu">>, <<"/">>, {ok, match_any, [], [], [], undefined}}, - {[<<"eu">>, <<"ninenines">>, <<"bugs">>], [], + {<<"bugs.ninenines.eu">>, <<"/">>, {ok, match_any, [], [], [<<"bugs">>], undefined}}, - {[<<"eu">>, <<"ninenines">>, <<"bugs">>, <<"cowboy">>], [], + {<<"cowboy.bugs.ninenines.eu">>, <<"/">>, {ok, match_any, [], [], [<<"cowboy">>, <<"bugs">>], undefined}}, - {[<<"eu">>, <<"ninenines">>, <<"www">>], - [<<"pathinfo">>, <<"is">>, <<"next">>], + {<<"www.ninenines.eu">>, <<"/pathinfo/is/next">>, {ok, match_path, [], [], undefined, []}}, - {[<<"eu">>, <<"ninenines">>, <<"www">>], - [<<"pathinfo">>, <<"is">>, <<"next">>, <<"path_info">>], + {<<"www.ninenines.eu">>, <<"/pathinfo/is/next/path_info">>, {ok, match_path, [], [], undefined, [<<"path_info">>]}}, - {[<<"eu">>, <<"ninenines">>, <<"www">>], - [<<"pathinfo">>, <<"is">>, <<"next">>, <<"foo">>, <<"bar">>], + {<<"www.ninenines.eu">>, <<"/pathinfo/is/next/foo/bar">>, {ok, match_path, [], [], undefined, [<<"foo">>, <<"bar">>]}} ], + URLDecode = fun(Bin) -> cowboy_http:urldecode(Bin, crash) end, [{lists:flatten(io_lib:format("~p, ~p", [H, P])), fun() -> - R = match(H, P, Dispatch) + R = match(Dispatch, URLDecode, H, P) end} || {H, P, R} <- Tests]. -endif. -- cgit v1.2.3