Optimize cowboy_protocol

* #state{} changes are avoided where possible * #state{} is now smaller and use less memory * the Req object is created only after the whole request is parsed * parsing makes use of a single binary match context * external calls are avoided in the critical path * URL fragment is now extracted properly (retrieval API next commit) * argument orders to local functions modified to avoid extra operations * dispatching waits as long as possible before tokenizing host/path * handler opts are no longer shown in the error messages except in init The code may not look as beautiful as it was before. But it really is, for parsing code. The parsing section of the file may be skipped if your eyes start to burn.
author: Loïc Hoguin <[email protected]> 2012-09-26 14:11:53 +0200
committer: Loïc Hoguin <[email protected]> 2012-09-26 14:20:29 +0200
commit: b2243aa54438dbea4137960c9dc6f54ac746f429 (patch)
tree: b7f3768cc6fea9bcc9d4363012a6cdf149178977 /src/cowboy_dispatcher.erl
parent: bfab8d4b22d858e7cffa97d04210a62fae56681c (diff)
download: cowboy-b2243aa54438dbea4137960c9dc6f54ac746f429.tar.gz
cowboy-b2243aa54438dbea4137960c9dc6f54ac746f429.tar.bz2
cowboy-b2243aa54438dbea4137960c9dc6f54ac746f429.zip
1 files changed, 117 insertions, 158 deletions
diff --git a/src/cowboy_dispatcher.erl b/src/cowboy_dispatcher.erl
index c52aba9..f5dfcd9 100644
--- a/src/cowboy_dispatcher.erl
+++ b/src/cowboy_dispatcher.erl
@@ -17,16 +17,14 @@
 -module(cowboy_dispatcher).
 
 %% API.
--export([split_host/1]).
--export([split_path/2]).
--export([match/3]).
+-export([match/4]).
 
--type bindings() :: list({atom(), binary()}).
--type tokens() :: list(binary()).
--type match_rule() :: '_' | '*' | list(binary() | '_' | '...' | atom()).
--type dispatch_path() :: list({match_rule(), module(), any()}).
+-type bindings() :: [{atom(), binary()}].
+-type tokens() :: [binary()].
+-type match_rule() :: '_' | '*' | [binary() | '_' | '...' | atom()].
+-type dispatch_path() :: [{match_rule(), module(), any()}].
 -type dispatch_rule() :: {Host::match_rule(), Path::dispatch_path()}.
--type dispatch_rules() :: list(dispatch_rule()).
+-type dispatch_rules() :: [dispatch_rule()].
 
 -export_type([bindings/0]).
 -export_type([tokens/0]).
@@ -38,60 +36,6 @@
 
 %% API.
 
-%% @doc Split a hostname into a list of tokens.
--spec split_host(binary())
-	-> {tokens(), binary(), undefined | inet:port_number()}.
-split_host(Host) ->
-	case binary:match(Host, <<":">>) of
-		nomatch ->
-			{do_split_host(Host, []), Host, undefined};
-		{Pos, _} ->
-			<< Host2:Pos/binary, _:8, Port/bits >> = Host,
-			{do_split_host(Host2, []), Host2,
-				list_to_integer(binary_to_list(Port))}
-	end.
-
-do_split_host(Host, Acc) ->
-	case binary:match(Host, <<".">>) of
-		nomatch when Host =:= <<>> ->
-			Acc;
-		nomatch ->
-			[Host|Acc];
-		{Pos, _} ->
-			<< Segment:Pos/binary, _:8, Rest/bits >> = Host,
-			false = byte_size(Segment) == 0,
-			do_split_host(Rest, [Segment|Acc])
-	end.
-
-%% @doc Split a path into a list of path segments.
-%%
-%% Following RFC2396, this function may return path segments containing any
-%% character, including <em>/</em> if, and only if, a <em>/</em> was escaped
-%% and part of a path segment.
--spec split_path(binary(), fun((binary()) -> binary())) ->
-		{tokens(), binary(), binary()}.
-split_path(Path, URLDec) ->
-	case binary:match(Path, <<"?">>) of
-		nomatch ->
-			{do_split_path(Path, URLDec), Path, <<>>};
-		{Pos, _} ->
-			<< Path2:Pos/binary, _:8, Qs/bits >> = Path,
-			{do_split_path(Path2, URLDec), Path2, Qs}
-	end.
-
-do_split_path(<< "/", Path/bits >>, URLDec) ->
-	do_split_path(Path, URLDec, []).
-do_split_path(Path, URLDec, Acc) ->
-	case binary:match(Path, <<"/">>) of
-		nomatch when Path =:= <<>> ->
-			lists:reverse([URLDec(S) || S <- Acc]);
-		nomatch ->
-			lists:reverse([URLDec(S) || S <- [Path|Acc]]);
-		{Pos, _} ->
-			<< Segment:Pos/binary, _:8, Rest/bits >> = Path,
-			do_split_path(Rest, URLDec, [Segment|Acc])
-	end.
-
 %% @doc Match hostname tokens and path tokens against dispatch rules.
 %%
 %% It is typically used for matching tokens for the hostname and path of
@@ -119,47 +63,93 @@ do_split_path(Path, URLDec, Acc) ->
 %% options found in the dispatch list, a key-value list of bindings and
 %% the tokens that were matched by the <em>'...'</em> atom for both the
 %% hostname and path.
--spec match(Host::tokens(), Path::tokens(), dispatch_rules())
+-spec match(dispatch_rules(), fun((binary()) -> binary()),
+		Host::binary() | tokens(), Path::binary())
 	-> {ok, module(), any(), bindings(),
 		HostInfo::undefined | tokens(),
 		PathInfo::undefined | tokens()}
 	| {error, notfound, host} | {error, notfound, path}.
-match(_Host, _Path, []) ->
+match([], _, _, _) ->
 	{error, notfound, host};
-match(_Host, Path, [{'_', PathMatchs}|_Tail]) ->
-	match_path(Path, PathMatchs, [], undefined);
-match(Host, Path, [{HostMatch, PathMatchs}|Tail]) ->
-	case list_match(Host, lists:reverse(HostMatch), []) of
+match([{'_', PathMatchs}|_Tail], URLDecode, _, Path) ->
+	match_path(PathMatchs, URLDecode, undefined, Path, []);
+match([{HostMatch, PathMatchs}|Tail], URLDecode, Tokens, Path)
+		when is_list(Tokens) ->
+	case list_match(Tokens, lists:reverse(HostMatch), []) of
 		false ->
-			match(Host, Path, Tail);
-		{true, HostBinds, undefined} ->
-			match_path(Path, PathMatchs, HostBinds, undefined);
-		{true, HostBinds, HostInfo} ->
-			match_path(Path, PathMatchs, HostBinds, lists:reverse(HostInfo))
-	end.
+			match(Tail, URLDecode, Tokens, Path);
+		{true, Bindings, undefined} ->
+			match_path(PathMatchs, URLDecode, undefined, Path, Bindings);
+		{true, Bindings, HostInfo} ->
+			match_path(PathMatchs, URLDecode, lists:reverse(HostInfo),
+				Path, Bindings)
+	end;
+match(Dispatch, URLDecode, Host, Path) ->
+	match(Dispatch, URLDecode, split_host(Host), Path).
 
--spec match_path(tokens(), dispatch_path(), bindings(),
-	HostInfo::undefined | tokens())
+-spec match_path(dispatch_path(), fun((binary()) -> binary()),
+	HostInfo::undefined | tokens(), binary() | tokens(), bindings())
 	-> {ok, module(), any(), bindings(),
 		HostInfo::undefined | tokens(),
 		PathInfo::undefined | tokens()}
 	| {error, notfound, path}.
-match_path(_Path, [], _HostBinds, _HostInfo) ->
+match_path([], _, _, _, _) ->
 	{error, notfound, path};
-match_path(_Path, [{'_', Handler, Opts}|_Tail], HostBinds, HostInfo) ->
-	{ok, Handler, Opts, HostBinds, HostInfo, undefined};
-match_path('*', [{'*', Handler, Opts}|_Tail], HostBinds, HostInfo) ->
-	{ok, Handler, Opts, HostBinds, HostInfo, undefined};
-match_path(Path, [{PathMatch, Handler, Opts}|Tail], HostBinds, HostInfo) ->
-	case list_match(Path, PathMatch, []) of
+match_path([{'_', Handler, Opts}|_Tail], _, HostInfo, _, Bindings) ->
+	{ok, Handler, Opts, Bindings, HostInfo, undefined};
+match_path([{'*', Handler, Opts}|_Tail], _, HostInfo, '*', Bindings) ->
+	{ok, Handler, Opts, Bindings, HostInfo, undefined};
+match_path([{PathMatch, Handler, Opts}|Tail], URLDecode, HostInfo, Tokens,
+		Bindings) when is_list(Tokens) ->
+	case list_match(Tokens, PathMatch, []) of
 		false ->
-			match_path(Path, Tail, HostBinds, HostInfo);
+			match_path(Tail, URLDecode, HostInfo, Tokens, Bindings);
 		{true, PathBinds, PathInfo} ->
-			{ok, Handler, Opts, HostBinds ++ PathBinds, HostInfo, PathInfo}
-	end.
+			{ok, Handler, Opts, Bindings ++ PathBinds, HostInfo, PathInfo}
+	end;
+match_path(Dispatch, URLDecode, HostInfo, Path, Bindings) ->
+	match_path(Dispatch, URLDecode, HostInfo,
+		split_path(Path, URLDecode), Bindings).
 
 %% Internal.
 
+%% @doc Split a hostname into a list of tokens.
+-spec split_host(binary()) -> tokens().
+split_host(Host) ->
+	split_host(Host, []).
+
+split_host(Host, Acc) ->
+	case binary:match(Host, <<".">>) of
+		nomatch when Host =:= <<>> ->
+			Acc;
+		nomatch ->
+			[Host|Acc];
+		{Pos, _} ->
+			<< Segment:Pos/binary, _:8, Rest/bits >> = Host,
+			false = byte_size(Segment) == 0,
+			split_host(Rest, [Segment|Acc])
+	end.
+
+%% @doc Split a path into a list of path segments.
+%%
+%% Following RFC2396, this function may return path segments containing any
+%% character, including <em>/</em> if, and only if, a <em>/</em> was escaped
+%% and part of a path segment.
+-spec split_path(binary(), fun((binary()) -> binary())) -> tokens().
+split_path(<< $/, Path/bits >>, URLDec) ->
+	split_path(Path, URLDec, []).
+
+split_path(Path, URLDec, Acc) ->
+	case binary:match(Path, <<"/">>) of
+		nomatch when Path =:= <<>> ->
+			lists:reverse([URLDec(S) || S <- Acc]);
+		nomatch ->
+			lists:reverse([URLDec(S) || S <- [Path|Acc]]);
+		{Pos, _} ->
+			<< Segment:Pos/binary, _:8, Rest/bits >> = Path,
+			split_path(Rest, URLDec, [Segment|Acc])
+	end.
+
 -spec list_match(tokens(), match_rule(), bindings())
 	-> {true, bindings(), undefined | tokens()} | false.
 %% Atom '...' matches any trailing path, stop right now.
@@ -188,64 +178,32 @@ list_match(_List, _Match, _Binds) ->
 split_host_test_() ->
 	%% {Host, Result}
 	Tests = [
-		{<<"">>, {[], <<"">>, undefined}},
-		{<<"*">>, {[<<"*">>], <<"*">>, undefined}},
+		{<<"">>, []},
+		{<<"*">>, [<<"*">>]},
 		{<<"cowboy.ninenines.eu">>,
-			{[<<"eu">>, <<"ninenines">>, <<"cowboy">>],
-			 <<"cowboy.ninenines.eu">>, undefined}},
+			[<<"eu">>, <<"ninenines">>, <<"cowboy">>]},
 		{<<"ninenines.eu">>,
-			{[<<"eu">>, <<"ninenines">>], <<"ninenines.eu">>, undefined}},
-		{<<"ninenines.eu:8080">>,
-			{[<<"eu">>, <<"ninenines">>], <<"ninenines.eu">>, 8080}},
+			[<<"eu">>, <<"ninenines">>]},
 		{<<"a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z">>,
-			{[<<"z">>, <<"y">>, <<"x">>, <<"w">>, <<"v">>, <<"u">>, <<"t">>,
-			  <<"s">>, <<"r">>, <<"q">>, <<"p">>, <<"o">>, <<"n">>, <<"m">>,
-			  <<"l">>, <<"k">>, <<"j">>, <<"i">>, <<"h">>, <<"g">>, <<"f">>,
-			  <<"e">>, <<"d">>, <<"c">>, <<"b">>, <<"a">>],
-			 <<"a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z">>,
-			 undefined}}
+			[<<"z">>, <<"y">>, <<"x">>, <<"w">>, <<"v">>, <<"u">>, <<"t">>,
+			<<"s">>, <<"r">>, <<"q">>, <<"p">>, <<"o">>, <<"n">>, <<"m">>,
+			<<"l">>, <<"k">>, <<"j">>, <<"i">>, <<"h">>, <<"g">>, <<"f">>,
+			<<"e">>, <<"d">>, <<"c">>, <<"b">>, <<"a">>]}
 	],
 	[{H, fun() -> R = split_host(H) end} || {H, R} <- Tests].
 
-split_host_fail_test_() ->
-	Tests = [
-		<<".........">>,
-		<<"ninenines..eu">>,
-		<<"ninenines.eu:owns">>,
-		<<"ninenines.eu: owns">>,
-		<<"ninenines.eu:42fun">>,
-		<<"ninenines.eu: 42fun">>,
-		<<"ninenines.eu:42 fun">>,
-		<<"ninenines.eu:fun 42">>,
-		<<"ninenines.eu: 42">>,
-		<<":owns">>,
-		<<":42 fun">>
-	],
-	[{H, fun() -> case catch split_host(H) of
-		{'EXIT', _Reason} -> ok
-	end end} || H <- Tests].
-
 split_path_test_() ->
 	%% {Path, Result, QueryString}
 	Tests = [
-		{<<"/?">>, [], <<"/">>, <<"">>},
-		{<<"/???">>, [], <<"/">>, <<"??">>},
-		{<<"/">>, [], <<"/">>, <<"">>},
-		{<<"/extend//cowboy">>, [<<"extend">>, <<>>, <<"cowboy">>],
-			<<"/extend//cowboy">>, <<>>},
-		{<<"/users">>, [<<"users">>], <<"/users">>, <<"">>},
-		{<<"/users?">>, [<<"users">>], <<"/users">>, <<"">>},
-		{<<"/users?a">>, [<<"users">>], <<"/users">>, <<"a">>},
-		{<<"/users/42/friends?a=b&c=d&e=notsure?whatever">>,
-			[<<"users">>, <<"42">>, <<"friends">>],
-			<<"/users/42/friends">>, <<"a=b&c=d&e=notsure?whatever">>},
-		{<<"/users/a+b/c%21d?e+f=g+h">>,
-			[<<"users">>, <<"a b">>, <<"c!d">>],
-			<<"/users/a+b/c%21d">>, <<"e+f=g+h">>}
+		{<<"/">>, []},
+		{<<"/extend//cowboy">>, [<<"extend">>, <<>>, <<"cowboy">>]},
+		{<<"/users">>, [<<"users">>]},
+		{<<"/users/42/friends">>, [<<"users">>, <<"42">>, <<"friends">>]},
+		{<<"/users/a+b/c%21d">>, [<<"users">>, <<"a b">>, <<"c!d">>]}
 	],
 	URLDecode = fun(Bin) -> cowboy_http:urldecode(Bin, crash) end,
-	[{P, fun() -> {R, RawP, Qs} = split_path(P, URLDecode) end}
-		|| {P, R, RawP, Qs} <- Tests].
+	[{P, fun() -> R = split_path(P, URLDecode) end}
+		|| {P, R} <- Tests].
 
 match_test_() ->
 	Dispatch = [
@@ -270,28 +228,31 @@ match_test_() ->
 	],
 	%% {Host, Path, Result}
 	Tests = [
-		{[<<"any">>], [], {ok, match_any, [], []}},
-		{[<<"eu">>, <<"ninenines">>, <<"any">>, <<"www">>],
-			[<<"users">>, <<"42">>, <<"mails">>],
+		{<<"any">>, <<"/">>, {ok, match_any, [], []}},
+		{<<"www.any.ninenines.eu">>, <<"/users/42/mails">>,
 			{ok, match_any_subdomain_users, [], []}},
-		{[<<"eu">>, <<"ninenines">>, <<"www">>],
-			[<<"users">>, <<"42">>, <<"mails">>], {ok, match_any, [], []}},
-		{[<<"eu">>, <<"ninenines">>, <<"www">>], [], {ok, match_any, [], []}},
-		{[<<"eu">>, <<"ninenines">>, <<"any">>, <<"www">>],
-			[<<"not_users">>, <<"42">>, <<"mails">>], {error, notfound, path}},
-		{[<<"eu">>, <<"ninenines">>], [], {ok, match_extend, [], []}},
-		{[<<"eu">>, <<"ninenines">>], [<<"users">>, <<"42">>, <<"friends">>],
+		{<<"www.ninenines.eu">>, <<"/users/42/mails">>,
+			{ok, match_any, [], []}},
+		{<<"www.ninenines.eu">>, <<"/">>,
+			{ok, match_any, [], []}},
+		{<<"www.any.ninenines.eu">>, <<"/not_users/42/mails">>,
+			{error, notfound, path}},
+		{<<"ninenines.eu">>, <<"/">>,
+			{ok, match_extend, [], []}},
+		{<<"ninenines.eu">>, <<"/users/42/friends">>,
 			{ok, match_extend_users_friends, [], [{id, <<"42">>}]}},
-		{[<<"fr">>, <<"erlang">>], '_',
+		{<<"erlang.fr">>, '_',
 			{ok, match_erlang_ext, [], [{ext, <<"fr">>}]}},
-		{[<<"any">>], [<<"users">>, <<"444">>, <<"friends">>],
+		{<<"any">>, <<"/users/444/friends">>,
 			{ok, match_users_friends, [], [{id, <<"444">>}]}},
-		{[<<"fr">>, <<"ninenines">>], [<<"threads">>, <<"987">>],
+		{<<"ninenines.fr">>, <<"/threads/987">>,
 			{ok, match_duplicate_vars, [we, {expect, two}, var, here],
-			[{var, <<"fr">>}, {var, <<"987">>}]}}
+				[{var, <<"fr">>}, {var, <<"987">>}]}}
 	],
+	URLDecode = fun(Bin) -> cowboy_http:urldecode(Bin, crash) end,
 	[{lists:flatten(io_lib:format("~p, ~p", [H, P])), fun() ->
-		{ok, Handler, Opts, Binds, undefined, undefined} = match(H, P, Dispatch)
+		{ok, Handler, Opts, Binds, undefined, undefined}
+			= match(Dispatch, URLDecode, H, P)
 	end} || {H, P, {ok, Handler, Opts, Binds}} <- Tests].
 
 match_info_test_() ->
@@ -304,24 +265,22 @@ match_info_test_() ->
 		]}
 	],
 	Tests = [
-		{[<<"eu">>, <<"ninenines">>], [],
+		{<<"ninenines.eu">>, <<"/">>,
 			{ok, match_any, [], [], [], undefined}},
-		{[<<"eu">>, <<"ninenines">>, <<"bugs">>], [],
+		{<<"bugs.ninenines.eu">>, <<"/">>,
 			{ok, match_any, [], [], [<<"bugs">>], undefined}},
-		{[<<"eu">>, <<"ninenines">>, <<"bugs">>, <<"cowboy">>], [],
+		{<<"cowboy.bugs.ninenines.eu">>, <<"/">>,
 			{ok, match_any, [], [], [<<"cowboy">>, <<"bugs">>], undefined}},
-		{[<<"eu">>, <<"ninenines">>, <<"www">>],
-			[<<"pathinfo">>, <<"is">>, <<"next">>],
+		{<<"www.ninenines.eu">>, <<"/pathinfo/is/next">>,
 			{ok, match_path, [], [], undefined, []}},
-		{[<<"eu">>, <<"ninenines">>, <<"www">>],
-			[<<"pathinfo">>, <<"is">>, <<"next">>, <<"path_info">>],
+		{<<"www.ninenines.eu">>, <<"/pathinfo/is/next/path_info">>,
 			{ok, match_path, [], [], undefined, [<<"path_info">>]}},
-		{[<<"eu">>, <<"ninenines">>, <<"www">>],
-			[<<"pathinfo">>, <<"is">>, <<"next">>, <<"foo">>, <<"bar">>],
+		{<<"www.ninenines.eu">>, <<"/pathinfo/is/next/foo/bar">>,
 			{ok, match_path, [], [], undefined, [<<"foo">>, <<"bar">>]}}
 	],
+	URLDecode = fun(Bin) -> cowboy_http:urldecode(Bin, crash) end,
 	[{lists:flatten(io_lib:format("~p, ~p", [H, P])), fun() ->
-		R = match(H, P, Dispatch)
+		R = match(Dispatch, URLDecode, H, P)
 	end} || {H, P, R} <- Tests].
 
 -endif.
author	Loïc Hoguin <[email protected]>	2012-09-26 14:11:53 +0200
committer	Loïc Hoguin <[email protected]>	2012-09-26 14:20:29 +0200
commit	b2243aa54438dbea4137960c9dc6f54ac746f429 (patch)
tree	b7f3768cc6fea9bcc9d4363012a6cdf149178977 /src/cowboy_dispatcher.erl
parent	bfab8d4b22d858e7cffa97d04210a62fae56681c (diff)
download	cowboy-b2243aa54438dbea4137960c9dc6f54ac746f429.tar.gz cowboy-b2243aa54438dbea4137960c9dc6f54ac746f429.tar.bz2 cowboy-b2243aa54438dbea4137960c9dc6f54ac746f429.zip