From 1e7338bdd4768123da177895ec6c91d3dabb7ca9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= <essen@dev-extend.eu>
Date: Mon, 17 Oct 2011 09:15:01 +0200
Subject: Rewrite the token list parsing into separate, modulable functions

Introduce cowboy_http's list/2, nonempty_list/2, token/2 functions.
---
 src/cowboy_http.erl | 102 +++++++++++++++++++++++++++++++---------------------
 1 file changed, 62 insertions(+), 40 deletions(-)

(limited to 'src/cowboy_http.erl')
diff --git a/src/cowboy_http.erl b/src/cowboy_http.erl
index 8d60f82..ae73ba5 100644
--- a/src/cowboy_http.erl
+++ b/src/cowboy_http.erl
@@ -12,10 +12,11 @@
 %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
+%% @doc Core HTTP parsing API.
 -module(cowboy_http).
 
 %% Parsing.
--export([parse_tokens_list/1]).
+-export([list/2, nonempty_list/2, token/2]).
 
 %% Interpretation.
 -export([connection_to_atom/1]).
@@ -25,53 +26,74 @@
 
 %% Parsing.
 
-%% @doc Parse a list of tokens, as is often found in HTTP headers.
-%%
+%% @doc Parse a non-empty list of the given type.
+-spec nonempty_list(binary(), fun()) -> [any(), ...] | {error, badarg}.
+nonempty_list(Data, Fun) ->
+	case list(Data, Fun, []) of
+		{error, badarg} -> {error, badarg};
+		[] -> {error, badarg};
+		L -> lists:reverse(L)
+	end.
+
+%% @doc Parse a list of the given type.
+-spec list(binary(), fun()) -> list() | {error, badarg}.
+list(Data, Fun) ->
+	case list(Data, Fun, []) of
+		{error, badarg} -> {error, badarg};
+		L -> lists:reverse(L)
+	end.
+
+-spec list(binary(), fun(), [binary()]) -> [any()] | {error, badarg}.
+list(<<>>, _Fun, Acc) ->
+	Acc;
 %% From the RFC:
 %% <blockquote>Wherever this construct is used, null elements are allowed,
 %% but do not contribute to the count of elements present.
 %% That is, "(element), , (element) " is permitted, but counts
 %% as only two elements. Therefore, where at least one element is required,
 %% at least one non-null element MUST be present.</blockquote>
--spec parse_tokens_list(binary()) -> [binary()] | {error, badarg}.
-parse_tokens_list(Value) ->
-	case parse_tokens_list(Value, ws_or_sep, <<>>, []) of
-		{error, badarg} ->
-			{error, badarg};
-		L when length(L) =:= 0 ->
-			{error, badarg};
-		L ->
-			lists:reverse(L)
-	end.
+list(<< $,, Rest/bits >>, Fun, Acc) ->
+	list(Rest, Fun, Acc);
+list(Data, Fun, Acc) ->
+	Fun(Data,
+		fun (R, <<>>) -> list_separator(R,
+				fun (D) -> list(D, Fun, Acc) end);
+			(R, I) -> list_separator(R,
+				fun (D) -> list(D, Fun, [I|Acc]) end)
+		end).
 
--spec parse_tokens_list(binary(), token | ws | ws_or_sep, binary(),
-	[binary()]) -> [binary()] | {error, badarg}.
-parse_tokens_list(<<>>, token, Token, Acc) ->
-	[Token|Acc];
-parse_tokens_list(<< C, Rest/bits >>, token, Token, Acc)
+-spec list_separator(binary(), fun()) -> any().
+list_separator(<<>>, Fun) ->
+	Fun(<<>>);
+list_separator(<< $,, Rest/bits >>, Fun) ->
+	Fun(Rest);
+list_separator(<< C, Rest/bits >>, Fun)
 		when C =:= $\s; C =:= $\t ->
-	parse_tokens_list(Rest, ws, <<>>, [Token|Acc]);
-parse_tokens_list(<< $,, Rest/bits >>, token, Token, Acc) ->
-	parse_tokens_list(Rest, ws_or_sep, <<>>, [Token|Acc]);
-parse_tokens_list(<< C, Rest/bits >>, token, Token, Acc) ->
-	parse_tokens_list(Rest, token, << Token/binary, C >>, Acc);
-parse_tokens_list(<< C, Rest/bits >>, ws, <<>>, Acc)
-		when C =:= $\s; C =:= $\t ->
-	parse_tokens_list(Rest, ws, <<>>, Acc);
-parse_tokens_list(<< $,, Rest/bits >>, ws, <<>>, Acc) ->
-	parse_tokens_list(Rest, ws_or_sep, <<>>, Acc);
-parse_tokens_list(<<>>, ws_or_sep, <<>>, Acc) ->
-	Acc;
-parse_tokens_list(<< C, Rest/bits >>, ws_or_sep, <<>>, Acc)
-		when C =:= $\s; C =:= $\t ->
-	parse_tokens_list(Rest, ws_or_sep, <<>>, Acc);
-parse_tokens_list(<< $,, Rest/bits >>, ws_or_sep, <<>>, Acc) ->
-	parse_tokens_list(Rest, ws_or_sep, <<>>, Acc);
-parse_tokens_list(<< C, Rest/bits >>, ws_or_sep, <<>>, Acc) ->
-	parse_tokens_list(Rest, token, << C >>, Acc);
-parse_tokens_list(_Value, _State, _Token, _Acc) ->
+	list_separator(Rest, Fun);
+list_separator(_Data, _Fun) ->
 	{error, badarg}.
 
+%% @doc Parse a token.
+-spec token(binary(), fun()) -> any().
+token(<< C, Rest/bits >>, Fun)
+		when C =:= $\s; C =:= $\t ->
+	token(Rest, Fun);
+token(Data, Fun) ->
+	token(Data, Fun, <<>>).
+
+-spec token(binary(), fun(), binary()) -> any().
+token(<<>>, Fun, Acc) ->
+	Fun(<<>>, Acc);
+token(Data = << C, _Rest/bits >>, Fun, Acc)
+		when C =:= $(; C =:= $); C =:= $<; C =:= $>; C =:= $@;
+			 C =:= $,; C =:= $;; C =:= $:; C =:= $\\; C =:= $";
+			 C =:= $/; C =:= $[; C =:= $]; C =:= $?; C =:= $=;
+			 C =:= ${; C =:= $}; C =:= $\s; C =:= $\t;
+			 C < 32; C =:= 127 ->
+	Fun(Data, Acc);
+token(<< C, Rest/bits >>, Fun, Acc) ->
+	token(Rest, Fun, << Acc/binary, C >>).
+
 %% Interpretation.
 
 %% @doc Walk through a tokens list and return whether
@@ -94,7 +116,7 @@ connection_to_atom([Connection|Tail]) ->
 
 -ifdef(TEST).
 
-parse_tokens_list_test_() ->
+nonempty_token_list_test_() ->
 	%% {Value, Result}
 	Tests = [
 		{<<>>, {error, badarg}},
@@ -108,7 +130,7 @@ parse_tokens_list_test_() ->
 		{<<"close">>, [<<"close">>]},
 		{<<"keep-alive, upgrade">>, [<<"keep-alive">>, <<"upgrade">>]}
 	],
-	[{V, fun() -> R = parse_tokens_list(V) end} || {V, R} <- Tests].
+	[{V, fun() -> R = nonempty_list(V, fun token/2) end} || {V, R} <- Tests].
 
 connection_to_atom_test_() ->
 	%% {Tokens, Result}
-- 
cgit v1.2.3