%% Copyright (c) 2011, Loïc Hoguin <[email protected]>
%%
%% Permission to use, copy, modify, and/or distribute this software for any
%% purpose with or without fee is hereby granted, provided that the above
%% copyright notice and this permission notice appear in all copies.
%%
%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
%% @doc Core HTTP parsing API.
-module(cowboy_http).
%% Parsing.
-export([list/2, nonempty_list/2,
media_range/2, conneg/2,
token/2, token_ci/2, quoted_string/2]).
%% Interpretation.
-export([connection_to_atom/1]).
-include("include/http.hrl").
-include_lib("eunit/include/eunit.hrl").
%% Parsing.
%% @doc Parse a non-empty list of the given type.
-spec nonempty_list(binary(), fun()) -> [any(), ...] | {error, badarg}.
nonempty_list(Data, Fun) ->
case list(Data, Fun, []) of
{error, badarg} -> {error, badarg};
[] -> {error, badarg};
L -> lists:reverse(L)
end.
%% @doc Parse a list of the given type.
-spec list(binary(), fun()) -> list() | {error, badarg}.
list(Data, Fun) ->
case list(Data, Fun, []) of
{error, badarg} -> {error, badarg};
L -> lists:reverse(L)
end.
-spec list(binary(), fun(), [binary()]) -> [any()] | {error, badarg}.
%% From the RFC:
%% <blockquote>Wherever this construct is used, null elements are allowed,
%% but do not contribute to the count of elements present.
%% That is, "(element), , (element) " is permitted, but counts
%% as only two elements. Therefore, where at least one element is required,
%% at least one non-null element MUST be present.</blockquote>
list(Data, Fun, Acc) ->
whitespace(Data,
fun (<<>>) -> Acc;
(<< $,, Rest/bits >>) -> list(Rest, Fun, Acc);
(Rest) -> Fun(Rest,
fun (D, I) -> whitespace(D,
fun (<<>>) -> [I|Acc];
(<< $,, R/bits >>) -> list(R, Fun, [I|Acc]);
(_Any) -> {error, badarg}
end)
end)
end).
%% @doc Parse a media range.
-spec media_range(binary(), fun()) -> any().
media_range(Data, Fun) ->
token_ci(Data,
fun (_Rest, <<>>) -> {error, badarg};
(Rest, Type) -> whitespace(Rest,
fun (<< $/, Rest2/bits >>) -> whitespace(Rest2,
fun (<<>>) -> {error, badarg};
(Rest3) -> media_range_subtype(Rest3, Fun, Type)
end);
(_Rest2) -> {error, badarg}
end)
end).
-spec media_range_subtype(binary(), fun(), binary()) -> any().
media_range_subtype(Data, Fun, Type) ->
token_ci(Data,
fun (_Rest, <<>>) -> {error, badarg};
(Rest, SubType) -> media_range_params(Rest, Fun, Type, SubType, [])
end).
-spec media_range_params(binary(), fun(), binary(), binary(),
[{binary(), binary()}]) -> any().
media_range_params(Data, Fun, Type, SubType, Acc) ->
whitespace(Data,
fun (<< $;, Rest/bits >>) ->
whitespace(Rest,
fun (Rest2) ->
media_range_param_attr(Rest2, Fun, Type, SubType, Acc)
end);
(Rest) -> Fun(Rest, {{Type, SubType, lists:reverse(Acc)}, 1000, []})
end).
-spec media_range_param_attr(binary(), fun(), binary(), binary(),
[{binary(), binary()}]) -> any().
media_range_param_attr(Data, Fun, Type, SubType, Acc) ->
token_ci(Data,
fun (_Rest, <<>>) -> {error, badarg};
(Rest, Attr) ->
whitespace(Rest,
fun (<< $=, Rest2/bits >>) ->
whitespace(Rest2,
fun (<<>>) -> {error, badarg};
(Rest3) ->
media_range_param_value(Rest3, Fun,
Type, SubType, Acc, Attr)
end);
(_Rest2) ->
{error, badarg}
end)
end).
-spec media_range_param_value(binary(), fun(), binary(), binary(),
[{binary(), binary()}], binary()) -> any().
media_range_param_value(Data, Fun, Type, SubType, Acc, <<"q">>) ->
qvalue(Data,
fun (Rest, Quality) ->
accept_ext(Rest, Fun, Type, SubType, Acc, Quality, [])
end);
media_range_param_value(Data = << $", _/bits >>, Fun,
Type, SubType, Acc, Attr) ->
quoted_string(Data,
fun (Rest, Value) ->
media_range_params(Rest, Fun,
Type, SubType, [{Attr, Value}|Acc])
end);
media_range_param_value(Data, Fun, Type, SubType, Acc, Attr) ->
token(Data,
fun (_Rest, <<>>) -> {error, badarg};
(Rest, Value) ->
media_range_params(Rest, Fun,
Type, SubType, [{Attr, Value}|Acc])
end).
-spec accept_ext(binary(), fun(), binary(), binary(),
[{binary(), binary()}], 0..1000,
[{binary(), binary()} | binary()]) -> any().
accept_ext(Data, Fun, Type, SubType, Params, Quality, Acc) ->
whitespace(Data,
fun (<< $;, Rest/bits >>) ->
whitespace(Rest,
fun (Rest2) ->
accept_ext_attr(Rest2, Fun,
Type, SubType, Params, Quality, Acc)
end);
(Rest) ->
Fun(Rest, {{Type, SubType, lists:reverse(Params)},
Quality, lists:reverse(Acc)})
end).
-spec accept_ext_attr(binary(), fun(), binary(), binary(),
[{binary(), binary()}], 0..1000,
[{binary(), binary()} | binary()]) -> any().
accept_ext_attr(Data, Fun, Type, SubType, Params, Quality, Acc) ->
token_ci(Data,
fun (_Rest, <<>>) -> {error, badarg};
(Rest, Attr) ->
whitespace(Rest,
fun (<< $=, Rest2/bits >>) ->
whitespace(Rest2,
fun (<<>>) -> {error, badarg};
(Rest3) ->
accept_ext_value(Rest3, Fun,
Type, SubType, Params,
Quality, Acc, Attr)
end);
(Rest2) ->
accept_ext(Rest2, Fun,
Type, SubType, Params,
Quality, [Attr|Acc])
end)
end).
-spec accept_ext_value(binary(), fun(), binary(), binary(),
[{binary(), binary()}], 0..1000,
[{binary(), binary()} | binary()], binary()) -> any().
accept_ext_value(Data = << $", _/bits >>, Fun,
Type, SubType, Params, Quality, Acc, Attr) ->
quoted_string(Data,
fun (Rest, Value) ->
accept_ext(Rest, Fun,
Type, SubType, Params, Quality, [{Attr, Value}|Acc])
end);
accept_ext_value(Data, Fun, Type, SubType, Params, Quality, Acc, Attr) ->
token(Data,
fun (_Rest, <<>>) -> {error, badarg};
(Rest, Value) ->
accept_ext(Rest, Fun,
Type, SubType, Params, Quality, [{Attr, Value}|Acc])
end).
%% @doc Parse a conneg header (Accept-Charset, Accept-Encoding),
%% followed by an optional quality value.
-spec conneg(binary(), fun()) -> any().
conneg(Data, Fun) ->
token_ci(Data,
fun (_Rest, <<>>) -> {error, badarg};
(Rest, Conneg) ->
whitespace(Rest,
fun (<< $;, Rest2/bits >>) ->
whitespace(Rest2,
fun (Rest3) ->
qparam(Rest3,
fun (Rest4, Quality) ->
Fun(Rest4, {Conneg, Quality})
end)
end);
(Rest2) ->
Fun(Rest2, {Conneg, 1000})
end)
end).
%% Parse a quality parameter string (for example q=0.500).
-spec qparam(binary(), fun()) -> any().
qparam(<< $q, Rest/bits >>, Fun) ->
whitespace(Rest,
fun (<< $=, Rest2/bits >>) ->
whitespace(Rest2,
fun (Rest3) ->
qvalue(Rest3,
fun (Rest4, Quality) ->
Fun(Rest4, Quality)
end)
end);
(_Rest2) ->
{error, badarg}
end).
%% @doc Skip whitespace.
-spec whitespace(binary(), fun()) -> any().
whitespace(<< C, Rest/bits >>, Fun)
when C =:= $\s; C =:= $\t ->
whitespace(Rest, Fun);
whitespace(Data, Fun) ->
Fun(Data).
%% @doc Parse a case-insensitive token.
%%
%% Changes all characters to lowercase.
-spec token_ci(binary(), fun()) -> any().
token_ci(Data, Fun) ->
token(Data, Fun, ci, <<>>).
%% @doc Parse a token.
-spec token(binary(), fun()) -> any().
token(Data, Fun) ->
token(Data, Fun, cs, <<>>).
-spec token(binary(), fun(), ci | cs, binary()) -> any().
token(<<>>, Fun, _Case, Acc) ->
Fun(<<>>, Acc);
token(Data = << C, _Rest/bits >>, Fun, _Case, Acc)
when C =:= $(; C =:= $); C =:= $<; C =:= $>; C =:= $@;
C =:= $,; C =:= $;; C =:= $:; C =:= $\\; C =:= $";
C =:= $/; C =:= $[; C =:= $]; C =:= $?; C =:= $=;
C =:= ${; C =:= $}; C =:= $\s; C =:= $\t;
C < 32; C =:= 127 ->
Fun(Data, Acc);
token(<< C, Rest/bits >>, Fun, Case = ci, Acc) ->
C2 = cowboy_bstr:char_to_lower(C),
token(Rest, Fun, Case, << Acc/binary, C2 >>);
token(<< C, Rest/bits >>, Fun, Case, Acc) ->
token(Rest, Fun, Case, << Acc/binary, C >>).
%% @doc Parse a quoted string.
-spec quoted_string(binary(), fun()) -> any().
quoted_string(<< $", Rest/bits >>, Fun) ->
quoted_string(Rest, Fun, <<>>).
-spec quoted_string(binary(), fun(), binary()) -> any().
quoted_string(<<>>, _Fun, _Acc) ->
{error, badarg};
quoted_string(<< $", Rest/bits >>, Fun, Acc) ->
Fun(Rest, Acc);
quoted_string(<< $\\, C, Rest/bits >>, Fun, Acc) ->
quoted_string(Rest, Fun, << Acc/binary, C >>);
quoted_string(<< C, Rest/bits >>, Fun, Acc) ->
quoted_string(Rest, Fun, << Acc/binary, C >>).
%% @doc Parse a quality value.
-spec qvalue(binary(), fun()) -> any().
qvalue(<< $0, $., Rest/bits >>, Fun) ->
qvalue(Rest, Fun, 0, 100);
qvalue(<< $0, Rest/bits >>, Fun) ->
Fun(Rest, 0);
qvalue(<< $1, $., $0, $0, $0, Rest/bits >>, Fun) ->
Fun(Rest, 1000);
qvalue(<< $1, $., $0, $0, Rest/bits >>, Fun) ->
Fun(Rest, 1000);
qvalue(<< $1, $., $0, Rest/bits >>, Fun) ->
Fun(Rest, 1000);
qvalue(<< $1, Rest/bits >>, Fun) ->
Fun(Rest, 1000);
qvalue(_Data, _Fun) ->
{error, badarg}.
-spec qvalue(binary(), fun(), integer(), 1 | 10 | 100) -> any().
qvalue(Data, Fun, Q, 0) ->
Fun(Data, Q);
qvalue(<< C, Rest/bits >>, Fun, Q, M)
when C =:= $0; C =:= $1; C =:= $2; C =:= $3; C =:= $4;
C =:= $5; C =:= $6; C =:= $7; C =:= $8; C =:= $9 ->
qvalue(Rest, Fun, Q + (C - $0) * M, M div 10);
qvalue(Data, Fun, Q, _M) ->
Fun(Data, Q).
%% Interpretation.
%% @doc Walk through a tokens list and return whether
%% the connection is keepalive or closed.
%%
%% The connection token is expected to be lower-case.
-spec connection_to_atom([binary()]) -> keepalive | close.
connection_to_atom([]) ->
keepalive;
connection_to_atom([<<"keep-alive">>|_Tail]) ->
keepalive;
connection_to_atom([<<"close">>|_Tail]) ->
close;
connection_to_atom([_Any|Tail]) ->
connection_to_atom(Tail).
%% Tests.
-ifdef(TEST).
nonempty_charset_list_test_() ->
%% {Value, Result}
Tests = [
{<<>>, {error, badarg}},
{<<"iso-8859-5, unicode-1-1;q=0.8">>, [
{<<"iso-8859-5">>, 1000},
{<<"unicode-1-1">>, 800}
]}
],
[{V, fun() -> R = nonempty_list(V, fun conneg/2) end} || {V, R} <- Tests].
nonempty_token_list_test_() ->
%% {Value, Result}
Tests = [
{<<>>, {error, badarg}},
{<<" ">>, {error, badarg}},
{<<" , ">>, {error, badarg}},
{<<",,,">>, {error, badarg}},
{<<"a b">>, {error, badarg}},
{<<"a , , , ">>, [<<"a">>]},
{<<" , , , a">>, [<<"a">>]},
{<<"a, , b">>, [<<"a">>, <<"b">>]},
{<<"close">>, [<<"close">>]},
{<<"keep-alive, upgrade">>, [<<"keep-alive">>, <<"upgrade">>]}
],
[{V, fun() -> R = nonempty_list(V, fun token/2) end} || {V, R} <- Tests].
media_range_list_test_() ->
%% {Tokens, Result}
Tests = [
{<<"audio/*; q=0.2, audio/basic">>, [
{{<<"audio">>, <<"*">>, []}, 200, []},
{{<<"audio">>, <<"basic">>, []}, 1000, []}
]},
{<<"text/plain; q=0.5, text/html, "
"text/x-dvi; q=0.8, text/x-c">>, [
{{<<"text">>, <<"plain">>, []}, 500, []},
{{<<"text">>, <<"html">>, []}, 1000, []},
{{<<"text">>, <<"x-dvi">>, []}, 800, []},
{{<<"text">>, <<"x-c">>, []}, 1000, []}
]},
{<<"text/*, text/html, text/html;level=1, */*">>, [
{{<<"text">>, <<"*">>, []}, 1000, []},
{{<<"text">>, <<"html">>, []}, 1000, []},
{{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
{{<<"*">>, <<"*">>, []}, 1000, []}
]},
{<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
"text/html;level=2;q=0.4, */*;q=0.5">>, [
{{<<"text">>, <<"*">>, []}, 300, []},
{{<<"text">>, <<"html">>, []}, 700, []},
{{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
{{<<"text">>, <<"html">>, [{<<"level">>, <<"2">>}]}, 400, []},
{{<<"*">>, <<"*">>, []}, 500, []}
]},
{<<"text/html;level=1;quoted=\"hi hi hi\";"
"q=0.123;standalone;complex=gits, text/plain">>, [
{{<<"text">>, <<"html">>,
[{<<"level">>, <<"1">>}, {<<"quoted">>, <<"hi hi hi">>}]}, 123,
[<<"standalone">>, {<<"complex">>, <<"gits">>}]},
{{<<"text">>, <<"plain">>, []}, 1000, []}
]}
],
[{V, fun() -> R = list(V, fun media_range/2) end} || {V, R} <- Tests].
connection_to_atom_test_() ->
%% {Tokens, Result}
Tests = [
{[<<"close">>], close},
{[<<"keep-alive">>], keepalive},
{[<<"keep-alive">>, <<"upgrade">>], keepalive}
],
[{lists:flatten(io_lib:format("~p", [T])),
fun() -> R = connection_to_atom(T) end} || {T, R} <- Tests].
-endif.