aboutsummaryrefslogtreecommitdiffstats
path: root/src/cow_http_hd.erl
diff options
context:
space:
mode:
author: Loïc Hoguin <[email protected]> 2014-12-14 23:24:44 +0200
committer: Loïc Hoguin <[email protected]> 2014-12-14 23:24:44 +0200
commit: 223902002097370da74494e7d3d5463ed1707ea7 (patch)
tree: f3a6dc4e3e32c6fefe0849c86f940d5dd0e84309 /src/cow_http_hd.erl
parent: 986a0786edbcba8c2260161a1a1602ced040d548 (diff)
downloadcowlib-223902002097370da74494e7d3d5463ed1707ea7.tar.gz
cowlib-223902002097370da74494e7d3d5463ed1707ea7.tar.bz2
cowlib-223902002097370da74494e7d3d5463ed1707ea7.zip
Add cow_http_hd:parse_content_type/1
From RFC7231. This implementation is about 4 times faster than the one currently found in Cowboy.
Diffstat (limited to 'src/cow_http_hd.erl')
-rw-r--r-- src/cow_http_hd.erl 169
1 files changed, 168 insertions, 1 deletions
diff --git a/src/cow_http_hd.erl b/src/cow_http_hd.erl
index 668b80d..49db989 100644
--- a/src/cow_http_hd.erl
+++ b/src/cow_http_hd.erl
@@ -20,10 +20,14 @@
-export([parse_accept_language/1]).
-export([parse_connection/1]).
-export([parse_content_length/1]).
+-export([parse_content_type/1]).
-export([parse_expect/1]).
-export([parse_max_forwards/1]).
-export([parse_transfer_encoding/1]).
+-type media_type() :: {binary(), binary(), [{binary(), binary()}]}.
+-export_type([media_type/0]).
+
-type qvalue() :: 0..1000.
-export_type([qvalue/0]).
@@ -31,11 +35,34 @@
-ifdef(TEST).
-include_lib("triq/include/triq.hrl").
+
+%% Character lists shared by the generators below.
+
+%% All ASCII letters: lowercase first, then uppercase.
+alpha_chars() ->
+    Lower = lists:seq($a, $z),
+    Upper = lists:seq($A, $Z),
+    Lower ++ Upper.
+
+%% All ASCII decimal digits.
+digit_chars() ->
+    lists:seq($0, $9).
+
+%% Generator for a single token character: one of the listed symbols,
+%% a digit or an ASCII letter.
+tchar() ->
+    Symbols = [$!, $#, $$, $%, $&, $', $*, $+, $-, $., $^, $_, $`, $|, $~],
+    oneof(Symbols ++ digit_chars() ++ alpha_chars()).
+
+%% Generator for a token: a non-empty run of tchar() returned as a binary.
+token() ->
+    ?LET(Chars, non_empty(list(tchar())), list_to_binary(Chars)).
+
+%% Generator for one byte allowed unescaped inside a quoted string:
+%% tab, space, `!', 16#23..16#5b, 16#5d..16#7e and 16#80..16#ff.
+%% The double quote (16#22) and the backslash (16#5c) are left out.
+qdtext() ->
+    Allowed = [$\t, $\s, $!]
+        ++ lists:seq(16#23, 16#5b)
+        ++ lists:seq(16#5d, 16#7e)
+        ++ lists:seq(16#80, 16#ff),
+    oneof(Allowed).
+
+%% Generator for an escaped byte, represented as the two-element list
+%% [Backslash, Byte] so that unquote/2 can tell it apart from plain bytes.
+quoted_pair() ->
+    Escapable = [$\t, $\s] ++ lists:seq(16#21, 16#7e) ++ lists:seq(16#80, 16#ff),
+    [$\\, oneof(Escapable)].
+
+%% Generator for a quoted string as the three-element list
+%% [DQuote, Content, DQuote]. Content is mostly qdtext() bytes with an
+%% occasional quoted_pair() (weights 100 to 1).
+quoted_string() ->
+    Content = list(frequency([{100, qdtext()}, {1, quoted_pair()}])),
+    [$", Content, $"].
+
+%% Helper function for ( token / quoted-string ) values.
+%%
+%% A value shaped like the output of quoted_string() is stripped of its
+%% surrounding quotes and has its quoted pairs unescaped, giving a binary.
+%% Anything else (a token) is returned untouched.
+unquote([$", Content, $"]) -> unquote(Content, <<>>);
+unquote(Token) -> Token.
+
+%% Walk the quoted-string content, appending each byte to the accumulator.
+%% A [Backslash, Byte] element contributes only Byte.
+unquote([], Unquoted) ->
+    Unquoted;
+unquote([[$\\, Escaped]|Rest], Unquoted) ->
+    unquote(Rest, << Unquoted/binary, Escaped >>);
+unquote([Byte|Rest], Unquoted) ->
+    unquote(Rest, << Unquoted/binary, Byte >>).
-endif.
%% @doc Parse the Accept header.
--spec parse_accept(binary()) -> [{{binary(), binary(), [{binary(), binary()}]}, qvalue(), [binary() | {binary(), binary()}]}].
+-spec parse_accept(binary()) -> [{media_type(), qvalue(), [binary() | {binary(), binary()}]}].
parse_accept(<<"*/*">>) ->
[{{<<"*">>, <<"*">>, []}, 1000, []}];
parse_accept(Accept) ->
@@ -566,6 +593,146 @@ horse_parse_content_length_giga() ->
).
-endif.
+%% @doc Parse the Content-Type header.
+%%
+%% Returns `{Type, SubType, Params}' where Params is a list of
+%% `{Name, Value}' binaries in the order they appear in the header.
+%% The input must start with a token character; anything else
+%% (including an empty binary) matches no clause and crashes.
+
+-spec parse_content_type(binary()) -> media_type().
+parse_content_type(<< C, Rest/bits >>) when ?IS_TOKEN(C) ->
+    %% The first byte starts the type; media_type/2 reads the rest of it.
+    case C of
+        ?INLINE_LOWERCASE(media_type, Rest, <<>>)
+    end.
+
+%% Read the remainder of the type into Type.
+%%
+%% The type ends at `/', which must be immediately followed by a token
+%% character: that character becomes the first byte of the subtype.
+%% Any other byte that is not a token character matches no clause.
+media_type(<< $/, C, Rest/bits >>, Type) when ?IS_TOKEN(C) ->
+    case C of
+        ?INLINE_LOWERCASE(media_subtype, Rest, Type, <<>>)
+    end;
+media_type(<< C, Rest/bits >>, Type) when ?IS_TOKEN(C) ->
+    case C of
+        ?INLINE_LOWERCASE(media_type, Rest, Type)
+    end.
+
+%% Read the remainder of the subtype into SubType.
+%%
+%% End of input returns the media type with no parameters. A `;'
+%% introduces the first parameter. A space or tab ends the subtype
+%% and hands over to media_before_semicolon/4.
+media_subtype(<<>>, Type, SubType) ->
+    {Type, SubType, []};
+media_subtype(<< $;, Rest/bits >>, Type, SubType) ->
+    media_before_param(Rest, Type, SubType, []);
+media_subtype(<< WS, Rest/bits >>, Type, SubType) when WS =:= $\s; WS =:= $\t ->
+    media_before_semicolon(Rest, Type, SubType, []);
+media_subtype(<< C, Rest/bits >>, Type, SubType) when ?IS_TOKEN(C) ->
+    case C of
+        ?INLINE_LOWERCASE(media_subtype, Rest, Type, SubType)
+    end.
+
+%% Skip spaces and tabs that follow the subtype or a parameter value.
+%%
+%% Params is accumulated in reverse, so it is put back in header order
+%% when the input ends here. Only whitespace, `;' or end of input are
+%% accepted in this state.
+media_before_semicolon(<<>>, Type, SubType, Params) ->
+    {Type, SubType, lists:reverse(Params)};
+media_before_semicolon(<< $;, Rest/bits >>, Type, SubType, Params) ->
+    media_before_param(Rest, Type, SubType, Params);
+media_before_semicolon(<< WS, Rest/bits >>, Type, SubType, Params) when WS =:= $\s; WS =:= $\t ->
+    media_before_semicolon(Rest, Type, SubType, Params).
+
+%% Skip spaces and tabs after a `;', then start reading a parameter.
+%%
+%% A parameter written exactly as lowercase "charset=" is recognized up
+%% front and sent to the charset-specific states. Every other parameter
+%% name is read by media_param/5, starting from its first token character.
+media_before_param(<< WS, Rest/bits >>, Type, SubType, Params) when WS =:= $\s; WS =:= $\t ->
+    media_before_param(Rest, Type, SubType, Params);
+media_before_param(<< "charset=", $", Rest/bits >>, Type, SubType, Params) ->
+    media_charset_quoted(Rest, Type, SubType, Params, <<>>);
+media_before_param(<< "charset=", Rest/bits >>, Type, SubType, Params) ->
+    media_charset(Rest, Type, SubType, Params, <<>>);
+media_before_param(<< C, Rest/bits >>, Type, SubType, Params) when ?IS_TOKEN(C) ->
+    case C of
+        ?INLINE_LOWERCASE(media_param, Rest, Type, SubType, Params, <<>>)
+    end.
+
+%% Read a quoted charset value into Charset, up to the closing quote.
+%%
+%% A backslash followed by a byte accepted by ?IS_VCHAR contributes only
+%% that byte. The parameter is stored under the fixed name <<"charset">>.
+%% There is no clause for end of input: an unterminated quoted value
+%% matches nothing.
+media_charset_quoted(<< $", Rest/bits >>, Type, SubType, Params, Charset) ->
+    media_before_semicolon(Rest, Type, SubType, [{<<"charset">>, Charset}|Params]);
+media_charset_quoted(<< $\\, C, Rest/bits >>, Type, SubType, Params, Charset) when ?IS_VCHAR(C) ->
+    case C of
+        ?INLINE_LOWERCASE(media_charset_quoted, Rest, Type, SubType, Params, Charset)
+    end;
+media_charset_quoted(<< C, Rest/bits >>, Type, SubType, Params, Charset) when ?IS_VCHAR(C) ->
+    case C of
+        ?INLINE_LOWERCASE(media_charset_quoted, Rest, Type, SubType, Params, Charset)
+    end.
+
+%% Read an unquoted charset value into Charset.
+%%
+%% The value ends at end of input, at `;' (another parameter follows) or
+%% at a space or tab. In every case the parameter is stored under the
+%% fixed name <<"charset">>.
+media_charset(<<>>, Type, SubType, Params, Charset) ->
+    {Type, SubType, lists:reverse([{<<"charset">>, Charset}|Params])};
+media_charset(<< $;, Rest/bits >>, Type, SubType, Params, Charset) ->
+    media_before_param(Rest, Type, SubType, [{<<"charset">>, Charset}|Params]);
+media_charset(<< WS, Rest/bits >>, Type, SubType, Params, Charset) when WS =:= $\s; WS =:= $\t ->
+    media_before_semicolon(Rest, Type, SubType, [{<<"charset">>, Charset}|Params]);
+media_charset(<< C, Rest/bits >>, Type, SubType, Params, Charset) when ?IS_TOKEN(C) ->
+    case C of
+        ?INLINE_LOWERCASE(media_charset, Rest, Type, SubType, Params, Charset)
+    end.
+
+%% Read the remainder of a parameter name into Name, up to the `='.
+%%
+%% Parameter names are case-insensitive. media_before_param/4 only
+%% recognizes the charset parameter when it is written exactly as
+%% lowercase "charset=", so names such as `Charset' or `CHARSET' are read
+%% here instead. Once the `=' is reached the accumulated (lowercased)
+%% name is checked: a charset parameter is handed to the charset states
+%% so its value is lowercased regardless of how the name was written.
+%% All other parameters keep the case of their value.
+media_param(<< $=, $", Rest/bits >>, Type, SubType, Params, <<"charset">>) ->
+    media_charset_quoted(Rest, Type, SubType, Params, <<>>);
+media_param(<< $=, Rest/bits >>, Type, SubType, Params, <<"charset">>) ->
+    media_charset(Rest, Type, SubType, Params, <<>>);
+media_param(<< $=, $", Rest/bits >>, Type, SubType, Params, Name) ->
+    media_quoted(Rest, Type, SubType, Params, Name, <<>>);
+media_param(<< $=, Rest/bits >>, Type, SubType, Params, Name) ->
+    media_value(Rest, Type, SubType, Params, Name, <<>>);
+media_param(<< C, Rest/bits >>, Type, SubType, Params, Name) when ?IS_TOKEN(C) ->
+    case C of
+        ?INLINE_LOWERCASE(media_param, Rest, Type, SubType, Params, Name)
+    end.
+
+%% Read a quoted parameter value into Value, up to the closing quote.
+%%
+%% Bytes are appended as they are, so the case of the value is kept.
+%% A backslash followed by a byte accepted by ?IS_VCHAR contributes only
+%% that byte. There is no clause for end of input: an unterminated
+%% quoted value matches nothing.
+media_quoted(<< $", Rest/bits >>, Type, SubType, Params, Name, Value) ->
+    media_before_semicolon(Rest, Type, SubType, [{Name, Value}|Params]);
+media_quoted(<< $\\, C, Rest/bits >>, Type, SubType, Params, Name, Value) when ?IS_VCHAR(C) ->
+    media_quoted(Rest, Type, SubType, Params, Name, << Value/binary, C >>);
+media_quoted(<< C, Rest/bits >>, Type, SubType, Params, Name, Value) when ?IS_VCHAR(C) ->
+    media_quoted(Rest, Type, SubType, Params, Name, << Value/binary, C >>).
+
+%% Read an unquoted parameter value into Value.
+%%
+%% The value ends at end of input, at `;' (another parameter follows) or
+%% at a space or tab. Bytes are appended as they are, so the case of the
+%% value is kept.
+media_value(<<>>, Type, SubType, Params, Name, Value) ->
+    {Type, SubType, lists:reverse([{Name, Value}|Params])};
+media_value(<< $;, Rest/bits >>, Type, SubType, Params, Name, Value) ->
+    media_before_param(Rest, Type, SubType, [{Name, Value}|Params]);
+media_value(<< WS, Rest/bits >>, Type, SubType, Params, Name, Value) when WS =:= $\s; WS =:= $\t ->
+    media_before_semicolon(Rest, Type, SubType, [{Name, Value}|Params]);
+media_value(<< C, Rest/bits >>, Type, SubType, Params, Name, Value) when ?IS_TOKEN(C) ->
+    media_value(Rest, Type, SubType, Params, Name, << Value/binary, C >>).
+
+-ifdef(TEST).
+%% Generator for one {Name, Value} parameter. The value is either a
+%% token or a quoted string. The name is a random token nine times out
+%% of ten and the literal <<"charset">> otherwise, so that the charset
+%% handling is exercised regularly.
+media_type_parameter() ->
+    Value = oneof([token(), quoted_string()]),
+    frequency([
+        {90, {token(), Value}},
+        {10, {<<"charset">>, Value}}
+    ]).
+
+%% Generator for a whole media type. Produces the generated parts along
+%% with the header value built from them:
+%% {Type, SubType, Params, <<"Type/SubType;Name=Value...">>}.
+media_type() ->
+    ?LET({Type, SubType, Params},
+        {token(), token(), list(media_type_parameter())},
+        begin
+            ParamsData = [[$;, Name, $=, Value] || {Name, Value} <- Params],
+            {Type, SubType, Params, iolist_to_binary([Type, $/, SubType, ParamsData])}
+        end
+    ).
+
+%% Property: parsing a generated header gives back the generated parts,
+%% with the type, the subtype and every parameter name lowercased.
+%% Values are unquoted; only the value of a parameter whose lowercased
+%% name is <<"charset">> is expected to be lowercased as well.
+prop_parse_content_type() ->
+    ?FORALL({Type, SubType, Params, MediaType},
+        media_type(),
+        begin
+            {ParsedType, ParsedSubType, ParsedParams} = parse_content_type(MediaType),
+            ExpectedParams = [case ?INLINE_LOWERCASE_BC(Name) of
+                <<"charset">> -> {<<"charset">>, ?INLINE_LOWERCASE_BC(unquote(Value))};
+                LowerName -> {LowerName, unquote(Value)}
+            end || {Name, Value} <- Params],
+            ParsedType =:= ?INLINE_LOWERCASE_BC(Type)
+                andalso ParsedSubType =:= ?INLINE_LOWERCASE_BC(SubType)
+                andalso ParsedParams =:= ExpectedParams
+        end
+    ).
+
+%% Fixed examples: {HeaderValue, ExpectedResult}. Each pair becomes one
+%% test titled with the header value; the test passes when the parser
+%% output matches the expected tuple.
+parse_content_type_test_() ->
+    Cases = [
+        {<<"text/html;charset=utf-8">>,
+            {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
+        {<<"text/html;charset=UTF-8">>,
+            {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
+        {<<"Text/HTML;Charset=\"utf-8\"">>,
+            {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
+        {<<"text/html; charset=\"utf-8\"">>,
+            {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
+        {<<"text/html; charset=ISO-8859-4">>,
+            {<<"text">>, <<"html">>, [{<<"charset">>, <<"iso-8859-4">>}]}},
+        {<<"text/plain; charset=iso-8859-4">>,
+            {<<"text">>, <<"plain">>, [{<<"charset">>, <<"iso-8859-4">>}]}},
+        {<<"multipart/form-data \t;Boundary=\"MultipartIsUgly\"">>,
+            {<<"multipart">>, <<"form-data">>, [
+                {<<"boundary">>, <<"MultipartIsUgly">>}
+            ]}},
+        {<<"foo/bar; one=FirstParam; two=SecondParam">>,
+            {<<"foo">>, <<"bar">>, [
+                {<<"one">>, <<"FirstParam">>},
+                {<<"two">>, <<"SecondParam">>}
+            ]}}
+    ],
+    [{Header, fun() -> Expected = parse_content_type(Header) end}
+        || {Header, Expected} <- Cases].
+-endif.
+
+-ifdef(PERF).
+horse_parse_content_type() -> % Benchmark for parse_content_type/1; only compiled when PERF is defined.
+ horse:repeat(200000, % Presumably the number of repetitions; horse is not visible here, confirm against its documentation.
+ parse_content_type(<<"text/html;charset=utf-8">>) % Header with a single unquoted, lowercase charset parameter.
+ ).
+-endif.
+
%% @doc Parse the Expect header.
-spec parse_expect(binary()) -> continue.