From a8b793db3d6ffe91d62f81baf41b1dab4cd78fb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Fri, 22 Nov 2019 14:23:14 +0100 Subject: Add cow_http_struct_hd Implements draft-ietf-httpbis-header-structure-14. --- Makefile | 5 +- ebin/cowlib.app | 2 +- src/cow_http_struct_hd.erl | 322 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 327 insertions(+), 2 deletions(-) create mode 100644 src/cow_http_struct_hd.erl diff --git a/Makefile b/Makefile index efcc622..76d02ec 100644 --- a/Makefile +++ b/Makefile @@ -20,8 +20,11 @@ LOCAL_DEPS = crypto DOC_DEPS = asciideck -TEST_DEPS = $(if $(CI_ERLANG_MK),ci.erlang.mk) horse proper +TEST_DEPS = $(if $(CI_ERLANG_MK),ci.erlang.mk) base32 horse proper jsx structured-header-tests +dep_base32 = git https://github.com/dnsimple/base32_erlang master dep_horse = git https://github.com/ninenines/horse.git master +dep_jsx = git https://github.com/talentdeficit/jsx v2.10.0 +dep_structured-header-tests = git https://github.com/httpwg/structured-header-tests master # CI configuration. 
diff --git a/ebin/cowlib.app b/ebin/cowlib.app index 5f04bb9..18ab891 100644 --- a/ebin/cowlib.app +++ b/ebin/cowlib.app @@ -1,7 +1,7 @@ {application, 'cowlib', [ {description, "Support library for manipulating Web protocols."}, {vsn, "2.8.0"}, - {modules, ['cow_base64url','cow_cookie','cow_date','cow_hpack','cow_http','cow_http2','cow_http2_machine','cow_http_hd','cow_http_te','cow_iolists','cow_link','cow_mimetypes','cow_multipart','cow_qs','cow_spdy','cow_sse','cow_uri','cow_ws']}, + {modules, ['cow_base64url','cow_cookie','cow_date','cow_hpack','cow_http','cow_http2','cow_http2_machine','cow_http_hd','cow_http_struct_hd','cow_http_te','cow_iolists','cow_link','cow_mimetypes','cow_multipart','cow_qs','cow_spdy','cow_sse','cow_uri','cow_ws']}, {registered, []}, {applications, [kernel,stdlib,crypto]}, {env, []} diff --git a/src/cow_http_struct_hd.erl b/src/cow_http_struct_hd.erl new file mode 100644 index 0000000..acbf7b2 --- /dev/null +++ b/src/cow_http_struct_hd.erl @@ -0,0 +1,322 @@ +%% Copyright (c) 2019, Loïc Hoguin +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. +%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 

%% The mapping between Erlang and structured headers types is as follows:
%%
%% List: list()
%% Dictionary: map()
%% Bare item: one bare_item() that can be of type:
%%     Integer: integer()
%%     Float: float()
%%     String: {string, binary()}
%%     Token: {token, binary()}
%%     Byte sequence: {binary, binary()}
%%     Boolean: boolean()
%% And finally:
%%     Type with Parameters: {with_params, Type, Parameters}
%%     Parameters: #{binary() => bare_item() | undefined}
%%
%% A parameter that has no value is stored with the value undefined.

-module(cow_http_struct_hd).

-export([parse_dictionary/1]).
-export([parse_item/1]).
-export([parse_list/1]).

-include("cow_parse.hrl").

-type sh_list() :: [sh_item() | sh_inner_list()].
-type sh_inner_list() :: sh_with_params([sh_item()]).
-type sh_params() :: #{binary() => sh_bare_item() | undefined}.
-type sh_dictionary() :: #{binary() => sh_item() | sh_inner_list()}.
-type sh_item() :: sh_with_params(sh_bare_item()).
-type sh_bare_item() :: integer() | float() | boolean()
    | {string | token | binary, binary()}.
-type sh_with_params(Type) :: {with_params, Type, sh_params()}.

%% True when C is one of the lowercase ASCII letters a-z.
%% Written with 'or' so that it can be combined inside guards.
-define(IS_LC_ALPHA(C),
    (C =:= $a) or (C =:= $b) or (C =:= $c) or (C =:= $d) or (C =:= $e) or
    (C =:= $f) or (C =:= $g) or (C =:= $h) or (C =:= $i) or (C =:= $j) or
    (C =:= $k) or (C =:= $l) or (C =:= $m) or (C =:= $n) or (C =:= $o) or
    (C =:= $p) or (C =:= $q) or (C =:= $r) or (C =:= $s) or (C =:= $t) or
    (C =:= $u) or (C =:= $v) or (C =:= $w) or (C =:= $x) or (C =:= $y) or
    (C =:= $z)
).

%% Public interface.

%% Parse a dictionary header value into a map of key => member.
%%
%% An empty binary yields an empty map. Otherwise the value must start
%% with a lowercase letter (the first character of the first key) and
%% must be consumed entirely. Invalid input is not reported through a
%% return value: the call crashes (no matching clause or failed match).
-spec parse_dictionary(binary()) -> sh_dictionary().
parse_dictionary(<<>>) ->
    #{};
parse_dictionary(<<C,R/bits>>) when ?IS_LC_ALPHA(C) ->
    %% Asserting on <<>> ensures nothing is left over after the last member.
    {Dict, <<>>} = parse_dict_key(R, #{}, <<C>>),
    Dict.

%% Accumulate the characters of a dictionary key in K until "=" is found,
%% then parse the member value (an inner list when it starts with "(",
%% an item otherwise) and store it under K.
%%
%% Duplicate keys are rejected: the 'false = maps:is_key(...)' match
%% crashes when K is already present in Acc.
parse_dict_key(<<$=,$(,R0/bits>>, Acc, K) ->
    false = maps:is_key(K, Acc),
    {Item, R} = parse_inner_list(R0, []),
    parse_dict_before_sep(R, Acc#{K => Item});
parse_dict_key(<<$=,R0/bits>>, Acc, K) ->
    false = maps:is_key(K, Acc),
    {Item, R} = parse_item1(R0),
    parse_dict_before_sep(R, Acc#{K => Item});
parse_dict_key(<<C,R/bits>>, Acc, K)
        when ?IS_LC_ALPHA(C) or ?IS_DIGIT(C)
            or (C =:= $_) or (C =:= $-) or (C =:= $*) ->
    parse_dict_key(R, Acc, <<K/binary,C>>).

%% After a dictionary member: skip whitespace, then expect either a comma
%% (another member must follow) or the end of the input.
parse_dict_before_sep(<<C,R/bits>>, Acc) when ?IS_WS(C) ->
    parse_dict_before_sep(R, Acc);
parse_dict_before_sep(<<$,,R/bits>>, Acc) ->
    parse_dict_before_member(R, Acc);
parse_dict_before_sep(<<>>, Acc) ->
    {Acc, <<>>}.

%% After a comma: skip whitespace, then expect the first character of the
%% next key. There is no clause for <<>>, so a trailing comma crashes.
parse_dict_before_member(<<C,R/bits>>, Acc) when ?IS_WS(C) ->
    parse_dict_before_member(R, Acc);
parse_dict_before_member(<<C,R/bits>>, Acc) when ?IS_LC_ALPHA(C) ->
    parse_dict_key(R, Acc, <<C>>).

%% Parse an item header value (a bare item with optional parameters).
%% The whole binary must be consumed; invalid input crashes.
-spec parse_item(binary()) -> sh_item().
parse_item(Bin) ->
    {Item, <<>>} = parse_item1(Bin),
    Item.

%% Parse one item at the start of Bin and return it along with the
%% remaining input. The result is always wrapped in with_params, using
%% an empty map when no ";" follows the bare item.
parse_item1(Bin) ->
    case parse_bare_item(Bin) of
        {Item, <<$;,R/bits>>} ->
            {Params, Rest} = parse_before_param(R, #{}),
            {{with_params, Item, Params}, Rest};
        {Item, Rest} ->
            {{with_params, Item, #{}}, Rest}
    end.

%% Parse a list header value into a list of items and inner lists,
%% in the order they appear. An empty binary yields an empty list;
%% invalid input crashes.
-spec parse_list(binary()) -> sh_list().
parse_list(<<>>) ->
    [];
parse_list(Bin) ->
    parse_list_before_member(Bin, []).

%% Parse one list member: an inner list when it starts with "(",
%% an item otherwise. Members are accumulated in reverse order.
parse_list_member(<<$(,R0/bits>>, Acc) ->
    {Item, R} = parse_inner_list(R0, []),
    parse_list_before_sep(R, [Item|Acc]);
parse_list_member(R0, Acc) ->
    {Item, R} = parse_item1(R0),
    parse_list_before_sep(R, [Item|Acc]).

%% After a list member: skip whitespace, then expect either a comma
%% (another member must follow) or the end of the input.
parse_list_before_sep(<<C,R/bits>>, Acc) when ?IS_WS(C) ->
    parse_list_before_sep(R, Acc);
parse_list_before_sep(<<$,,R/bits>>, Acc) ->
    parse_list_before_member(R, Acc);
parse_list_before_sep(<<>>, Acc) ->
    lists:reverse(Acc).

%% Skip whitespace in front of a list member, then parse that member.
parse_list_before_member(<<C,R/bits>>, Acc) when ?IS_WS(C) ->
    parse_list_before_member(R, Acc);
parse_list_before_member(R, Acc) ->
    parse_list_member(R, Acc).

%% Internal.

%% Parse the contents of an inner list; the opening "(" has already been
%% consumed by the caller. Returns {{with_params, Items, Params}, Rest}.
parse_inner_list(<<C,R/bits>>, Acc) when ?IS_WS(C) ->
    parse_inner_list(R, Acc);
parse_inner_list(<<$),$;,R0/bits>>, Acc) ->
    {Params, R} = parse_before_param(R0, #{}),
    {{with_params, lists:reverse(Acc), Params}, R};
parse_inner_list(<<$),R/bits>>, Acc) ->
    {{with_params, lists:reverse(Acc), #{}}, R};
parse_inner_list(R0, Acc) ->
    %% An item inside an inner list must be followed by a space or by
    %% the closing parenthesis; anything else fails the match below.
    {Item, R = <<C,_/bits>>} = parse_item1(R0),
    true = (C =:= $\s) orelse (C =:= $)),
    parse_inner_list(R, [Item|Acc]).

%% After a ";": skip whitespace, then expect the first character of a
%% parameter key, which must be a lowercase letter.
parse_before_param(<<C,R/bits>>, Acc) when ?IS_WS(C) ->
    parse_before_param(R, Acc);
parse_before_param(<<C,R/bits>>, Acc) when ?IS_LC_ALPHA(C) ->
    parse_param(R, Acc, <<C>>).

%% Accumulate the characters of a parameter key in K, then store the
%% parameter in Acc. A parameter without "=value" is stored as undefined.
%% Returns {Params, Rest} once no further ";" follows.
%%
%% Every clause that stores a parameter first asserts that the key is
%% not already present, so a duplicate key always crashes regardless of
%% whether the parameter has a value or what follows it.
parse_param(<<$;,R/bits>>, Acc, K) ->
    false = maps:is_key(K, Acc),
    parse_before_param(R, Acc#{K => undefined});
parse_param(<<$=,R0/bits>>, Acc, K) ->
    case parse_bare_item(R0) of
        {Item, <<$;,R/bits>>} ->
            false = maps:is_key(K, Acc),
            parse_before_param(R, Acc#{K => Item});
        {Item, R} ->
            false = maps:is_key(K, Acc),
            {Acc#{K => Item}, R}
    end;
parse_param(<<C,R/bits>>, Acc, K)
        when ?IS_LC_ALPHA(C) or ?IS_DIGIT(C)
            or (C =:= $_) or (C =:= $-) or (C =:= $*) ->
    parse_param(R, Acc, <<K/binary,C>>);
parse_param(R, Acc, K) ->
    false = maps:is_key(K, Acc),
    {Acc#{K => undefined}, R}.

%% Parse one bare item, dispatching on its first character(s).
%% Returns {BareItem, Rest}. Input that matches no clause crashes.
%%
%% Integer or float.
%% The second argument of parse_number/3 is the number of digits seen
%% so far: the minus sign is not counted.
parse_bare_item(<<$-,R/bits>>) -> parse_number(R, 0, <<$->>);
parse_bare_item(<<C,R/bits>>) when ?IS_DIGIT(C) -> parse_number(R, 1, <<C>>);
%% String.
parse_bare_item(<<$",R/bits>>) -> parse_string(R, <<>>);
%% Token.
parse_bare_item(<<C,R/bits>>) when ?IS_ALPHA(C) -> parse_token(R, <<C>>);
%% Byte sequence.
parse_bare_item(<<$*,R/bits>>) -> parse_binary(R, <<>>);
%% Boolean.
parse_bare_item(<<"?0",R/bits>>) -> {false, R};
parse_bare_item(<<"?1",R/bits>>) -> {true, R}.

%% Accumulate digits in Acc while counting them in L. A "." switches to
%% float parsing. Otherwise the accumulated text is converted to an
%% integer, provided it has at most 15 digits.
parse_number(<<C,R/bits>>, L, Acc) when ?IS_DIGIT(C) ->
    parse_number(R, L+1, <<Acc/binary,C>>);
parse_number(<<C,R/bits>>, L, Acc) when C =:= $. ->
    parse_float(R, L, 0, <<Acc/binary,C>>);
parse_number(R, L, Acc) when L =< 15 ->
    {binary_to_integer(Acc), R}.

%% Accumulate the fractional digits of a float. L1 is the number of
%% digits before the dot and L2 the number of digits after it.
%% The guard alternatives accept up to 6 fractional digits and cap the
%% combined digit count at 15.
parse_float(<<C,R/bits>>, L1, L2, Acc) when ?IS_DIGIT(C) ->
    parse_float(R, L1, L2+1, <<Acc/binary,C>>);
parse_float(R, L1, L2, Acc) when
        L1 =< 9, L2 =< 6;
        L1 =< 10, L2 =< 5;
        L1 =< 11, L2 =< 4;
        L1 =< 12, L2 =< 3;
        L1 =< 13, L2 =< 2;
        L1 =< 14, L2 =< 1 ->
    {binary_to_float(Acc), R}.

%% Parse the contents of a string; the opening quote has already been
%% consumed. Only \" and \\ are accepted as escapes. Unescaped characters
%% must be in the range 16#20..16#7e, excluding the quote and backslash.
parse_string(<<$\\,$",R/bits>>, Acc) ->
    parse_string(R, <<Acc/binary,$">>);
parse_string(<<$\\,$\\,R/bits>>, Acc) ->
    parse_string(R, <<Acc/binary,$\\>>);
parse_string(<<$",R/bits>>, Acc) ->
    {{string, Acc}, R};
parse_string(<<C,R/bits>>, Acc) when
        C >= 16#20, C =< 16#21;
        C >= 16#23, C =< 16#5b;
        C >= 16#5d, C =< 16#7e ->
    parse_string(R, <<Acc/binary,C>>).

%% Accumulate token characters (?IS_TOKEN plus ":" and "/"); the token
%% ends at the first character that is not one of them.
parse_token(<<C,R/bits>>, Acc) when ?IS_TOKEN(C) or (C =:= $:) or (C =:= $/) ->
    parse_token(R, <<Acc/binary,C>>);
parse_token(R, Acc) ->
    {{token, Acc}, R}.

%% Accumulate the base64 text of a byte sequence up to the closing "*",
%% then decode it. The opening "*" has already been consumed.
parse_binary(<<$*,R/bits>>, Acc) ->
    {{binary, base64:decode(Acc)}, R};
parse_binary(<<C,R/bits>>, Acc) when ?IS_ALPHANUM(C) or (C =:= $+) or (C =:= $/) or (C =:= $=) ->
    parse_binary(R, <<Acc/binary,C>>).

-ifdef(TEST).
%% Generate one test per entry of every JSON file found in the
%% structured-header-tests dependency. Each test is named
%% "<file name>: <test name>".
struct_hd_test_() ->
    Files = filelib:wildcard("deps/structured-header-tests/*.json"),
    lists:flatten([begin
        {ok, JSON} = file:read_file(File),
        Tests = jsx:decode(JSON, [return_maps]),
        [
            {iolist_to_binary(io_lib:format("~s: ~s", [filename:basename(File), Name])), fun() ->
                %% The implementation is strict. We fail whenever we can.
                CanFail = maps:get(<<"can_fail">>, Test, false),
                MustFail = maps:get(<<"must_fail">>, Test, false),
                %% Tests that must fail have no expected value to convert.
                Expected = case MustFail of
                    true -> undefined;
                    false -> expected_to_term(maps:get(<<"expected">>, Test))
                end,
                Raw = raw_to_binary(Raw0),
                case HeaderType of
                    <<"dictionary">> when MustFail; CanFail ->
                        {'EXIT', _} = (catch parse_dictionary(Raw));
                    %% The test "binary.json: non-zero pad bits" does not fail
                    %% due to our reliance on Erlang/OTP's base64 module.
                    <<"item">> when CanFail ->
                        case (catch parse_item(Raw)) of
                            {'EXIT', _} -> ok;
                            Expected -> ok
                        end;
                    <<"item">> when MustFail ->
                        {'EXIT', _} = (catch parse_item(Raw));
                    <<"list">> when MustFail; CanFail ->
                        {'EXIT', _} = (catch parse_list(Raw));
                    <<"dictionary">> ->
                        Expected = (catch parse_dictionary(Raw));
                    <<"item">> ->
                        Expected = (catch parse_item(Raw));
                    <<"list">> ->
                        Expected = (catch parse_list(Raw))
                end
            end}
        || Test=#{
            <<"name">> := Name,
            <<"header_type">> := HeaderType,
            <<"raw">> := Raw0
        } <- Tests]
    end || File <- Files]).

%% Convert the decoded JSON "expected" value of a test into the term
%% the parsers return.
%%
%% Item.
expected_to_term(E=[_, Params]) when is_map(Params) ->
    e2t(E);
%% Outer list.
expected_to_term(Expected) when is_list(Expected) ->
    [e2t(E) || E <- Expected];
expected_to_term(Expected) ->
    e2t(Expected).

%% Dictionary.
e2t(Dict) when is_map(Dict) ->
    maps:map(fun(_, V) -> e2t(V) end, Dict);
%% Inner list.
e2t([List, Params]) when is_list(List) ->
    {with_params, [e2t(E) || E <- List],
        maps:map(fun(_, P) -> e2tb(P) end, Params)};
%% Item.
e2t([Bare, Params]) ->
    {with_params, e2tb(Bare),
        maps:map(fun(_, P) -> e2tb(P) end, Params)}.

%% Bare item.
%% Tokens and byte sequences are tagged objects in the JSON; the byte
%% sequence value is decoded with base32. Plain JSON strings map to
%% {string, V}, null maps to undefined, and anything else (numbers,
%% booleans) is passed through unchanged.
e2tb(#{<<"__type">> := <<"token">>, <<"value">> := V}) ->
    {token, V};
e2tb(#{<<"__type">> := <<"binary">>, <<"value">> := V}) ->
    {binary, base32:decode(V)};
e2tb(V) when is_binary(V) ->
    {string, V};
e2tb(null) ->
    undefined;
e2tb(V) ->
    V.

%% The Cowlib parsers currently do not support resuming parsing
%% in the case of multiple headers. To make tests work we modify
%% the raw value the same way Cowboy does when encountering
%% multiple headers: by adding a comma and space in between.
%%
%% Similarly, the Cowlib parsers expect the leading and trailing
%% whitespace to be removed before calling the parser.
raw_to_binary(RawList) ->
    trim_ws(iolist_to_binary(lists:join(<<", ">>, RawList))).

%% Drop leading whitespace, then hand over to trim_ws_end/2 with the
%% index of the last byte to drop trailing whitespace.
trim_ws(<<C,R/bits>>) when ?IS_WS(C) -> trim_ws(R);
trim_ws(R) -> trim_ws_end(R, byte_size(R) - 1).

%% Remove trailing spaces and tabs from Value. N is the zero-based index
%% of the last byte still under consideration; it moves towards the start
%% for as long as that byte is a space or a tab. An index of -1 means
%% that nothing is left, in which case the result is the empty binary.
trim_ws_end(_, -1) ->
    <<>>;
trim_ws_end(Value, N) ->
    case binary:at(Value, N) of
        C when C =:= $\s; C =:= $\t ->
            trim_ws_end(Value, N - 1);
        _ ->
            %% Keep bytes 0..N inclusive.
            binary:part(Value, 0, N + 1)
    end.
-endif.
%% -- cgit v1.2.3