diff options
-rw-r--r-- | include/cow_inline.hrl | 30 | ||||
-rw-r--r-- | src/cow_http.erl | 67 | ||||
-rw-r--r-- | src/cow_http_hd.erl | 91 |
3 files changed, 117 insertions, 71 deletions
diff --git a/include/cow_inline.hrl b/include/cow_inline.hrl
index 649ff79..3abb018 100644
--- a/include/cow_inline.hrl
+++ b/include/cow_inline.hrl
@@ -32,6 +32,10 @@
 	C =:= $Z
 ).
 
+%% IS_ALPHANUM(Character)
+
+-define(IS_ALPHANUM(C), (?IS_ALPHA(C) orelse ?IS_DIGIT(C))).
+
 %% IS_CHAR(Character)
 
 -define(IS_CHAR(C), C > 0, C < 128).
@@ -43,14 +47,19 @@
 	C =:= $5 orelse C =:= $6 orelse C =:= $7 orelse C =:= $8 orelse C =:= $9
 ).
 
-%% IS_ALPHANUM(Character)
-
--define(IS_ALPHANUM(C), ?IS_ALPHA(C) orelse ?IS_DIGIT(C)).
-
 %% IS_ETAGC(Character)
 
 -define(IS_ETAGC(C), C =:= 16#21; C >= 16#23, C =/= 16#7f).
 
+%% IS_HEX(Character)
+
+-define(IS_HEX(C), (
+	?IS_DIGIT(C) orelse
+	C =:= $a orelse C =:= $b orelse C =:= $c orelse
+	C =:= $d orelse C =:= $e orelse C =:= $f orelse
+	C =:= $A orelse C =:= $B orelse C =:= $C orelse
+	C =:= $D orelse C =:= $E orelse C =:= $F)).
+
 %% IS_TOKEN(Character)
 
 -define(IS_TOKEN(C),
@@ -60,6 +69,19 @@
 	orelse C =:= $^ orelse C =:= $_ orelse C =:= $` orelse C =:= $| orelse C =:= $~
 ).
 
+%% IS_URI_UNRESERVED(Character)
+
+-define(IS_URI_UNRESERVED(C), (
+	?IS_ALPHA(C) orelse ?IS_DIGIT(C) orelse
+	C =:= $- orelse C =:= $. orelse C =:= $_ orelse C =:= $~)).
+
+%% IS_URI_SUB_DELIMS(Character)
+
+-define(IS_URI_SUB_DELIMS(C), (
+	C =:= $! orelse C =:= $$ orelse C =:= $& orelse C =:= $' orelse
+	C =:= $( orelse C =:= $) orelse C =:= $* orelse C =:= $+ orelse
+	C =:= $, orelse C =:= $; orelse C =:= $=)).
+
 %% IS_VCHAR(Character)
 
 -define(IS_VCHAR(C), C =:= $\t; C > 31, C < 127).
diff --git a/src/cow_http.erl b/src/cow_http.erl
index 8504a21..7bb0562 100644
--- a/src/cow_http.erl
+++ b/src/cow_http.erl
@@ -18,7 +18,6 @@
 
 -export([parse_status_line/1]).
 -export([parse_headers/1]).
--export([parse_fullhost/1]).
 -export([parse_fullpath/1]).
 -export([parse_version/1]).
 
@@ -200,72 +199,6 @@ horse_parse_headers() ->
 	).
 -endif.
 
-%% @doc Extract host and port from a binary.
-%%
-%% Because the hostname is case insensitive it is converted
-%% to lowercase.
-
--spec parse_fullhost(binary()) -> {binary(), undefined | non_neg_integer()}.
-parse_fullhost(<< $[, Rest/bits >>) ->
-	parse_fullhost_ipv6(Rest, << $[ >>);
-parse_fullhost(Fullhost) ->
-	parse_fullhost(Fullhost, <<>>).
-
-parse_fullhost_ipv6(<< $] >>, Acc) ->
-	{<< Acc/binary, $] >>, undefined};
-parse_fullhost_ipv6(<< $], $:, Rest/bits >>, Acc) ->
-	{<< Acc/binary, $] >>, binary_to_integer(Rest)};
-parse_fullhost_ipv6(<< C, Rest/bits >>, Acc) ->
-	case C of
-		?INLINE_LOWERCASE(parse_fullhost_ipv6, Rest, Acc)
-	end.
-
-parse_fullhost(<<>>, Acc) ->
-	{Acc, undefined};
-parse_fullhost(<< $:, Rest/bits >>, Acc) ->
-	{Acc, binary_to_integer(Rest)};
-parse_fullhost(<< C, Rest/bits >>, Acc) ->
-	case C of
-		?INLINE_LOWERCASE(parse_fullhost, Rest, Acc)
-	end.
-
--ifdef(TEST).
-parse_fullhost_test() ->
-	{<<"example.org">>, 8080} = parse_fullhost(<<"example.org:8080">>),
-	{<<"example.org">>, undefined} = parse_fullhost(<<"example.org">>),
-	{<<"192.0.2.1">>, 8080} = parse_fullhost(<<"192.0.2.1:8080">>),
-	{<<"192.0.2.1">>, undefined} = parse_fullhost(<<"192.0.2.1">>),
-	{<<"[2001:db8::1]">>, 8080} = parse_fullhost(<<"[2001:db8::1]:8080">>),
-	{<<"[2001:db8::1]">>, undefined} = parse_fullhost(<<"[2001:db8::1]">>),
-	{<<"[::ffff:192.0.2.1]">>, 8080}
-		= parse_fullhost(<<"[::ffff:192.0.2.1]:8080">>),
-	{<<"[::ffff:192.0.2.1]">>, undefined}
-		= parse_fullhost(<<"[::ffff:192.0.2.1]">>),
-	ok.
--endif.
-
--ifdef(PERF).
-horse_parse_fullhost_blue_example_org() ->
-	horse:repeat(200000,
-		parse_fullhost(<<"blue.example.org:8080">>)
-	).
-
-horse_parse_fullhost_ipv4() ->
-	horse:repeat(200000,
-		parse_fullhost(<<"192.0.2.1:8080">>)
-	).
-
-horse_parse_fullhost_ipv6() ->
-	horse:repeat(200000,
-		parse_fullhost(<<"[2001:db8::1]:8080">>)
-	).
-
-horse_parse_fullhost_ipv6_v4() ->
-	horse:repeat(200000,
-		parse_fullhost(<<"[::ffff:192.0.2.1]:8080">>)
-	).
--endif.
-
 %% @doc Extract path and query string from a binary.
 
 -spec parse_fullpath(binary()) -> {binary(), binary()}.
diff --git a/src/cow_http_hd.erl b/src/cow_http_hd.erl
index 848b8c9..bd0a258 100644
--- a/src/cow_http_hd.erl
+++ b/src/cow_http_hd.erl
@@ -32,6 +32,7 @@
 -export([parse_etag/1]).
 -export([parse_expect/1]).
 -export([parse_expires/1]).
+-export([parse_host/1]).
 -export([parse_if_match/1]).
 -export([parse_if_modified_since/1]).
 -export([parse_if_none_match/1]).
@@ -1846,6 +1847,120 @@ horse_parse_expires_invalid() ->
 	).
 -endif.
 
+%% @doc Parse the Host header.
+%%
+%% We only seek to have legal characters and separate the
+%% host and port values. The number of segments in the host
+%% or the size of each segment is not checked.
+%%
+%% There is no way to distinguish IPv4 addresses from regular
+%% names until the last segment is reached therefore we do not
+%% differentiate them.
+%%
+%% The port, when present, must consist of decimal digits only
+%% and must not exceed 65535. Anything else raises badarg.
+%%
+%% The following valid hosts are currently rejected: IPv6
+%% addresses with a zone identifier; IPvFuture addresses;
+%% and percent-encoded addresses.
+
+-spec parse_host(binary()) -> {binary(), 0..65535 | undefined}.
+parse_host(<< $[, R/bits >>) ->
+	ipv6_address(R, << $[ >>);
+parse_host(Host) ->
+	reg_name(Host, <<>>).
+
+ipv6_address(<< $] >>, IP) -> {<< IP/binary, $] >>, undefined};
+ipv6_address(<< $], $:, Port/bits >>, IP) -> {<< IP/binary, $] >>, port_number(Port)};
+ipv6_address(<< C, R/bits >>, IP) when ?IS_HEX(C) orelse C =:= $: orelse C =:= $. ->
+	case C of
+		?INLINE_LOWERCASE(ipv6_address, R, IP)
+	end.
+
+reg_name(<<>>, Name) -> {Name, undefined};
+reg_name(<< $:, Port/bits >>, Name) -> {Name, port_number(Port)};
+reg_name(<< C, R/bits >>, Name) when ?IS_URI_UNRESERVED(C) orelse ?IS_URI_SUB_DELIMS(C) ->
+	case C of
+		?INLINE_LOWERCASE(reg_name, R, Name)
+	end.
+
+%% binary_to_integer/1 accepts a leading sign and has no upper
+%% bound, so check the first character and the range ourselves.
+%% All failures raise badarg, like binary_to_integer/1 does.
+port_number(<< C, _/bits >> = Bin) when ?IS_DIGIT(C) ->
+	case binary_to_integer(Bin) of
+		Port when Port =< 65535 -> Port;
+		_ -> error(badarg)
+	end;
+port_number(_) ->
+	error(badarg).
+
+-ifdef(TEST).
+host_chars() -> "!$&'()*+,-.0123456789;=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~".
+host() -> vector(1, 255, elements(host_chars())).
+
+host_port() ->
+	?LET({Host, Port},
+		{host(), oneof([undefined, int(1, 65535)])},
+		begin
+			HostBin = list_to_binary(Host),
+			{{?INLINE_LOWERCASE_BC(HostBin), Port},
+				case Port of
+					undefined -> HostBin;
+					_ -> << HostBin/binary, $:, (integer_to_binary(Port))/binary >>
+				end}
+		end).
+
+prop_parse_host() ->
+	?FORALL({Res, Host}, host_port(), Res =:= parse_host(Host)).
+
+parse_host_test_() ->
+	Tests = [
+		{<<>>, {<<>>, undefined}},
+		{<<"www.example.org:8080">>, {<<"www.example.org">>, 8080}},
+		{<<"www.example.org">>, {<<"www.example.org">>, undefined}},
+		{<<"192.0.2.1:8080">>, {<<"192.0.2.1">>, 8080}},
+		{<<"192.0.2.1">>, {<<"192.0.2.1">>, undefined}},
+		{<<"[2001:db8::1]:8080">>, {<<"[2001:db8::1]">>, 8080}},
+		{<<"[2001:db8::1]">>, {<<"[2001:db8::1]">>, undefined}},
+		{<<"[::ffff:192.0.2.1]:8080">>, {<<"[::ffff:192.0.2.1]">>, 8080}},
+		{<<"[::ffff:192.0.2.1]">>, {<<"[::ffff:192.0.2.1]">>, undefined}}
+	],
+	[{V, fun() -> R = parse_host(V) end} || {V, R} <- Tests].
+
+parse_host_error_test_() ->
+	Tests = [
+		<<"www.example.org:">>,
+		<<"www.example.org:+8080">>,
+		<<"www.example.org:-1">>,
+		<<"www.example.org:65536">>,
+		<<"[2001:db8::1]:65536">>
+	],
+	[{V, fun() -> badarg = try parse_host(V) catch error:E -> E end end} || V <- Tests].
+-endif.
+
+-ifdef(PERF).
+horse_parse_host_blue_example_org() ->
+	horse:repeat(200000,
+		parse_host(<<"blue.example.org:8080">>)
+	).
+
+horse_parse_host_ipv4() ->
+	horse:repeat(200000,
+		parse_host(<<"192.0.2.1:8080">>)
+	).
+
+horse_parse_host_ipv6() ->
+	horse:repeat(200000,
+		parse_host(<<"[2001:db8::1]:8080">>)
+	).
+
+horse_parse_host_ipv6_v4() ->
+	horse:repeat(200000,
+		parse_host(<<"[::ffff:192.0.2.1]:8080">>)
+	).
+-endif.
+
 %% @doc Parse the If-Match header.
 
 -spec parse_if_match(binary()) -> '*' | [etag()].