aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- include/cow_inline.hrl 30
-rw-r--r-- src/cow_http.erl 67
-rw-r--r-- src/cow_http_hd.erl 91
3 files changed, 117 insertions, 71 deletions
diff --git a/include/cow_inline.hrl b/include/cow_inline.hrl
index 649ff79..3abb018 100644
--- a/include/cow_inline.hrl
+++ b/include/cow_inline.hrl
@@ -32,6 +32,10 @@
C =:= $Z
).
+%% IS_ALPHANUM(Character)
+%%
+%% True when the character satisfies IS_ALPHA or IS_DIGIT.
+%% The expansion is wrapped in parentheses so that it remains a
+%% single operand when the call site combines it with other
+%% operators such as andalso or not.
+
+-define(IS_ALPHANUM(C), (?IS_ALPHA(C) orelse ?IS_DIGIT(C))).
+
%% IS_CHAR(Character)
-define(IS_CHAR(C), C > 0, C < 128).
@@ -43,14 +47,19 @@
C =:= $5 orelse C =:= $6 orelse C =:= $7 orelse C =:= $8 orelse C =:= $9
).
-%% IS_ALPHANUM(Character)
-
--define(IS_ALPHANUM(C), ?IS_ALPHA(C) orelse ?IS_DIGIT(C)).
-
%% IS_ETAGC(Character)
-define(IS_ETAGC(C), C =:= 16#21; C >= 16#23, C =/= 16#7f).
+%% IS_HEX(Character)
+%%
+%% True for a hexadecimal digit: anything IS_DIGIT accepts, plus
+%% the letters a-f in either case. The expansion is wrapped in
+%% parentheses so that it remains a single operand when the call
+%% site combines it with other operators such as andalso or not.
+
+-define(IS_HEX(C), (
+    ?IS_DIGIT(C) orelse
+    C =:= $a orelse C =:= $b orelse C =:= $c orelse
+    C =:= $d orelse C =:= $e orelse C =:= $f orelse
+    C =:= $A orelse C =:= $B orelse C =:= $C orelse
+    C =:= $D orelse C =:= $E orelse C =:= $F
+)).
+
%% IS_TOKEN(Character)
-define(IS_TOKEN(C),
@@ -60,6 +69,19 @@
orelse C =:= $^ orelse C =:= $_ orelse C =:= $` orelse C =:= $| orelse C =:= $~
).
+%% IS_URI_UNRESERVED(Character)
+%%
+%% True for the characters accepted here as URI "unreserved":
+%% anything IS_ALPHA or IS_DIGIT accepts, plus - . _ and ~.
+%% The expansion is wrapped in parentheses so that it remains a
+%% single operand when the call site combines it with other
+%% operators such as andalso or not.
+
+-define(IS_URI_UNRESERVED(C), (
+    ?IS_ALPHA(C) orelse ?IS_DIGIT(C) orelse
+    C =:= $- orelse C =:= $. orelse C =:= $_ orelse C =:= $~
+)).
+
+%% IS_URI_SUB_DELIMS(Character)
+%%
+%% True for the URI "sub-delims" characters: ! $ & ' ( ) * + , ; =
+%% The expansion is wrapped in parentheses so that it remains a
+%% single operand when the call site combines it with other
+%% operators such as andalso or not.
+
+-define(IS_URI_SUB_DELIMS(C), (
+    C =:= $! orelse C =:= $$ orelse C =:= $& orelse C =:= $' orelse
+    C =:= $( orelse C =:= $) orelse C =:= $* orelse C =:= $+ orelse
+    C =:= $, orelse C =:= $; orelse C =:= $=
+)).
+
%% IS_VCHAR(Character)
-define(IS_VCHAR(C), C =:= $\t; C > 31, C < 127).
diff --git a/src/cow_http.erl b/src/cow_http.erl
index 8504a21..7bb0562 100644
--- a/src/cow_http.erl
+++ b/src/cow_http.erl
@@ -18,7 +18,6 @@
-export([parse_status_line/1]).
-export([parse_headers/1]).
--export([parse_fullhost/1]).
-export([parse_fullpath/1]).
-export([parse_version/1]).
@@ -200,72 +199,6 @@ horse_parse_headers() ->
).
-endif.
-%% @doc Extract host and port from a binary.
-%%
-%% Because the hostname is case insensitive it is converted
-%% to lowercase.
-
--spec parse_fullhost(binary()) -> {binary(), undefined | non_neg_integer()}.
-parse_fullhost(<< $[, Rest/bits >>) ->
- parse_fullhost_ipv6(Rest, << $[ >>);
-parse_fullhost(Fullhost) ->
- parse_fullhost(Fullhost, <<>>).
-
-parse_fullhost_ipv6(<< $] >>, Acc) ->
- {<< Acc/binary, $] >>, undefined};
-parse_fullhost_ipv6(<< $], $:, Rest/bits >>, Acc) ->
- {<< Acc/binary, $] >>, binary_to_integer(Rest)};
-parse_fullhost_ipv6(<< C, Rest/bits >>, Acc) ->
- case C of
- ?INLINE_LOWERCASE(parse_fullhost_ipv6, Rest, Acc)
- end.
-
-parse_fullhost(<<>>, Acc) ->
- {Acc, undefined};
-parse_fullhost(<< $:, Rest/bits >>, Acc) ->
- {Acc, binary_to_integer(Rest)};
-parse_fullhost(<< C, Rest/bits >>, Acc) ->
- case C of
- ?INLINE_LOWERCASE(parse_fullhost, Rest, Acc)
- end.
-
--ifdef(TEST).
-parse_fullhost_test() ->
- {<<"example.org">>, 8080} = parse_fullhost(<<"example.org:8080">>),
- {<<"example.org">>, undefined} = parse_fullhost(<<"example.org">>),
- {<<"192.0.2.1">>, 8080} = parse_fullhost(<<"192.0.2.1:8080">>),
- {<<"192.0.2.1">>, undefined} = parse_fullhost(<<"192.0.2.1">>),
- {<<"[2001:db8::1]">>, 8080} = parse_fullhost(<<"[2001:db8::1]:8080">>),
- {<<"[2001:db8::1]">>, undefined} = parse_fullhost(<<"[2001:db8::1]">>),
- {<<"[::ffff:192.0.2.1]">>, 8080}
- = parse_fullhost(<<"[::ffff:192.0.2.1]:8080">>),
- {<<"[::ffff:192.0.2.1]">>, undefined}
- = parse_fullhost(<<"[::ffff:192.0.2.1]">>),
- ok.
--endif.
-
--ifdef(PERF).
-horse_parse_fullhost_blue_example_org() ->
- horse:repeat(200000,
- parse_fullhost(<<"blue.example.org:8080">>)
- ).
-
-horse_parse_fullhost_ipv4() ->
- horse:repeat(200000,
- parse_fullhost(<<"192.0.2.1:8080">>)
- ).
-
-horse_parse_fullhost_ipv6() ->
- horse:repeat(200000,
- parse_fullhost(<<"[2001:db8::1]:8080">>)
- ).
-
-horse_parse_fullhost_ipv6_v4() ->
- horse:repeat(200000,
- parse_fullhost(<<"[::ffff:192.0.2.1]:8080">>)
- ).
--endif.
-
%% @doc Extract path and query string from a binary.
-spec parse_fullpath(binary()) -> {binary(), binary()}.
diff --git a/src/cow_http_hd.erl b/src/cow_http_hd.erl
index 848b8c9..bd0a258 100644
--- a/src/cow_http_hd.erl
+++ b/src/cow_http_hd.erl
@@ -32,6 +32,7 @@
-export([parse_etag/1]).
-export([parse_expect/1]).
-export([parse_expires/1]).
+-export([parse_host/1]).
-export([parse_if_match/1]).
-export([parse_if_modified_since/1]).
-export([parse_if_none_match/1]).
@@ -1846,6 +1847,96 @@ horse_parse_expires_invalid() ->
).
-endif.
+%% @doc Parse the Host header into its host and port parts.
+%%
+%% Parsing only verifies that every character is legal and
+%% splits the value into host and port. Neither the number of
+%% segments in the host nor the size of any segment is checked.
+%%
+%% An IPv4 address cannot be told apart from a regular name
+%% before its last segment has been read, so the two are not
+%% differentiated.
+%%
+%% Some valid hosts are rejected for now: IPv6 addresses that
+%% carry a zone identifier, IPvFuture addresses, and
+%% percent-encoded addresses.
+
+-spec parse_host(binary()) -> {binary(), 0..65535 | undefined}.
+parse_host(<< $[, Rest/bits >>) ->
+    %% A leading bracket announces an IP literal; the bracket is
+    %% kept as the first byte of the returned host.
+    ipv6_address(Rest, <<"[">>);
+parse_host(RegName) ->
+    reg_name(RegName, <<>>).
+
+%% Consume a bracketed IP literal whose opening bracket is already
+%% in the accumulator. Only hex digits, colons and dots are legal
+%% before the closing bracket; any other byte fails the guard and
+%% crashes with function_clause. An optional ":Port" may follow
+%% the closing bracket.
+ipv6_address(<< $] >>, IP) -> {<< IP/binary, $] >>, undefined};
+ipv6_address(<< $], $:, Port/bits >>, IP) -> {<< IP/binary, $] >>, port_number(Port)};
+ipv6_address(<< C, R/bits >>, IP) when ?IS_HEX(C) orelse C =:= $: orelse C =:= $. ->
+    case C of
+        ?INLINE_LOWERCASE(ipv6_address, R, IP)
+    end.
+
+%% Consume a registered name (or IPv4 address) up to the end of the
+%% input or the first colon, which introduces the port. Only URI
+%% unreserved and sub-delims characters are legal; any other byte
+%% fails the guard and crashes with function_clause.
+reg_name(<<>>, Name) -> {Name, undefined};
+reg_name(<< $:, Port/bits >>, Name) -> {Name, port_number(Port)};
+reg_name(<< C, R/bits >>, Name) when ?IS_URI_UNRESERVED(C) orelse ?IS_URI_SUB_DELIMS(C) ->
+    case C of
+        ?INLINE_LOWERCASE(reg_name, R, Name)
+    end.
+
+%% Convert the port part to an integer in 0..65535.
+%%
+%% binary_to_integer/1 on its own also accepts a leading sign, so
+%% "-1" or "+80" would be taken as ports. Requiring a digit first
+%% rules out signs (and the empty binary); any later non-digit
+%% makes binary_to_integer/1 raise badarg. Values above 65535 are
+%% rejected by the case guard, so the result always fits the range
+%% declared in the parse_host/1 spec. Invalid ports crash, like
+%% illegal host characters do.
+port_number(<< C, _/bits >> = Port) when ?IS_DIGIT(C) ->
+    case binary_to_integer(Port) of
+        N when N =< 65535 -> N
+    end.
+
+-ifdef(TEST).
+%% The exact set of characters reg_name/2 accepts: URI unreserved
+%% characters plus sub-delims.
+host_chars() ->
+    "!$&'()*+,-.0123456789;=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~".
+
+host() ->
+    vector(1, 255, elements(host_chars())).
+
+%% Generate {ExpectedResult, HeaderValue} pairs: a host made of
+%% legal characters, with or without a port suffix. The expected
+%% host is the lowercased form of the generated one.
+host_port() ->
+    ?LET({Chars, PortOrUndef},
+        {host(), oneof([undefined, int(1, 65535)])},
+        begin
+            Raw = list_to_binary(Chars),
+            Expected = {?INLINE_LOWERCASE_BC(Raw), PortOrUndef},
+            Input = case PortOrUndef of
+                undefined -> Raw;
+                _ -> << Raw/binary, $:, (integer_to_binary(PortOrUndef))/binary >>
+            end,
+            {Expected, Input}
+        end).
+
+prop_parse_host() ->
+    ?FORALL({Expected, Input}, host_port(), parse_host(Input) =:= Expected).
+
+%% Fixed examples: each entry is {HeaderValue, ExpectedResult}, and
+%% the header value doubles as the test title.
+parse_host_test_() ->
+    Cases = [
+        {<<>>, {<<>>, undefined}},
+        {<<"www.example.org:8080">>, {<<"www.example.org">>, 8080}},
+        {<<"www.example.org">>, {<<"www.example.org">>, undefined}},
+        {<<"192.0.2.1:8080">>, {<<"192.0.2.1">>, 8080}},
+        {<<"192.0.2.1">>, {<<"192.0.2.1">>, undefined}},
+        {<<"[2001:db8::1]:8080">>, {<<"[2001:db8::1]">>, 8080}},
+        {<<"[2001:db8::1]">>, {<<"[2001:db8::1]">>, undefined}},
+        {<<"[::ffff:192.0.2.1]:8080">>, {<<"[::ffff:192.0.2.1]">>, 8080}},
+        {<<"[::ffff:192.0.2.1]">>, {<<"[::ffff:192.0.2.1]">>, undefined}}
+    ],
+    [{Input, fun() -> Expected = parse_host(Input) end} || {Input, Expected} <- Cases].
+-endif.
+
+-ifdef(PERF).
+%% Benchmarks for parse_host/1, compiled only when PERF is defined.
+%% Each one repeats a single parse 200000 times through horse:repeat/2,
+%% covering a regular name, an IPv4 address, an IPv6 address and an
+%% IPv4-mapped IPv6 address, all with a port.
+horse_parse_host_blue_example_org() ->
+    horse:repeat(200000, parse_host(<<"blue.example.org:8080">>)).
+
+horse_parse_host_ipv4() ->
+    horse:repeat(200000, parse_host(<<"192.0.2.1:8080">>)).
+
+horse_parse_host_ipv6() ->
+    horse:repeat(200000, parse_host(<<"[2001:db8::1]:8080">>)).
+
+horse_parse_host_ipv6_v4() ->
+    horse:repeat(200000, parse_host(<<"[::ffff:192.0.2.1]:8080">>)).
+-endif.
+
%% @doc Parse the If-Match header.
-spec parse_if_match(binary()) -> '*' | [etag()].