aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Loïc Hoguin <[email protected]> 2014-12-29 17:21:16 +0100
committer Loïc Hoguin <[email protected]> 2014-12-29 17:24:04 +0100
commit 1c732125bfd12fb3a25997f93cdf9e418666bddb (patch)
tree 08bdd4c839aef37e52181d9ca615d69da894e83a
parent 779f4ad51c665092b1da5c8028dad6d62a5f76f0 (diff)
downloadcowlib-1c732125bfd12fb3a25997f93cdf9e418666bddb.tar.gz
cowlib-1c732125bfd12fb3a25997f93cdf9e418666bddb.tar.bz2
cowlib-1c732125bfd12fb3a25997f93cdf9e418666bddb.zip
Add cow_http_hd:parse_host/1, remove cow_http:parse_fullhost/1
From RFC7230 and RFC3986. The new function now validates that the characters are correct, but does not go as far as validating segment sizes or the number of segments. Its main purpose is still to split host and port.
-rw-r--r-- include/cow_inline.hrl 30
-rw-r--r-- src/cow_http.erl 67
-rw-r--r-- src/cow_http_hd.erl 91
3 files changed, 117 insertions, 71 deletions
diff --git a/include/cow_inline.hrl b/include/cow_inline.hrl
index 649ff79..3abb018 100644
--- a/include/cow_inline.hrl
+++ b/include/cow_inline.hrl
@@ -32,6 +32,10 @@
C =:= $Z
).
+%% IS_ALPHANUM(Character)
+
+-define(IS_ALPHANUM(C), ?IS_ALPHA(C) orelse ?IS_DIGIT(C)).
+
%% IS_CHAR(Character)
-define(IS_CHAR(C), C > 0, C < 128).
@@ -43,14 +47,19 @@
C =:= $5 orelse C =:= $6 orelse C =:= $7 orelse C =:= $8 orelse C =:= $9
).
-%% IS_ALPHANUM(Character)
-
--define(IS_ALPHANUM(C), ?IS_ALPHA(C) orelse ?IS_DIGIT(C)).
-
%% IS_ETAGC(Character)
-define(IS_ETAGC(C), C =:= 16#21; C >= 16#23, C =/= 16#7f).
+%% IS_HEX(Character)
+
+-define(IS_HEX(C),
+ ?IS_DIGIT(C) orelse
+ C =:= $a orelse C =:= $b orelse C =:= $c orelse
+ C =:= $d orelse C =:= $e orelse C =:= $f orelse
+ C =:= $A orelse C =:= $B orelse C =:= $C orelse
+ C =:= $D orelse C =:= $E orelse C =:= $F).
+
%% IS_TOKEN(Character)
-define(IS_TOKEN(C),
@@ -60,6 +69,19 @@
orelse C =:= $^ orelse C =:= $_ orelse C =:= $` orelse C =:= $| orelse C =:= $~
).
+%% IS_URI_UNRESERVED(Character)
+
+-define(IS_URI_UNRESERVED(C),
+ ?IS_ALPHA(C) orelse ?IS_DIGIT(C) orelse
+ C =:= $- orelse C =:= $. orelse C =:= $_ orelse C =:= $~).
+
+%% IS_URI_SUB_DELIMS(Character)
+
+-define(IS_URI_SUB_DELIMS(C),
+ C =:= $! orelse C =:= $$ orelse C =:= $& orelse C =:= $' orelse
+ C =:= $( orelse C =:= $) orelse C =:= $* orelse C =:= $+ orelse
+ C =:= $, orelse C =:= $; orelse C =:= $=).
+
%% IS_VCHAR(Character)
-define(IS_VCHAR(C), C =:= $\t; C > 31, C < 127).
diff --git a/src/cow_http.erl b/src/cow_http.erl
index 8504a21..7bb0562 100644
--- a/src/cow_http.erl
+++ b/src/cow_http.erl
@@ -18,7 +18,6 @@
-export([parse_status_line/1]).
-export([parse_headers/1]).
--export([parse_fullhost/1]).
-export([parse_fullpath/1]).
-export([parse_version/1]).
@@ -200,72 +199,6 @@ horse_parse_headers() ->
).
-endif.
-%% @doc Extract host and port from a binary.
-%%
-%% Because the hostname is case insensitive it is converted
-%% to lowercase.
-
--spec parse_fullhost(binary()) -> {binary(), undefined | non_neg_integer()}.
-parse_fullhost(<< $[, Rest/bits >>) ->
- parse_fullhost_ipv6(Rest, << $[ >>);
-parse_fullhost(Fullhost) ->
- parse_fullhost(Fullhost, <<>>).
-
-parse_fullhost_ipv6(<< $] >>, Acc) ->
- {<< Acc/binary, $] >>, undefined};
-parse_fullhost_ipv6(<< $], $:, Rest/bits >>, Acc) ->
- {<< Acc/binary, $] >>, binary_to_integer(Rest)};
-parse_fullhost_ipv6(<< C, Rest/bits >>, Acc) ->
- case C of
- ?INLINE_LOWERCASE(parse_fullhost_ipv6, Rest, Acc)
- end.
-
-parse_fullhost(<<>>, Acc) ->
- {Acc, undefined};
-parse_fullhost(<< $:, Rest/bits >>, Acc) ->
- {Acc, binary_to_integer(Rest)};
-parse_fullhost(<< C, Rest/bits >>, Acc) ->
- case C of
- ?INLINE_LOWERCASE(parse_fullhost, Rest, Acc)
- end.
-
--ifdef(TEST).
-parse_fullhost_test() ->
- {<<"example.org">>, 8080} = parse_fullhost(<<"example.org:8080">>),
- {<<"example.org">>, undefined} = parse_fullhost(<<"example.org">>),
- {<<"192.0.2.1">>, 8080} = parse_fullhost(<<"192.0.2.1:8080">>),
- {<<"192.0.2.1">>, undefined} = parse_fullhost(<<"192.0.2.1">>),
- {<<"[2001:db8::1]">>, 8080} = parse_fullhost(<<"[2001:db8::1]:8080">>),
- {<<"[2001:db8::1]">>, undefined} = parse_fullhost(<<"[2001:db8::1]">>),
- {<<"[::ffff:192.0.2.1]">>, 8080}
- = parse_fullhost(<<"[::ffff:192.0.2.1]:8080">>),
- {<<"[::ffff:192.0.2.1]">>, undefined}
- = parse_fullhost(<<"[::ffff:192.0.2.1]">>),
- ok.
--endif.
-
--ifdef(PERF).
-horse_parse_fullhost_blue_example_org() ->
- horse:repeat(200000,
- parse_fullhost(<<"blue.example.org:8080">>)
- ).
-
-horse_parse_fullhost_ipv4() ->
- horse:repeat(200000,
- parse_fullhost(<<"192.0.2.1:8080">>)
- ).
-
-horse_parse_fullhost_ipv6() ->
- horse:repeat(200000,
- parse_fullhost(<<"[2001:db8::1]:8080">>)
- ).
-
-horse_parse_fullhost_ipv6_v4() ->
- horse:repeat(200000,
- parse_fullhost(<<"[::ffff:192.0.2.1]:8080">>)
- ).
--endif.
-
%% @doc Extract path and query string from a binary.
-spec parse_fullpath(binary()) -> {binary(), binary()}.
diff --git a/src/cow_http_hd.erl b/src/cow_http_hd.erl
index 848b8c9..bd0a258 100644
--- a/src/cow_http_hd.erl
+++ b/src/cow_http_hd.erl
@@ -32,6 +32,7 @@
-export([parse_etag/1]).
-export([parse_expect/1]).
-export([parse_expires/1]).
+-export([parse_host/1]).
-export([parse_if_match/1]).
-export([parse_if_modified_since/1]).
-export([parse_if_none_match/1]).
@@ -1846,6 +1847,96 @@ horse_parse_expires_invalid() ->
).
-endif.
+%% @doc Parse the Host header.
+%%
+%% We only check that the characters are legal and separate the
+%% host and port values. Neither the number of segments in the
+%% host nor the size of each segment is checked.
+%%
+%% There is no way to distinguish IPv4 addresses from regular
+%% names until the last segment is reached; therefore, we do not
+%% differentiate them.
+%%
+%% The following valid hosts are currently rejected: IPv6
+%% addresses with a zone identifier; IPvFuture addresses;
+%% and percent-encoded addresses.
+
+-spec parse_host(binary()) -> {binary(), 0..65535 | undefined}.
+parse_host(<< $[, R/bits >>) ->
+ ipv6_address(R, << $[ >>);
+parse_host(Host) ->
+ reg_name(Host, <<>>).
+
+ipv6_address(<< $] >>, IP) -> {<< IP/binary, $] >>, undefined};
+ipv6_address(<< $], $:, Port/bits >>, IP) -> {<< IP/binary, $] >>, binary_to_integer(Port)};
+ipv6_address(<< C, R/bits >>, IP) when ?IS_HEX(C) orelse C =:= $: orelse C =:= $. ->
+ case C of
+ ?INLINE_LOWERCASE(ipv6_address, R, IP)
+ end.
+
+reg_name(<<>>, Name) -> {Name, undefined};
+reg_name(<< $:, Port/bits >>, Name) -> {Name, binary_to_integer(Port)};
+reg_name(<< C, R/bits >>, Name) when ?IS_URI_UNRESERVED(C) orelse ?IS_URI_SUB_DELIMS(C) ->
+ case C of
+ ?INLINE_LOWERCASE(reg_name, R, Name)
+ end.
+
+-ifdef(TEST).
+host_chars() -> "!$&'()*+,-.0123456789;=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~".
+host() -> vector(1, 255, elements(host_chars())).
+
+host_port() ->
+ ?LET({Host, Port},
+ {host(), oneof([undefined, int(1, 65535)])},
+ begin
+ HostBin = list_to_binary(Host),
+ {{?INLINE_LOWERCASE_BC(HostBin), Port},
+ case Port of
+ undefined -> HostBin;
+ _ -> << HostBin/binary, $:, (integer_to_binary(Port))/binary >>
+ end}
+ end).
+
+prop_parse_host() ->
+ ?FORALL({Res, Host}, host_port(), Res =:= parse_host(Host)).
+
+parse_host_test_() ->
+ Tests = [
+ {<<>>, {<<>>, undefined}},
+ {<<"www.example.org:8080">>, {<<"www.example.org">>, 8080}},
+ {<<"www.example.org">>, {<<"www.example.org">>, undefined}},
+ {<<"192.0.2.1:8080">>, {<<"192.0.2.1">>, 8080}},
+ {<<"192.0.2.1">>, {<<"192.0.2.1">>, undefined}},
+ {<<"[2001:db8::1]:8080">>, {<<"[2001:db8::1]">>, 8080}},
+ {<<"[2001:db8::1]">>, {<<"[2001:db8::1]">>, undefined}},
+ {<<"[::ffff:192.0.2.1]:8080">>, {<<"[::ffff:192.0.2.1]">>, 8080}},
+ {<<"[::ffff:192.0.2.1]">>, {<<"[::ffff:192.0.2.1]">>, undefined}}
+ ],
+ [{V, fun() -> R = parse_host(V) end} || {V, R} <- Tests].
+-endif.
+
+-ifdef(PERF).
+horse_parse_host_blue_example_org() ->
+ horse:repeat(200000,
+ parse_host(<<"blue.example.org:8080">>)
+ ).
+
+horse_parse_host_ipv4() ->
+ horse:repeat(200000,
+ parse_host(<<"192.0.2.1:8080">>)
+ ).
+
+horse_parse_host_ipv6() ->
+ horse:repeat(200000,
+ parse_host(<<"[2001:db8::1]:8080">>)
+ ).
+
+horse_parse_host_ipv6_v4() ->
+ horse:repeat(200000,
+ parse_host(<<"[::ffff:192.0.2.1]:8080">>)
+ ).
+-endif.
+
%% @doc Parse the If-Match header.
-spec parse_if_match(binary()) -> '*' | [etag()].