diff options
author | Péter Dimitrov <[email protected]> | 2017-09-14 14:25:47 +0200 |
---|---|---|
committer | Péter Dimitrov <[email protected]> | 2017-10-23 15:53:28 +0200 |
commit | ec3f0c7f96531b714082f5af694a7ed6a02769ce (patch) | |
tree | e248a9d3144457826b1eace1c9783b6629cd3f68 /lib/stdlib/src/uri_string.erl | |
parent | 29a9dd0e17a97a3e6e46f0d08c6ba8f31db33f5e (diff) | |
download | otp-ec3f0c7f96531b714082f5af694a7ed6a02769ce.tar.gz otp-ec3f0c7f96531b714082f5af694a7ed6a02769ce.tar.bz2 otp-ec3f0c7f96531b714082f5af694a7ed6a02769ce.zip |
stdlib: Add support for parsing IPv4 and IPv6
Diffstat (limited to 'lib/stdlib/src/uri_string.erl')
-rwxr-xr-x | lib/stdlib/src/uri_string.erl | 246 |
1 files changed, 237 insertions, 9 deletions
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index 619da24cbc..3656d561be 100755 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -388,7 +388,7 @@ parse_relative_part(?STRING_REST("//", Rest), URI) -> throw:uri_parse_error -> {T, URI1} = parse_host(Rest, URI), {Host, _} = split_binary(Rest, byte_size_exl_single_slash(Rest) - byte_size_exl_head(T)), - URI1#{host => Host} + URI1#{host => remove_brackets(Host)} end; parse_relative_part(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-absolute @@ -669,7 +669,7 @@ parse_hier(?STRING_REST("//", Rest), URI) -> throw:uri_parse_error -> {T, URI1} = parse_host(Rest, URI), {Host, _} = split_binary(Rest, byte_size_exl_single_slash(Rest) - byte_size_exl_head(T)), - {Rest, URI1#{host => Host}} + {Rest, URI1#{host => remove_brackets(Host)}} end; parse_hier(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-absolute @@ -756,7 +756,7 @@ parse_userinfo(?CHAR($@), _URI) -> parse_userinfo(?STRING_REST($@, Rest), URI) -> {T, URI1} = parse_host(Rest, URI), {Host, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - {Rest, URI1#{host => Host}}; + {Rest, URI1#{host => remove_brackets(Host)}}; parse_userinfo(?STRING_REST(Char, Rest), URI) -> case is_userinfo(Char) of true -> parse_userinfo(Rest, URI); @@ -834,7 +834,6 @@ is_userinfo(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). %% %% reg-name = *( unreserved / pct-encoded / sub-delims ) %%------------------------------------------------------------------------- -%% TODO: implement parsing of IPv4/IPv6 addresses -spec parse_host(binary(), uri_map()) -> {binary(), uri_map()}. parse_host(?STRING_REST($:, Rest), URI) -> {T, URI1} = parse_port(Rest, URI), @@ -849,14 +848,16 @@ parse_host(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), {Rest, URI1#{query => ?STRING_REST($?, Query)}}; +parse_host(?STRING_REST($[, Rest), URI) -> + parse_ipv6_bin(Rest, [], URI); parse_host(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), {Rest, URI1#{fragment => Fragment}}; parse_host(?STRING_REST(Char, Rest), URI) -> - case is_reg_name(Char) of - true -> parse_host(Rest, URI); - false -> throw(uri_parse_error) + case is_digit(Char) of + true -> parse_ipv4_bin(Rest, [Char], URI); + false -> parse_reg_name(?STRING_REST(Char, Rest), URI) end; parse_host(?STRING_EMPTY, URI) -> {?STRING_EMPTY, URI}. @@ -877,12 +878,65 @@ parse_host([$?|Rest], Acc, URI) -> parse_query(Rest, [$?], URI#{host => lists:reverse(Acc)}); % path-empty ?query parse_host([$#|Rest], Acc, URI) -> parse_fragment(Rest, [], URI#{host => lists:reverse(Acc)}); % path-empty +parse_host([$[|Rest], _Acc, URI) -> + parse_ipv6(Rest, [], URI); parse_host([Char|Rest], Acc, URI) -> + case is_digit(Char) of + true -> parse_ipv4(Rest, [Char|Acc], URI); + false -> parse_reg_name([Char|Rest], Acc, URI) + end; +parse_host([], Acc, URI) -> + URI#{host => lists:reverse(Acc)}. + + +-spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}. +parse_reg_name(?STRING_REST($:, Rest), URI) -> + {T, URI1} = parse_port(Rest, URI), + {H, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + Port = binary_to_integer(H), + {Rest, URI1#{port => Port}}; +parse_reg_name(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-abempty + {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + {Rest, URI1#{path => ?STRING_REST($/, Path)}}; +parse_reg_name(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + {Rest, URI1#{query => ?STRING_REST($?, Query)}}; +parse_reg_name(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), + {Rest, URI1#{fragment => Fragment}}; +parse_reg_name(?STRING_REST(Char, Rest), URI) -> case is_reg_name(Char) of - true -> parse_host(Rest, [Char|Acc], URI); + true -> parse_reg_name(Rest, URI); false -> throw(uri_parse_error) end; -parse_host([], Acc, URI) -> +parse_reg_name(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + +-spec parse_reg_name(iolist(), list(), uri_map()) -> uri_map(). +parse_reg_name(?STRING(Str), Acc, URI) when is_list(Acc) -> + parse_reg_name(unicode:characters_to_list(Str), Acc, URI); +parse_reg_name([H|Rest], Acc, URI) when is_binary(H) -> + parse_reg_name(unicode:characters_to_list(H, utf8) ++ Rest, + Acc, URI); +parse_reg_name([H|Rest], Acc, URI) when is_list(H) -> + parse_reg_name(H ++ Rest, Acc, URI); +parse_reg_name([$:|Rest], Acc, URI) -> + parse_port(Rest, [], URI#{host => lists:reverse(Acc)}); +parse_reg_name([$/|Rest], Acc, URI) -> + parse_segment(Rest, [$/], URI#{host => lists:reverse(Acc)}); % path-abempty +parse_reg_name([$?|Rest], Acc, URI) -> + parse_query(Rest, [$?], URI#{host => lists:reverse(Acc)}); % path-empty ?query +parse_reg_name([$#|Rest], Acc, URI) -> + parse_fragment(Rest, [], URI#{host => lists:reverse(Acc)}); % path-empty +parse_reg_name([Char|Rest], Acc, URI) -> + case is_reg_name(Char) of + true -> parse_reg_name(Rest, [Char|Acc], URI); + false -> throw(uri_parse_error) + end; +parse_reg_name([], Acc, URI) -> URI#{host => lists:reverse(Acc)}. %% Check if char is allowed in reg-name @@ -891,6 +945,168 @@ is_reg_name($%) -> true; is_reg_name(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). +-spec parse_ipv4_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}. +parse_ipv4_bin(?STRING_REST($:, Rest), Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {T, URI1} = parse_port(Rest, URI), + {H, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + Port = binary_to_integer(H), + {Rest, URI1#{port => Port}}; +parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {T, URI1} = parse_segment(Rest, URI), % path-abempty + {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + {Rest, URI1#{path => ?STRING_REST($/, Path)}}; +parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + {Rest, URI1#{query => ?STRING_REST($?, Query)}}; +parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {T, URI1} = parse_fragment(Rest, URI), % path-empty + {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), + {Rest, URI1#{fragment => Fragment}}; +parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) -> + case is_ipv4(Char) of + true -> parse_ipv4_bin(Rest, [Char|Acc], URI); + false -> throw(uri_parse_error) + end; +parse_ipv4_bin(?STRING_EMPTY, Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {?STRING_EMPTY, URI}. + +-spec parse_ipv4(iolist(), list(), uri_map()) -> uri_map(). +parse_ipv4(?STRING(Str), Acc, URI) when is_list(Acc) -> + parse_ipv4(unicode:characters_to_list(Str), Acc, URI); +parse_ipv4([H|Rest], Acc, URI) when is_binary(H) -> + parse_ipv4(unicode:characters_to_list(H, utf8) ++ Rest, + Acc, URI); +parse_ipv4([H|Rest], Acc, URI) when is_list(H) -> + parse_ipv4(H ++ Rest, Acc, URI); +parse_ipv4([$:|Rest], Acc, URI) -> + parse_port(Rest, [], URI#{host => validate_ipv4_address(lists:reverse(Acc))}); +parse_ipv4([$/|Rest], Acc, URI) -> + parse_segment(Rest, [$/], URI#{host => validate_ipv4_address(lists:reverse(Acc))}); % path-abempty +parse_ipv4([$?|Rest], Acc, URI) -> + parse_query(Rest, [$?], URI#{host => validate_ipv4_address(lists:reverse(Acc))}); % path-empty ?query +parse_ipv4([$#|Rest], Acc, URI) -> + parse_fragment(Rest, [], URI#{host => validate_ipv4_address(lists:reverse(Acc))}); % path-empty +parse_ipv4([Char|Rest], Acc, URI) -> + case is_ipv4(Char) of + true -> parse_ipv4(Rest, [Char|Acc], URI); + false -> throw(uri_parse_error) + end; +parse_ipv4([], Acc, URI) -> + URI#{host => validate_ipv4_address(lists:reverse(Acc))}. + +%% Check if char is allowed in IPv4 addresses +-spec is_ipv4(char()) -> boolean(). +is_ipv4($.) -> true; +is_ipv4(Char) -> is_digit(Char). + +-spec validate_ipv4_address(list()) -> list(). +validate_ipv4_address(Addr) -> + case inet:parse_ipv4strict_address(Addr) of + {ok, _} -> Addr; + {error, _} -> throw(uri_parse_error) + end. + + +-spec parse_ipv6_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}. +parse_ipv6_bin(?STRING_REST($], Rest), Acc, URI) -> + _ = validate_ipv6_address(lists:reverse(Acc)), + parse_ipv6_bin_end(Rest, URI); +parse_ipv6_bin(?STRING_REST(Char, Rest), Acc, URI) -> + case is_ipv6(Char) of + true -> parse_ipv6_bin(Rest, [Char|Acc], URI); + false -> throw(uri_parse_error) + end; +parse_ipv6_bin(?STRING_EMPTY, _Acc, _URI) -> + throw(uri_parse_error). + +-spec parse_ipv6(iolist(), list(), uri_map()) -> uri_map(). +parse_ipv6(?STRING(Str), Acc, URI) when is_list(Acc) -> + parse_ipv6(unicode:characters_to_list(Str), Acc, URI); +parse_ipv6([H|Rest], Acc, URI) when is_binary(H) -> + parse_ipv6(unicode:characters_to_list(H, utf8) ++ Rest, + Acc, URI); +parse_ipv6([H|Rest], Acc, URI) when is_list(H) -> + parse_ipv6(H ++ Rest, Acc, URI); +parse_ipv6([$]|Rest], Acc, URI) -> + parse_ipv6_end(Rest, [], URI#{host => validate_ipv6_address(lists:reverse(Acc))}); +parse_ipv6([Char|Rest], Acc, URI) -> + case is_ipv6(Char) of + true -> parse_ipv6(Rest, [Char|Acc], URI); + false -> + io:format("# DEBUG Char: >>~c<<~n", [Char]), + io:format("# DEBUG Rest: >>~s<<~n", [Rest]), + throw(uri_parse_error) + end; +parse_ipv6([], _Acc, _URI) -> + throw(uri_parse_error). + +%% Check if char is allowed in IPv6 addresses +-spec is_ipv6(char()) -> boolean(). +is_ipv6($:) -> true; +is_ipv6($.) -> true; +is_ipv6(Char) -> is_hex_digit(Char). + + +-spec parse_ipv6_bin_end(binary(), uri_map()) -> {binary(), uri_map()}. +parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) -> + {T, URI1} = parse_port(Rest, URI), + {H, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + Port = binary_to_integer(H), + {Rest, URI1#{port => Port}}; +parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-abempty + {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + {Rest, URI1#{path => ?STRING_REST($/, Path)}}; +parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + {Rest, URI1#{query => ?STRING_REST($?, Query)}}; +parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), + {Rest, URI1#{fragment => Fragment}}; +parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) -> + case is_ipv6(Char) of + true -> parse_ipv6_bin_end(Rest, URI); + false -> throw(uri_parse_error) + end; +parse_ipv6_bin_end(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + +-spec parse_ipv6_end(iolist(), list(), uri_map()) -> uri_map(). +parse_ipv6_end(?STRING(Str), Acc, URI) when is_list(Acc) -> + parse_ipv6_end(unicode:characters_to_list(Str), Acc, URI); +parse_ipv6_end([H|Rest], Acc, URI) when is_binary(H) -> + parse_ipv6_end(unicode:characters_to_list(H, utf8) ++ Rest, + Acc, URI); +parse_ipv6_end([H|Rest], Acc, URI) when is_list(H) -> + parse_ipv6_end(H ++ Rest, Acc, URI); +parse_ipv6_end([$:|Rest], _Acc, URI) -> + parse_port(Rest, [], URI); +parse_ipv6_end([$/|Rest], _Acc, URI) -> + parse_segment(Rest, [$/], URI); % path-abempty +parse_ipv6_end([$?|Rest], _Acc, URI) -> + parse_query(Rest, [$?], URI); % path-empty ?query +parse_ipv6_end([$#|Rest], _Acc, URI) -> + parse_fragment(Rest, [], URI); % path-empty +parse_ipv6_end([], _Acc, URI) -> + URI. + + +-spec validate_ipv6_address(list()) -> list(). +validate_ipv6_address(Addr) -> + case inet:parse_ipv6strict_address(Addr) of + {ok, _} -> Addr; + {error, _} -> throw(uri_parse_error) + end. + + %%------------------------------------------------------------------------- %% [RFC 3986, Chapter 3.2.2. Port] %% @@ -1106,8 +1322,20 @@ is_digit(C) when $0 =< C, C =< $9 -> true; is_digit(_) -> false. +-spec is_hex_digit(char()) -> boolean(). +is_hex_digit(C) + when $0 =< C, C =< $9;$a =< C, C =< $f;$A =< C, C =< $F -> true; +is_hex_digit(_) -> false. + %% Returns the size of a binary exluding the first element. %% Used in calls to split_binary(). -spec byte_size_exl_head(binary()) -> number(). byte_size_exl_head(<<>>) -> 0; byte_size_exl_head(Binary) -> byte_size(Binary) + 1. + +% Remove brackets from binary +-spec remove_brackets(binary()) -> binary(). +remove_brackets(?STRING_REST($[,Addr)) -> + A1 = binary:replace(Addr, <<$[>>, <<>>), + binary:replace(A1, <<$]>>, <<>>); +remove_brackets(Addr) -> Addr. |