aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib
diff options
context:
space:
mode:
authorPéter Dimitrov <[email protected]>2017-09-20 17:17:34 +0200
committerPéter Dimitrov <[email protected]>2017-10-23 15:53:28 +0200
commit892bf58ee115a7e56ff38083afd85702bb8e14d3 (patch)
treea45395b65b095288a521e099c0f8f206eed422fb /lib/stdlib
parent6c0c11eeaf0649cfbca5e426263c7dc43b49feff (diff)
downloadotp-892bf58ee115a7e56ff38083afd85702bb8e14d3.tar.gz
otp-892bf58ee115a7e56ff38083afd85702bb8e14d3.tar.bz2
otp-892bf58ee115a7e56ff38083afd85702bb8e14d3.zip
stdlib: Implement recompose
- Implemented recompose function with percent-encoding and validation of IPv4/IPv6 addresses. - Added test for recompose that uses a generated test vector (URI combinations based on a fix set of URI components). - Added test for parse-recompose using a generated test vector. - Removed parsing functions for lists. Lists are converted to binary before parsing.
Diffstat (limited to 'lib/stdlib')
-rwxr-xr-xlib/stdlib/src/uri_string.erl783
-rw-r--r--lib/stdlib/test/uri_string_SUITE.erl300
2 files changed, 701 insertions, 382 deletions
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index 50e8a0bf5a..89a2c21518 100755
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -223,16 +223,39 @@
%%
-module(uri_string).
+%%-------------------------------------------------------------------------
+%% External API
+%%-------------------------------------------------------------------------
-export([compose_query/1, create_uri_reference/2, dissect_query/1, normalize/1,
parse/1, recompose/1, resolve_uri_reference/2, transcode/2]).
--export([is_host/1, is_path/1]). % suppress warnings
-export_type([uri_map/0, uri_string/0]).
+
+%%-------------------------------------------------------------------------
+%% Internal API
+%%-------------------------------------------------------------------------
+-export([is_host/1, is_path/1]). % suppress warnings
+
+
+%%-------------------------------------------------------------------------
+%% Macros
+%%-------------------------------------------------------------------------
-define(CHAR(Char), <<Char/utf8>>).
-define(STRING_EMPTY, <<>>).
-define(STRING(MatchStr), <<MatchStr/binary>>).
-define(STRING_REST(MatchStr, Rest), <<MatchStr/utf8, Rest/binary>>).
+-define(DEC2HEX(X),
+ if ((X) >= 0) andalso ((X) =< 9) -> (X) + $0;
+ ((X) >= 10) andalso ((X) =< 15) -> (X) + $A - 10
+ end).
+
+-define(HEX2DEC(X),
+ if ((X) >= $0) andalso ((X) =< $9) -> (X) - $0;
+ ((X) >= $A) andalso ((X) =< $F) -> (X) - $A + 10;
+ ((X) >= $a) andalso ((X) =< $f) -> (X) - $a + 10
+ end).
+
%%%=========================================================================
%%% API
@@ -250,8 +273,9 @@
%%-------------------------------------------------------------------------
-type uri_string() :: iodata().
-
+%%-------------------------------------------------------------------------
%% RFC 3986, Chapter 3. Syntax Components
+%%-------------------------------------------------------------------------
-type uri_map() ::
#{fragment => unicode:chardata(),
host => unicode:chardata(),
@@ -261,25 +285,44 @@
scheme => unicode:chardata(),
userinfo => unicode:chardata()} | #{}.
+%%-------------------------------------------------------------------------
%% Parse URIs
+%%-------------------------------------------------------------------------
-spec parse(URIString) -> URIMap when
URIString :: uri_string(),
URIMap :: uri_map().
-parse(URIString) ->
- if is_binary(URIString) ->
- parse_uri_reference(URIString, #{});
- true ->
- parse_uri_reference(URIString, [], #{})
- end.
+parse(URIString) when is_binary(URIString) ->
+ parse_uri_reference(URIString, #{});
+parse(URIString) when is_list(URIString) ->
+ Binary = unicode:characters_to_binary(URIString),
+ Map = parse_uri_reference(Binary, #{}),
+ convert_mapfields_to_list(Map).
+%%-------------------------------------------------------------------------
%% Recompose URIs
+%%-------------------------------------------------------------------------
-spec recompose(URIMap) -> URIString when
URIMap :: uri_map(),
URIString :: uri_string().
-recompose(_) ->
- "".
+recompose(Map) when map_size(Map) =:= 0 ->
+ "";
+recompose(Map) ->
+ case is_valid_map(Map) of
+ false ->
+ error({badarg, invalid_map});
+ true ->
+ T0 = update_scheme(Map, empty),
+ T1 = update_userinfo(Map, T0),
+ T2 = update_host(Map, T1),
+ T3 = update_port(Map, T2),
+ T4 = update_path(Map, T3),
+ T5 = update_query(Map, T4),
+ update_fragment(Map, T5)
+ end.
+%%-------------------------------------------------------------------------
%% Resolve references
+%%-------------------------------------------------------------------------
-spec resolve_uri_reference(RelativeURI, AbsoluteBaseURI) -> AbsoluteDestURI when
RelativeURI :: uri_string(),
AbsoluteBaseURI :: uri_string(),
@@ -287,7 +330,9 @@ recompose(_) ->
resolve_uri_reference(_,_) ->
"".
+%%-------------------------------------------------------------------------
%% Create references
+%%-------------------------------------------------------------------------
-spec create_uri_reference(AbsoluteSourceURI, AbsoluteBaseURI) -> RelativeDestURI when
AbsoluteSourceURI :: uri_string(),
AbsoluteBaseURI :: uri_string(),
@@ -295,33 +340,42 @@ resolve_uri_reference(_,_) ->
create_uri_reference(_,_) ->
"".
+%%-------------------------------------------------------------------------
%% Normalize URIs
+%%-------------------------------------------------------------------------
-spec normalize(URIString) -> NormalizedURI when
URIString :: uri_string(),
NormalizedURI :: uri_string().
normalize(_) ->
"".
+%%-------------------------------------------------------------------------
%% Transcode URIs
+%%-------------------------------------------------------------------------
-spec transcode(URIString, Options) -> URIString when
URIString :: uri_string(),
Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}].
transcode(_, _) ->
"".
-
+%%-------------------------------------------------------------------------
%% Working with query strings
%% HTML 2.0 - application/x-www-form-urlencoded
%% RFC 1866 [8.2.1]
+%%-------------------------------------------------------------------------
+%%-------------------------------------------------------------------------
%% Compose urlencoded query string from a list of unescaped key/value pairs.
+%%-------------------------------------------------------------------------
-spec compose_query(QueryList) -> QueryString when
QueryList :: [{unicode:chardata(), unicode:chardata()}],
QueryString :: uri_string().
compose_query(_) ->
"".
+%%-------------------------------------------------------------------------
%% Dissect a query string into a list of unescaped key/value pairs.
+%%-------------------------------------------------------------------------
-spec dissect_query(QueryString) -> QueryList when
QueryString :: uri_string(),
QueryList :: [{unicode:chardata(), unicode:chardata()}].
@@ -333,6 +387,14 @@ dissect_query(_) ->
%%% Internal functions
%%%========================================================================
+%%-------------------------------------------------------------------------
+%% Converts Map fields to lists
+%%-------------------------------------------------------------------------
+convert_mapfields_to_list(Map) ->
+ Fun = fun (_, V) when is_binary(V) -> unicode:characters_to_list(V);
+ (_, V) -> V end,
+ maps:map(Fun, Map).
+
%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 4.1. URI Reference]
@@ -342,16 +404,6 @@ dissect_query(_) ->
%%
%% URI-reference = URI / relative-ref
%%-------------------------------------------------------------------------
--spec parse_uri_reference(iolist(), list(), uri_map()) -> uri_map().
-parse_uri_reference([], _, _) -> #{};
-parse_uri_reference(URIString, Acc, URI) ->
- try parse_scheme_start(URIString, Acc, URI) of
- Res -> Res
- catch
- throw:uri_parse_error ->
- parse_relative_part(URIString, Acc, URI)
- end.
-
-spec parse_uri_reference(binary(), uri_map()) -> uri_map().
parse_uri_reference(<<>>, _) -> #{};
parse_uri_reference(URIString, URI) ->
@@ -411,32 +463,6 @@ parse_relative_part(?STRING_REST(Char, Rest), URI) ->
false -> throw(uri_parse_error)
end.
--spec parse_relative_part(iolist(), list(), uri_map()) -> uri_map().
-parse_relative_part([H|Rest], Acc, URI) when is_binary(H) ->
- parse_relative_part(unicode:characters_to_list(H, utf8) ++ Rest,
- Acc, URI);
-parse_relative_part([H|Rest], Acc, URI) when is_list(H) ->
- parse_relative_part(H ++ Rest, Acc, URI);
-parse_relative_part("//" ++ Rest, Acc, URI) ->
- % Parse userinfo
- try parse_userinfo(Rest, Acc, URI) of
- Res -> Res
- catch
- throw:uri_parse_error ->
- parse_host(Rest, Acc, URI)
- end;
-parse_relative_part([$/|Rest], _Acc, URI) ->
- parse_segment(Rest, [$/], URI); % path-absolute
-parse_relative_part([$?|Rest], _Acc, URI) ->
- parse_query(Rest, [$?], URI); % path-empty ?query
-parse_relative_part([$#|Rest], _Acc, URI) ->
- parse_fragment(Rest, [], URI); % path-empty
-parse_relative_part([Char|Rest], _, URI) ->
- case is_segment_nz_nc(Char) of
- true -> parse_segment_nz_nc(Rest, [Char], URI); % path-noscheme
- false -> throw(uri_parse_error)
- end.
-
%% Returns size of 'Rest' for proper calculation of splitting position.
%% Solves the following special case:
@@ -504,27 +530,6 @@ parse_segment(?STRING_REST(Char, Rest), URI) ->
parse_segment(?STRING_EMPTY, URI) ->
{?STRING_EMPTY, URI}.
--spec parse_segment(iolist(), list(), uri_map()) -> uri_map().
-parse_segment(?STRING(Str), Acc, URI) when is_list(Acc) ->
- parse_segment(unicode:characters_to_list(Str), Acc, URI);
-parse_segment([H|Rest], Acc, URI) when is_binary(H) ->
- parse_segment(unicode:characters_to_list(H, utf8) ++ Rest,
- Acc, URI);
-parse_segment([H|Rest], Acc, URI) when is_list(H) ->
- parse_segment(H ++ Rest, Acc, URI);
-parse_segment([$/|Rest], Acc, URI) ->
- parse_segment(Rest, [$/|Acc], URI); % segment
-parse_segment([$?|Rest], Acc, URI) ->
- parse_query(Rest, [$?], URI#{path => decode_path(lists:reverse(Acc))}); % ?query
-parse_segment([$#|Rest], Acc, URI) ->
- parse_fragment(Rest, [], URI#{path => decode_path(lists:reverse(Acc))});
-parse_segment([Char|Rest], Acc, URI) ->
- case is_pchar(Char) of
- true -> parse_segment(Rest, [Char|Acc], URI);
- false -> throw(uri_parse_error)
- end;
-parse_segment([], Acc, URI) ->
- URI#{path => decode_path(lists:reverse(Acc))}.
%%-------------------------------------------------------------------------
%% path-noscheme
@@ -548,27 +553,6 @@ parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) ->
parse_segment_nz_nc(?STRING_EMPTY, URI) ->
{?STRING_EMPTY, URI}.
--spec parse_segment_nz_nc(iolist(), list(), uri_map()) -> uri_map().
-parse_segment_nz_nc(?STRING(Str), Acc, URI) when is_list(Acc) ->
- parse_segment_nz_nc(unicode:characters_to_list(Str), Acc, URI);
-parse_segment_nz_nc([H|Rest], Acc, URI) when is_binary(H) ->
- parse_segment_nz_nc(unicode:characters_to_list(H, utf8) ++ Rest,
- Acc, URI);
-parse_segment_nz_nc([H|Rest], Acc, URI) when is_list(H) ->
- parse_segment_nz_nc(H ++ Rest, Acc, URI);
-parse_segment_nz_nc([$/|Rest], Acc, URI) ->
- parse_segment(Rest, [$/|Acc], URI); % segment
-parse_segment_nz_nc([$?|Rest], Acc, URI) ->
- parse_query(Rest, [$?], URI#{path => decode_path(lists:reverse(Acc))}); % ?query
-parse_segment_nz_nc([$#|Rest], Acc, URI) ->
- parse_fragment(Rest, [], URI#{path => decode_path(lists:reverse(Acc))});
-parse_segment_nz_nc([Char|Rest], Acc, URI) ->
- case is_segment_nz_nc(Char) of
- true -> parse_segment_nz_nc(Rest, [Char|Acc], URI);
- false -> throw(uri_parse_error)
- end;
-parse_segment_nz_nc([], Acc, URI) ->
- URI#{path => decode_path(lists:reverse(Acc))}.
%% Check if char is pchar.
-spec is_pchar(char()) -> boolean().
@@ -601,18 +585,6 @@ parse_scheme_start(?STRING_REST(Char, Rest), URI) ->
false -> throw(uri_parse_error)
end.
--spec parse_scheme_start(iolist(), list(), uri_map()) -> uri_map().
-parse_scheme_start([H|Rest], Acc, URI) when is_binary(H) ->
- parse_scheme_start(unicode:characters_to_list(H, utf8) ++ Rest,
- Acc, URI);
-parse_scheme_start([H|Rest], Acc, URI) when is_list(H) ->
- parse_scheme_start(H ++ Rest, Acc, URI);
-parse_scheme_start([Char|Rest], Acc, URI) ->
- case is_alpha(Char) of
- true -> parse_scheme(Rest, [Char|Acc], URI);
- false -> throw(uri_parse_error)
- end.
-
-spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}.
parse_scheme(?STRING_REST($:, Rest), URI) ->
@@ -626,23 +598,6 @@ parse_scheme(?STRING_REST(Char, Rest), URI) ->
parse_scheme(?STRING_EMPTY, _URI) ->
throw(uri_parse_error).
--spec parse_scheme(iolist(), list(), uri_map()) -> uri_map().
-parse_scheme(?STRING(Str), Acc, URI) when is_list(Acc) ->
- parse_scheme(unicode:characters_to_list(Str), Acc, URI);
-parse_scheme([H|Rest], Acc, URI) when is_binary(H) ->
- parse_scheme(unicode:characters_to_list(H, utf8) ++ Rest,
- Acc, URI);
-parse_scheme([H|Rest], Acc, URI) when is_list(H) ->
- parse_scheme(H ++ Rest, Acc, URI);
-parse_scheme([$:|Rest], Acc, URI) ->
- parse_hier(Rest, [], URI#{scheme => lists:reverse(Acc)});
-parse_scheme([Char|Rest], Acc, URI) ->
- case is_scheme(Char) of
- true -> parse_scheme(Rest, [Char|Acc], URI);
- false -> throw(uri_parse_error)
- end;
-parse_scheme([], _Acc, _URI) ->
- throw(uri_parse_error).
%% Check if char is allowed in scheme
-spec is_scheme(char()) -> boolean().
@@ -694,36 +649,6 @@ parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless
parse_hier(?STRING_EMPTY, URI) ->
{<<>>, URI}.
--spec parse_hier(iolist(), list(), uri_map()) -> uri_map().
-parse_hier(?STRING(Str), Acc, URI) when is_list(Acc) ->
- parse_hier(unicode:characters_to_list(Str), Acc, URI);
-parse_hier([H|Rest], Acc, URI) when is_binary(H) ->
- parse_hier(unicode:characters_to_list(H, utf8) ++ Rest,
- Acc, URI);
-parse_hier([H|Rest], Acc, URI) when is_list(H) ->
- parse_hier(H ++ Rest, Acc, URI);
-parse_hier("//" ++ Rest, Acc, URI) ->
- % Parse userinfo
- try parse_userinfo(Rest, Acc, URI) of
- Res -> Res
- catch
- throw:uri_parse_error ->
- parse_host(Rest, [], URI)
- end;
-parse_hier([$/|Rest], _Acc, URI) ->
- parse_segment(Rest, [$/], URI); % path-absolute
-parse_hier([$?|Rest], _Acc, URI) ->
- parse_query(Rest, [$?], URI); % path-empty ?query
-parse_hier([$#|Rest], _Acc, URI) ->
- parse_fragment(Rest, [], URI); % path-empty
-parse_hier([Char|Rest], _, URI) -> % path-rootless
- case is_pchar(Char) of
- true -> parse_segment(Rest, [Char], URI);
- false -> throw(uri_parse_error)
- end;
-parse_hier([], _, URI) ->
- URI.
-
%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 3.2. Authority]
@@ -766,27 +691,6 @@ parse_userinfo(?STRING_EMPTY, _URI) ->
%% URI cannot end in userinfo state
throw(uri_parse_error).
--spec parse_userinfo(iolist(), list(), uri_map()) -> uri_map().
-parse_userinfo(?STRING(Str), Acc, URI) when is_list(Acc) ->
- parse_userinfo(unicode:characters_to_list(Str), Acc, URI);
-parse_userinfo([H|Rest], Acc, URI) when is_binary(H) ->
- parse_userinfo(unicode:characters_to_list(H, utf8) ++ Rest,
- Acc, URI);
-parse_userinfo([H|Rest], Acc, URI) when is_list(H) ->
- parse_userinfo(H ++ Rest, Acc, URI);
-parse_userinfo([$@], _Acc, _URI) ->
- %% URI cannot end in userinfo state
- throw(uri_parse_error);
-parse_userinfo([$@|Rest], Acc, URI) ->
- parse_host(Rest, [], URI#{userinfo => decode_userinfo(lists:reverse(Acc))});
-parse_userinfo([Char|Rest], Acc, URI) ->
- case is_userinfo(Char) of
- true -> parse_userinfo(Rest, [Char|Acc], URI);
- false -> throw(uri_parse_error)
- end;
-parse_userinfo([], _Acc, _URI) ->
- %% URI cannot end in userinfo state
- throw(uri_parse_error).
%% Check if char is allowed in userinfo
-spec is_userinfo(char()) -> boolean().
@@ -862,32 +766,6 @@ parse_host(?STRING_REST(Char, Rest), URI) ->
parse_host(?STRING_EMPTY, URI) ->
{?STRING_EMPTY, URI}.
--spec parse_host(iolist(), list(), uri_map()) -> uri_map().
-parse_host(?STRING(Str), Acc, URI) when is_list(Acc) ->
- parse_host(unicode:characters_to_list(Str), Acc, URI);
-parse_host([H|Rest], Acc, URI) when is_binary(H) ->
- parse_host(unicode:characters_to_list(H, utf8) ++ Rest,
- Acc, URI);
-parse_host([H|Rest], Acc, URI) when is_list(H) ->
- parse_host(H ++ Rest, Acc, URI);
-parse_host([$:|Rest], Acc, URI) ->
- parse_port(Rest, [], URI#{host => decode_host(lists:reverse(Acc))});
-parse_host([$/|Rest], Acc, URI) ->
- parse_segment(Rest, [$/], URI#{host => decode_host(lists:reverse(Acc))}); % path-abempty
-parse_host([$?|Rest], Acc, URI) ->
- parse_query(Rest, [$?], URI#{host => decode_host(lists:reverse(Acc))}); % path-empty ?query
-parse_host([$#|Rest], Acc, URI) ->
- parse_fragment(Rest, [], URI#{host => decode_host(lists:reverse(Acc))}); % path-empty
-parse_host([$[|Rest], _Acc, URI) ->
- parse_ipv6(Rest, [], URI);
-parse_host([Char|Rest], Acc, URI) ->
- case is_digit(Char) of
- true -> parse_ipv4(Rest, [Char|Acc], URI);
- false -> parse_reg_name([Char|Rest], Acc, URI)
- end;
-parse_host([], Acc, URI) ->
- URI#{host => decode_host(lists:reverse(Acc))}.
-
-spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}.
parse_reg_name(?STRING_REST($:, Rest), URI) ->
@@ -915,30 +793,6 @@ parse_reg_name(?STRING_REST(Char, Rest), URI) ->
parse_reg_name(?STRING_EMPTY, URI) ->
{?STRING_EMPTY, URI}.
--spec parse_reg_name(iolist(), list(), uri_map()) -> uri_map().
-parse_reg_name(?STRING(Str), Acc, URI) when is_list(Acc) ->
- parse_reg_name(unicode:characters_to_list(Str), Acc, URI);
-parse_reg_name([H|Rest], Acc, URI) when is_binary(H) ->
- parse_reg_name(unicode:characters_to_list(H, utf8) ++ Rest,
- Acc, URI);
-parse_reg_name([H|Rest], Acc, URI) when is_list(H) ->
- parse_reg_name(H ++ Rest, Acc, URI);
-parse_reg_name([$:|Rest], Acc, URI) ->
- parse_port(Rest, [], URI#{host => decode_host(lists:reverse(Acc))});
-parse_reg_name([$/|Rest], Acc, URI) ->
- parse_segment(Rest, [$/], URI#{host => decode_host(lists:reverse(Acc))}); % path-abempty
-parse_reg_name([$?|Rest], Acc, URI) ->
- parse_query(Rest, [$?], URI#{host => decode_host(lists:reverse(Acc))}); % path-empty ?query
-parse_reg_name([$#|Rest], Acc, URI) ->
- parse_fragment(Rest, [], URI#{host => decode_host(lists:reverse(Acc))}); % path-empty
-parse_reg_name([Char|Rest], Acc, URI) ->
- case is_reg_name(Char) of
- true -> parse_reg_name(Rest, [Char|Acc], URI);
- false -> throw(uri_parse_error)
- end;
-parse_reg_name([], Acc, URI) ->
- URI#{host => decode_host(lists:reverse(Acc))}.
-
%% Check if char is allowed in reg-name
-spec is_reg_name(char()) -> boolean().
is_reg_name($%) -> true;
@@ -976,29 +830,6 @@ parse_ipv4_bin(?STRING_EMPTY, Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
{?STRING_EMPTY, URI}.
--spec parse_ipv4(iolist(), list(), uri_map()) -> uri_map().
-parse_ipv4(?STRING(Str), Acc, URI) when is_list(Acc) ->
- parse_ipv4(unicode:characters_to_list(Str), Acc, URI);
-parse_ipv4([H|Rest], Acc, URI) when is_binary(H) ->
- parse_ipv4(unicode:characters_to_list(H, utf8) ++ Rest,
- Acc, URI);
-parse_ipv4([H|Rest], Acc, URI) when is_list(H) ->
- parse_ipv4(H ++ Rest, Acc, URI);
-parse_ipv4([$:|Rest], Acc, URI) ->
- parse_port(Rest, [], URI#{host => validate_ipv4_address(lists:reverse(Acc))});
-parse_ipv4([$/|Rest], Acc, URI) ->
- parse_segment(Rest, [$/], URI#{host => validate_ipv4_address(lists:reverse(Acc))}); % path-abempty
-parse_ipv4([$?|Rest], Acc, URI) ->
- parse_query(Rest, [$?], URI#{host => validate_ipv4_address(lists:reverse(Acc))}); % path-empty ?query
-parse_ipv4([$#|Rest], Acc, URI) ->
- parse_fragment(Rest, [], URI#{host => validate_ipv4_address(lists:reverse(Acc))}); % path-empty
-parse_ipv4([Char|Rest], Acc, URI) ->
- case is_ipv4(Char) of
- true -> parse_ipv4(Rest, [Char|Acc], URI);
- false -> throw(uri_parse_error)
- end;
-parse_ipv4([], Acc, URI) ->
- URI#{host => validate_ipv4_address(lists:reverse(Acc))}.
%% Check if char is allowed in IPv4 addresses
-spec is_ipv4(char()) -> boolean().
@@ -1025,27 +856,6 @@ parse_ipv6_bin(?STRING_REST(Char, Rest), Acc, URI) ->
parse_ipv6_bin(?STRING_EMPTY, _Acc, _URI) ->
throw(uri_parse_error).
--spec parse_ipv6(iolist(), list(), uri_map()) -> uri_map().
-parse_ipv6(?STRING(Str), Acc, URI) when is_list(Acc) ->
- parse_ipv6(unicode:characters_to_list(Str), Acc, URI);
-parse_ipv6([H|Rest], Acc, URI) when is_binary(H) ->
- parse_ipv6(unicode:characters_to_list(H, utf8) ++ Rest,
- Acc, URI);
-parse_ipv6([H|Rest], Acc, URI) when is_list(H) ->
- parse_ipv6(H ++ Rest, Acc, URI);
-parse_ipv6([$]|Rest], Acc, URI) ->
- parse_ipv6_end(Rest, [], URI#{host => validate_ipv6_address(lists:reverse(Acc))});
-parse_ipv6([Char|Rest], Acc, URI) ->
- case is_ipv6(Char) of
- true -> parse_ipv6(Rest, [Char|Acc], URI);
- false ->
- io:format("# DEBUG Char: >>~c<<~n", [Char]),
- io:format("# DEBUG Rest: >>~s<<~n", [Rest]),
- throw(uri_parse_error)
- end;
-parse_ipv6([], _Acc, _URI) ->
- throw(uri_parse_error).
-
%% Check if char is allowed in IPv6 addresses
-spec is_ipv6(char()) -> boolean().
is_ipv6($:) -> true;
@@ -1079,26 +889,6 @@ parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) ->
parse_ipv6_bin_end(?STRING_EMPTY, URI) ->
{?STRING_EMPTY, URI}.
--spec parse_ipv6_end(iolist(), list(), uri_map()) -> uri_map().
-parse_ipv6_end(?STRING(Str), Acc, URI) when is_list(Acc) ->
- parse_ipv6_end(unicode:characters_to_list(Str), Acc, URI);
-parse_ipv6_end([H|Rest], Acc, URI) when is_binary(H) ->
- parse_ipv6_end(unicode:characters_to_list(H, utf8) ++ Rest,
- Acc, URI);
-parse_ipv6_end([H|Rest], Acc, URI) when is_list(H) ->
- parse_ipv6_end(H ++ Rest, Acc, URI);
-parse_ipv6_end([$:|Rest], _Acc, URI) ->
- parse_port(Rest, [], URI);
-parse_ipv6_end([$/|Rest], _Acc, URI) ->
- parse_segment(Rest, [$/], URI); % path-abempty
-parse_ipv6_end([$?|Rest], _Acc, URI) ->
- parse_query(Rest, [$?], URI); % path-empty ?query
-parse_ipv6_end([$#|Rest], _Acc, URI) ->
- parse_fragment(Rest, [], URI); % path-empty
-parse_ipv6_end([], _Acc, URI) ->
- URI.
-
-
-spec validate_ipv6_address(list()) -> list().
validate_ipv6_address(Addr) ->
case inet:parse_ipv6strict_address(Addr) of
@@ -1137,32 +927,6 @@ parse_port(?STRING_REST(Char, Rest), URI) ->
parse_port(?STRING_EMPTY, URI) ->
{?STRING_EMPTY, URI}.
--spec parse_port(iolist(), list(), uri_map()) -> uri_map().
-parse_port(?STRING(Str), Acc, URI) when is_list(Acc) ->
- parse_port(unicode:characters_to_list(Str), Acc, URI);
-parse_port([H|Rest], Acc, URI) when is_binary(H) ->
- parse_port(unicode:characters_to_list(H, utf8) ++ Rest,
- Acc, URI);
-parse_port([H|Rest], Acc, URI) when is_list(H) ->
- parse_port(H ++ Rest, Acc, URI);
-parse_port([$/|Rest], Acc, URI) ->
- {Port, _} = string:to_integer(lists:reverse(Acc)),
- parse_segment(Rest, [$/], URI#{port => Port}); % path-abempty
-parse_port([$?|Rest], Acc, URI) ->
- {Port, _} = string:to_integer(lists:reverse(Acc)),
- parse_query(Rest, [$?], URI#{port => Port}); % path-empty ?query
-parse_port([$#|Rest], Acc, URI) ->
- {Port, _} = string:to_integer(lists:reverse(Acc)),
- parse_fragment(Rest, [], URI#{port => Port}); % path-empty
-parse_port([Char|Rest], Acc, URI) ->
- case is_digit(Char) of
- true -> parse_port(Rest, [Char|Acc], URI);
- false -> throw(uri_parse_error)
- end;
-parse_port([], Acc, URI) ->
- {Port, _} = string:to_integer(lists:reverse(Acc)),
- URI#{port => Port}.
-
%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 3.4. Query]
@@ -1189,23 +953,6 @@ parse_query(?STRING_REST(Char, Rest), URI) ->
parse_query(?STRING_EMPTY, URI) ->
{?STRING_EMPTY, URI}.
--spec parse_query(iolist(), list(), uri_map()) -> uri_map().
-parse_query(?STRING(Str), Acc, URI) when is_list(Acc) ->
- parse_query(unicode:characters_to_list(Str), Acc, URI);
-parse_query([H|Rest], Acc, URI) when is_binary(H) ->
- parse_query(unicode:characters_to_list(H, utf8) ++ Rest,
- Acc, URI);
-parse_query([H|Rest], Acc, URI) when is_list(H) ->
- parse_query(H ++ Rest, Acc, URI);
-parse_query([$#|Rest], Acc, URI) ->
- parse_fragment(Rest, [], URI#{query => decode_query(lists:reverse(Acc))});
-parse_query([Char|Rest], Acc, URI) ->
- case is_query(Char) of
- true -> parse_query(Rest, [Char|Acc], URI);
- false -> throw(uri_parse_error)
- end;
-parse_query([], Acc, URI) ->
- URI#{query => decode_query(lists:reverse(Acc))}.
%% Check if char is allowed in query
-spec is_query(char()) -> boolean().
@@ -1232,21 +979,6 @@ parse_fragment(?STRING_REST(Char, Rest), URI) ->
parse_fragment(?STRING_EMPTY, URI) ->
{?STRING_EMPTY, URI}.
--spec parse_fragment(iolist(), list(), uri_map()) -> uri_map().
-parse_fragment(?STRING(Str), Acc, URI) when is_list(Acc) ->
- parse_fragment(unicode:characters_to_list(Str), Acc, URI);
-parse_fragment([H|Rest], Acc, URI) when is_binary(H) ->
- parse_fragment(unicode:characters_to_list(H, utf8) ++ Rest,
- Acc, URI);
-parse_fragment([H|Rest], Acc, URI) when is_list(H) ->
- parse_fragment(H ++ Rest, Acc, URI);
-parse_fragment([Char|Rest], Acc, URI) ->
- case is_fragment(Char) of
- true -> parse_fragment(Rest, [Char|Acc], URI);
- false -> throw(uri_parse_error)
- end;
-parse_fragment([], Acc, URI) ->
- URI#{fragment => decode_fragment(lists:reverse(Acc))}.
%% Check if char is allowed in fragment
-spec is_fragment(char()) -> boolean().
@@ -1266,21 +998,6 @@ is_fragment(Char) -> is_pchar(Char).
%% / "*" / "+" / "," / ";" / "="
%%
%%-------------------------------------------------------------------------
-%% %% Return true if input char is reserved.
-%% -spec is_reserved(char()) -> boolean().
-%% is_reserved(Char) ->
-%% is_gen_delim(Char) orelse is_sub_delim(Char).
-
-%% %% Check if char is reserved.
-%% -spec is_gen_delim(char()) -> boolean().
-%% is_gen_delim($:) -> true;
-%% is_gen_delim($/) -> true;
-%% is_gen_delim($?) -> true;
-%% is_gen_delim($#) -> true;
-%% is_gen_delim($[) -> true;
-%% is_gen_delim($]) -> true;
-%% is_gen_delim($@) -> true;
-%% is_gen_delim(_) -> false.
%% Check if char is sub-delim.
-spec is_sub_delim(char()) -> boolean().
@@ -1328,17 +1045,22 @@ is_hex_digit(C)
when $0 =< C, C =< $9;$a =< C, C =< $f;$A =< C, C =< $F -> true;
is_hex_digit(_) -> false.
+
%% Returns the size of a binary exluding the first element.
%% Used in calls to split_binary().
-spec byte_size_exl_head(binary()) -> number().
byte_size_exl_head(<<>>) -> 0;
byte_size_exl_head(Binary) -> byte_size(Binary) + 1.
-% Remove brackets from binary
+
+%% Remove enclosing brackets from binary
-spec remove_brackets(binary()) -> binary().
-remove_brackets(?STRING_REST($[,Addr)) ->
- A1 = binary:replace(Addr, <<$[>>, <<>>),
- binary:replace(A1, <<$]>>, <<>>);
+remove_brackets(<<$[/utf8, Rest/binary>>) ->
+ {H,T} = split_binary(Rest, byte_size(Rest) - 1),
+ case T =:= <<$]/utf8>> of
+ true -> H;
+ false -> Rest
+ end;
remove_brackets(Addr) -> Addr.
@@ -1362,42 +1084,72 @@ remove_brackets(Addr) -> Addr.
decode_userinfo(Cs) ->
decode(Cs, fun is_userinfo/1, <<>>).
-
-spec decode_host(list()|binary()) -> list() | binary().
decode_host(Cs) ->
decode(Cs, fun is_host/1, <<>>).
-%% Check if char is allowed in host
--spec is_host(char()) -> boolean().
-is_host($:) -> true;
-is_host(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
-
-
-spec decode_path(list()|binary()) -> list() | binary().
decode_path(Cs) ->
decode(Cs, fun is_path/1, <<>>).
-%% Check if char is allowed in path
--spec is_path(char()) -> boolean().
-is_path($/) -> true;
-
-is_path(Char) -> is_pchar(Char).
-
-
-spec decode_query(list()|binary()) -> list() | binary().
decode_query(Cs) ->
decode(Cs, fun is_query/1, <<>>).
-spec decode_fragment(list()|binary()) -> list() | binary().
decode_fragment(Cs) ->
- decode(Cs, fun is_host/1, <<>>).
+ decode(Cs, fun is_fragment/1, <<>>).
+
+
+%%-------------------------------------------------------------------------
+%% Percent-encode
+%%-------------------------------------------------------------------------
+
+%% Only validates as scheme cannot have percent-encoded characters
+-spec encode_scheme(list()|binary()) -> list() | binary().
+encode_scheme([]) ->
+ throw(uri_parse_error);
+encode_scheme(<<>>) ->
+ throw(uri_parse_error);
+encode_scheme(Scheme) ->
+ case validate_scheme(Scheme) of
+ true -> Scheme;
+ false -> throw(uri_parse_error)
+ end.
+
+-spec encode_userinfo(list()|binary()) -> list() | binary().
+encode_userinfo(Cs) ->
+ encode(Cs, fun is_userinfo/1).
+
+-spec encode_host(list()|binary()) -> list() | binary().
+encode_host(Cs) ->
+ case classify_host(Cs) of
+ regname -> Cs;
+ ipv4 -> Cs;
+ ipv6 -> bracket_ipv6(Cs);
+ other -> encode(Cs, fun is_reg_name/1)
+ end.
+-spec encode_path(list()|binary()) -> list() | binary().
+encode_path(Cs) ->
+ encode(Cs, fun is_path/1).
+-spec encode_query(list()|binary()) -> list() | binary().
+encode_query(Cs) ->
+ encode(Cs, fun is_query/1).
+
+-spec encode_fragment(list()|binary()) -> list() | binary().
+encode_fragment(Cs) ->
+ encode(Cs, fun is_fragment/1).
+
+%%-------------------------------------------------------------------------
+%% Helper funtions for percent-decode
+%%-------------------------------------------------------------------------
-spec decode(list()|binary(), fun(), binary()) -> list() | binary().
decode(<<$%,C0,C1,Cs/binary>>, Fun, Acc) ->
case is_hex_digit(C0) andalso is_hex_digit(C1) of
true ->
- B = hex2dec(C0)*16+hex2dec(C1),
+ B = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
decode(Cs, Fun, <<Acc/binary, B>>);
false -> throw(uri_parse_error)
end;
@@ -1411,7 +1163,7 @@ decode(<<>>, _Fun, Acc) ->
decode([$%,C0,C1|Cs], Fun, Acc) ->
case is_hex_digit(C0) andalso is_hex_digit(C1) of
true ->
- B = hex2dec(C0)*16+hex2dec(C1),
+ B = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
decode(Cs, Fun, <<Acc/binary, B>>);
false -> throw(uri_parse_error)
end;
@@ -1423,7 +1175,278 @@ decode([C|Cs], Fun, Acc) ->
decode([], _Fun, Acc) ->
unicode:characters_to_list(Acc).
+%% Check if char is allowed in host
+-spec is_host(char()) -> boolean().
+is_host($:) -> true;
+is_host(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+%% Check if char is allowed in path
+-spec is_path(char()) -> boolean().
+is_path($/) -> true;
+is_path(Char) -> is_pchar(Char).
+
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for percent-encode
+%%-------------------------------------------------------------------------
+-spec encode(list()|binary(), fun()) -> list() | binary().
+encode(Component, Fun) when is_list(Component) ->
+ B = unicode:characters_to_binary(Component),
+ unicode:characters_to_list(encode(B, Fun, <<>>));
+encode(Component, Fun) when is_binary(Component) ->
+ encode(Component, Fun, <<>>).
+%%
+encode(<<Char/utf8, Rest/binary>>, Fun, Acc) ->
+ C = encode_codepoint_binary(Char, Fun),
+ encode(Rest, Fun, <<Acc/binary,C/binary>>);
+encode(<<_Char, _Rest/binary>>, _Fun, _Acc) ->
+ throw(uri_parse_error);
+encode(<<>>, _Fun, Acc) ->
+ Acc.
+
+
+-spec encode_codepoint_binary(integer(), fun()) -> list().
+encode_codepoint_binary(C, Fun) ->
+ case Fun(C) of
+ false -> percent_encode_binary(C);
+ true -> <<C>>
+ end.
+
+
+-spec percent_encode_binary(integer()) -> binary().
+percent_encode_binary(Code) ->
+ percent_encode_binary(<<Code/utf8>>, <<>>).
+
+
+percent_encode_binary(<<A:4,B:4,Rest/binary>>, Acc) ->
+ percent_encode_binary(Rest, <<Acc/binary,$%,(?DEC2HEX(A)),(?DEC2HEX(B))>>);
+percent_encode_binary(<<>>, Acc) ->
+ Acc.
-hex2dec(X) when (X >= $0) andalso (X =< $9) -> X - $0;
-hex2dec(X) when (X >= $A) andalso (X =< $F) -> X - $A + 10;
-hex2dec(X) when (X >= $a) andalso (X =< $f) -> X - $a + 10.
+
+%%-------------------------------------------------------------------------
+%%-------------------------------------------------------------------------
+validate_scheme([]) -> true;
+validate_scheme([H|T]) ->
+ case is_scheme(H) of
+ true -> validate_scheme(T);
+ false -> false
+ end;
+validate_scheme(<<>>) -> true;
+validate_scheme(<<H, Rest/binary>>) ->
+ case is_scheme(H) of
+ true -> validate_scheme(Rest);
+ false -> false
+ end.
+
+%%-------------------------------------------------------------------------
+%% Classifies hostname into the following categories:
+%% regname, ipv4 - address does not contain reserved characters to be
+%% percent-encoded
+%% ipv6 - address does not contain reserved characters but it shall be
+%% encolsed in brackets
+%% other - address shall be percent-encoded
+%%-------------------------------------------------------------------------
+classify_host([]) -> false;
+classify_host(Addr) when is_binary(Addr) ->
+ A = unicode:characters_to_list(Addr),
+ classify_host_ipv6(A);
+classify_host(Addr) ->
+ classify_host_ipv6(Addr).
+
+classify_host_ipv6(Addr) ->
+ case is_ipv6_address(Addr) of
+ true -> ipv6;
+ false -> classify_host_ipv4(Addr)
+ end.
+
+classify_host_ipv4(Addr) ->
+ case is_ipv4_address(Addr) of
+ true -> ipv4;
+ false -> classify_host_regname(Addr)
+ end.
+
+classify_host_regname([]) -> regname;
+classify_host_regname([H|T]) ->
+ case is_reg_name(H) of
+ true -> classify_host_regname(T);
+ false -> other
+ end;
+classify_host_regname(<<>>) -> regname;
+classify_host_regname(<<H, Rest/binary>>) ->
+ case is_reg_name(H) of
+ true -> classify_host_regname(Rest);
+ false -> other
+ end.
+
+is_ipv4_address(Addr) ->
+ case inet:parse_ipv4strict_address(Addr) of
+ {ok, _} -> true;
+ {error, _} -> false
+ end.
+
+is_ipv6_address(Addr) ->
+ case inet:parse_ipv6strict_address(Addr) of
+ {ok, _} -> true;
+ {error, _} -> false
+ end.
+
+bracket_ipv6(Addr) when is_binary(Addr) ->
+ concat(<<$[,Addr/binary>>,<<$]>>);
+bracket_ipv6(Addr) when is_list(Addr) ->
+ [$[|Addr] ++ "]".
+
+
+%%-------------------------------------------------------------------------
+%% Helper funtions for recompose
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% Checks if input Map has valid combination of fields that can be
+%% recomposed into a URI.
+%% It filters out the following combinations from the set of all possible
+%% values:
+%% - <no-userinfo> <no-host> port
+%% E.g. ":8080" - invalid URI
+%% - userinfo <no-host> <no-port>
+%% E.g. "//user@" - invalid URI
+%% - userinfo <no-host> port
+%% E.g. "//user@:8080" => #{host => [],port => 8080,userinfo => "user"}
+%% There is always at least an empty host when both userinfo and port
+%% are present.
+%%-------------------------------------------------------------------------
+is_valid_map(Map) ->
+ case
+ (not maps:is_key(userinfo, Map) andalso
+ not maps:is_key(host, Map) andalso
+ maps:is_key(port, Map))
+ orelse
+ (maps:is_key(userinfo, Map) andalso
+ not maps:is_key(host, Map) andalso
+ not maps:is_key(port, Map))
+ orelse
+ (maps:is_key(userinfo, Map) andalso
+ not maps:is_key(host, Map) andalso
+ maps:is_key(port, Map))
+ of
+ true ->
+ false;
+ false ->
+ true
+ end.
+
+
+update_scheme(#{scheme := Scheme}, _) ->
+ add_colon_postfix(encode_scheme(Scheme));
+update_scheme(#{}, _) ->
+ empty.
+
+
+update_userinfo(#{userinfo := Userinfo}, empty) ->
+ add_auth_prefix(encode_userinfo(Userinfo));
+update_userinfo(#{userinfo := Userinfo}, URI) ->
+ concat(URI,add_auth_prefix(encode_userinfo(Userinfo)));
+update_userinfo(#{}, empty) ->
+ empty;
+update_userinfo(#{}, URI) ->
+ URI.
+
+
+update_host(#{host := Host}, empty) ->
+ add_auth_prefix(encode_host(Host));
+update_host(#{host := Host} = Map, URI) ->
+ concat(URI,add_host_prefix(Map, encode_host(Host)));
+update_host(#{}, empty) ->
+ empty;
+update_host(#{}, URI) ->
+ URI.
+
+
+%% URI cannot be empty for ports. E.g. ":8080" is not a valid URI
+update_port(#{port := Port}, URI) ->
+ concat(URI,add_colon(encode_port(Port)));
+update_port(#{}, URI) ->
+ URI.
+
+
+update_path(#{path := Path}, empty) ->
+ encode_path(Path);
+update_path(#{path := Path}, URI) ->
+ concat(URI,encode_path(Path));
+update_path(#{}, empty) ->
+ empty;
+update_path(#{}, URI) ->
+ URI.
+
+
+update_query(#{query := Query}, empty) ->
+ encode_query(Query);
+update_query(#{query := Query}, URI) ->
+ concat(URI,encode_query(Query));
+update_query(#{}, empty) ->
+ empty;
+update_query(#{}, URI) ->
+ URI.
+
+
+update_fragment(#{fragment := Fragment}, empty) ->
+ add_hashmark(encode_query(Fragment));
+update_fragment(#{fragment := Fragment}, URI) ->
+ concat(URI,add_hashmark(encode_fragment(Fragment)));
+update_fragment(#{}, empty) ->
+ "";
+update_fragment(#{}, URI) ->
+ URI.
+
+%%-------------------------------------------------------------------------
+%% Concatenates its arguments that can be lists and binaries.
+%% The result is a list if at least one of its argument is a list and
+%% binary otherwise.
+%%-------------------------------------------------------------------------
+concat(A, B) when is_binary(A), is_binary(B) ->
+ <<A/binary, B/binary>>;
+concat(A, B) when is_binary(A), is_list(B) ->
+ unicode:characters_to_list(A) ++ B;
+concat(A, B) when is_list(A) ->
+ A ++ maybe_to_list(B).
+
+add_hashmark(empty) -> empty;
+add_hashmark(Comp) when is_binary(Comp) ->
+ <<$#, Comp/binary>>;
+add_hashmark(Comp) when is_list(Comp) ->
+ [$#|Comp].
+
+add_colon(empty) -> empty;
+add_colon(Comp) when is_binary(Comp) ->
+ <<$:, Comp/binary>>;
+add_colon(Comp) when is_list(Comp) ->
+ [$:|Comp].
+
+add_colon_postfix(empty) -> empty;
+add_colon_postfix(Comp) when is_binary(Comp) ->
+ <<Comp/binary,$:>>;
+add_colon_postfix(Comp) when is_list(Comp) ->
+ Comp ++ ":".
+
+add_auth_prefix(empty) -> empty;
+add_auth_prefix(Comp) when is_binary(Comp) ->
+ <<"//", Comp/binary>>;
+add_auth_prefix(Comp) when is_list(Comp) ->
+ [$/,$/|Comp].
+
+add_host_prefix(_, empty) -> empty;
+add_host_prefix(#{userinfo := _}, Host) when is_binary(Host) ->
+ <<$@,Host/binary>>;
+add_host_prefix(#{}, Host) when is_binary(Host) ->
+ <<"//",Host/binary>>;
+add_host_prefix(#{userinfo := _}, Host) when is_list(Host) ->
+ [$@|Host];
+add_host_prefix(#{}, Host) when is_list(Host) ->
+ [$/,$/|Host].
+
+maybe_to_list(Comp) when is_binary(Comp) -> unicode:characters_to_list(Comp);
+maybe_to_list(Comp) -> Comp.
+
+encode_port(Port) ->
+ integer_to_binary(Port).
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index c379eeb15b..1859a25a18 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -31,9 +31,31 @@
parse_path/1, parse_pct_encoded_fragment/1, parse_pct_encoded_query/1,
parse_pct_encoded_userinfo/1, parse_port/1,
parse_query/1, parse_scheme/1, parse_userinfo/1,
- parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1
+ parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1,
+ recompose_fragment/1, recompose_parse_fragment/1,
+ recompose_query/1, recompose_parse_query/1,
+ recompose_path/1, recompose_parse_path/1,
+ recompose_autogen/1, parse_recompose_autogen/1
]).
+
+-define(SCHEME, "foo").
+-define(USERINFO, "åsa").
+-define(USERINFO_ENC, "%C3%A5sa").
+-define(HOST, "älvsjö").
+-define(HOST_ENC, "%C3%A4lvsj%C3%B6").
+-define(IPV6, "::127.0.0.1").
+-define(IPV6_ENC, "[::127.0.0.1]").
+-define(PORT, 8042).
+-define(PORT_ENC, ":8042").
+-define(PATH, "/där").
+-define(PATH_ENC, "/d%C3%A4r").
+-define(QUERY, "?name=örn").
+-define(QUERY_ENC, "?name=%C3%B6rn").
+-define(FRAGMENT, "näsa").
+-define(FRAGMENT_ENC, "#n%C3%A4sa").
+
+
suite() ->
[{timetrap,{minutes,1}}].
@@ -66,12 +88,202 @@ all() ->
parse_list,
parse_binary,
parse_mixed,
- parse_relative
+ parse_relative,
+ recompose_fragment,
+ recompose_parse_fragment,
+ recompose_query,
+ recompose_parse_query,
+ recompose_path,
+ recompose_parse_path,
+ recompose_autogen,
+ parse_recompose_autogen
].
groups() ->
[].
+
+%%-------------------------------------------------------------------------
+%% Helper functions
+%%-------------------------------------------------------------------------
+uri_combinations() ->
+ [[Sch,Usr,Hst,Prt,Pat,Qry,Frg] ||
+ Sch <- [fun update_scheme/1, fun update_scheme_binary/1, none],
+ Usr <- [fun update_userinfo/1, fun update_userinfo_binary/1, none],
+ Hst <- [fun update_host/1, fun update_host_binary/1,
+ fun update_ipv6/1, fun update_ipv6_binary/1, none],
+ Prt <- [fun update_port/1, none],
+ Pat <- [fun update_path/1, fun update_path_binary/1, none],
+ Qry <- [fun update_query/1,fun update_query_binary/1, none],
+ Frg <- [fun update_fragment/1, fun update_fragment_binary/1, none],
+ not (Usr =:= none andalso Hst =:= none andalso Prt =/= none),
+ not (Usr =/= none andalso Hst =:= none andalso Prt =:= none),
+ not (Usr =/= none andalso Hst =:= none andalso Prt =/= none)].
+
+
+generate_test_vector(Comb) ->
+ Fun = fun (F, {Map, URI}) when is_function(F) -> F({Map, URI});
+ (_, Map) -> Map
+ end,
+ lists:foldl(Fun, {#{}, empty}, Comb).
+
+generate_test_vectors(L) ->
+ lists:map(fun generate_test_vector/1, L).
+
+update_fragment({In, empty}) ->
+ {In#{fragment => ?FRAGMENT}, ?FRAGMENT_ENC};
+update_fragment({In, Out}) when is_list(Out) ->
+ {In#{fragment => ?FRAGMENT}, Out ++ ?FRAGMENT_ENC};
+update_fragment({In, Out}) when is_binary(Out) ->
+ {In#{fragment => ?FRAGMENT}, binary_to_list(Out) ++ ?FRAGMENT_ENC}.
+
+update_fragment_binary({In, empty}) ->
+ {In#{fragment => <<?FRAGMENT/utf8>>}, <<?FRAGMENT_ENC>>};
+update_fragment_binary({In, Out}) when is_list(Out) ->
+ {In#{fragment => <<?FRAGMENT/utf8>>}, Out ++ ?FRAGMENT_ENC};
+update_fragment_binary({In, Out}) when is_binary(Out) ->
+ {In#{fragment => <<?FRAGMENT/utf8>>}, <<Out/binary,?FRAGMENT_ENC>>}.
+
+
+update_query({In, empty}) ->
+ {In#{query => ?QUERY}, ?QUERY_ENC};
+update_query({In, Out}) when is_list(Out) ->
+ {In#{query => ?QUERY}, Out ++ ?QUERY_ENC};
+update_query({In, Out}) when is_binary(Out) ->
+ {In#{query => ?QUERY}, binary_to_list(Out) ++ ?QUERY_ENC}.
+
+update_query_binary({In, empty}) ->
+ {In#{query => <<?QUERY/utf8>>}, <<?QUERY_ENC>>};
+update_query_binary({In, Out}) when is_list(Out) ->
+ {In#{query => <<?QUERY/utf8>>}, Out ++ ?QUERY_ENC};
+update_query_binary({In, Out}) when is_binary(Out) ->
+ {In#{query => <<?QUERY/utf8>>}, <<Out/binary,?QUERY_ENC>>}.
+
+update_path({In, empty}) ->
+ {In#{path => ?PATH}, ?PATH_ENC};
+update_path({In, Out}) when is_list(Out) ->
+ {In#{path => ?PATH}, Out ++ ?PATH_ENC};
+update_path({In, Out}) when is_binary(Out) ->
+ {In#{path => ?PATH}, binary_to_list(Out) ++ ?PATH_ENC}.
+
+update_path_binary({In, empty}) ->
+ {In#{path => <<?PATH/utf8>>}, <<?PATH_ENC>>};
+update_path_binary({In, Out}) when is_list(Out) ->
+ {In#{path => <<?PATH/utf8>>}, Out ++ ?PATH_ENC};
+update_path_binary({In, Out}) when is_binary(Out) ->
+ {In#{path => <<?PATH/utf8>>}, <<Out/binary,?PATH_ENC>>}.
+
+update_port({In, Out}) when is_list(Out) ->
+ {In#{port => ?PORT}, Out ++ ?PORT_ENC};
+update_port({In, Out}) when is_binary(Out) ->
+ {In#{port => ?PORT}, <<Out/binary,?PORT_ENC>>}.
+
+update_host({In, empty}) ->
+ {In#{host => ?HOST}, "//" ++ ?HOST_ENC};
+update_host({In, Out}) when is_list(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => ?HOST}, Out ++ [$@|?HOST_ENC]};
+ false -> {In#{host => ?HOST}, Out ++ [$/,$/|?HOST_ENC]}
+ end;
+update_host({In, Out}) when is_binary(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => ?HOST}, binary_to_list(Out) ++ [$@|?HOST_ENC]};
+ false -> {In#{host => ?HOST}, binary_to_list(Out) ++ [$/,$/|?HOST_ENC]}
+ end.
+
+update_host_binary({In, empty}) ->
+ {In#{host => <<?HOST/utf8>>}, <<"//",?HOST_ENC>>};
+update_host_binary({In, Out}) when is_list(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => <<?HOST/utf8>>}, Out ++ [$@|?HOST_ENC]};
+ false -> {In#{host => <<?HOST/utf8>>}, Out ++ [$/,$/|?HOST_ENC]}
+ end;
+update_host_binary({In, Out}) when is_binary(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => <<?HOST/utf8>>}, <<Out/binary,$@,?HOST_ENC>>};
+ false-> {In#{host => <<?HOST/utf8>>}, <<Out/binary,"//",?HOST_ENC>>}
+ end.
+
+update_ipv6({In, empty}) ->
+ {In#{host => ?IPV6}, "//" ++ ?IPV6_ENC};
+update_ipv6({In, Out}) when is_list(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => ?IPV6}, Out ++ [$@|?IPV6_ENC]};
+ false -> {In#{host => ?IPV6}, Out ++ [$/,$/|?IPV6_ENC]}
+ end;
+update_ipv6({In, Out}) when is_binary(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => ?IPV6}, binary_to_list(Out) ++ [$@|?IPV6_ENC]};
+ false -> {In#{host => ?IPV6}, binary_to_list(Out) ++ [$/,$/|?IPV6_ENC]}
+ end.
+
+update_ipv6_binary({In, empty}) ->
+ {In#{host => <<?IPV6/utf8>>}, <<"//",?IPV6_ENC>>};
+update_ipv6_binary({In, Out}) when is_list(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => <<?IPV6/utf8>>}, Out ++ [$@|?IPV6_ENC]};
+ false -> {In#{host => <<?IPV6/utf8>>}, Out ++ [$/,$/|?IPV6_ENC]}
+ end;
+update_ipv6_binary({In, Out}) when is_binary(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => <<?IPV6/utf8>>}, <<Out/binary,$@,?IPV6_ENC>>};
+ false-> {In#{host => <<?IPV6/utf8>>}, <<Out/binary,"//",?IPV6_ENC>>}
+ end.
+
+update_userinfo({In, empty}) ->
+ {In#{userinfo => ?USERINFO}, "//" ++ ?USERINFO_ENC};
+update_userinfo({In, Out}) when is_list(Out) ->
+ {In#{userinfo => ?USERINFO}, Out ++ "//" ++ ?USERINFO_ENC};
+update_userinfo({In, Out}) when is_binary(Out) ->
+ {In#{userinfo => ?USERINFO}, binary_to_list(Out) ++ "//" ++ ?USERINFO_ENC}.
+
+update_userinfo_binary({In, empty}) ->
+ {In#{userinfo => <<?USERINFO/utf8>>}, <<"//",?USERINFO_ENC>>};
+update_userinfo_binary({In, Out}) when is_list(Out) ->
+ {In#{userinfo => <<?USERINFO/utf8>>}, Out ++ "//" ++ ?USERINFO_ENC};
+update_userinfo_binary({In, Out}) when is_binary(Out) ->
+ {In#{userinfo => <<?USERINFO/utf8>>}, <<Out/binary,"//",?USERINFO_ENC>>}.
+
+update_scheme({In, empty}) ->
+ {In#{scheme => ?SCHEME}, ?SCHEME ++ ":"}.
+
+update_scheme_binary({In, empty}) ->
+ {In#{scheme => <<?SCHEME/utf8>>}, <<?SCHEME,$:>>}.
+
+
+%% Test recompose on a generated test vector
+run_test_recompose({#{}, empty}) ->
+ try "" = uri_string:recompose(#{}) of
+ _ -> ok
+ catch
+ _:_ -> error({test_failed, #{}, ""})
+ end;
+run_test_recompose({Map, URI}) ->
+ try URI = uri_string:recompose(Map) of
+ URI -> ok
+ catch
+ _:_ -> error({test_failed, Map, URI})
+ end.
+
+%% Test parse - recompose on a generated test vector
+run_test_parse_recompose({#{}, empty}) ->
+ try "" = uri_string:recompose(uri_string:parse("")) of
+ _ -> ok
+ catch
+ _:_ -> error({test_failed, #{}, ""})
+ end;
+run_test_parse_recompose({Map, URI}) ->
+ try URI = uri_string:recompose(uri_string:parse(URI)) of
+ URI -> ok
+ catch
+ _:_ -> error({test_failed, Map, URI})
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Parse tests
+%%-------------------------------------------------------------------------
+
parse_binary_scheme(_Config) ->
#{} = uri_string:parse(<<>>),
#{path := <<"foo">>} = uri_string:parse(<<"foo">>),
@@ -438,3 +650,87 @@ parse_relative(_Config) ->
uri_string:parse(lists:append("/pa",<<"th">>)),
#{path := "foo"} =
uri_string:parse(lists:append("fo",<<"o">>)).
+
+
+%%-------------------------------------------------------------------------
+%% Recompose tests
+%%-------------------------------------------------------------------------
+recompose_fragment(_Config) ->
+ <<?FRAGMENT_ENC>> = uri_string:recompose(#{fragment => <<?FRAGMENT/utf8>>}),
+ ?FRAGMENT_ENC = uri_string:recompose(#{fragment => ?FRAGMENT}).
+
+recompose_parse_fragment(_Config) ->
+ <<?FRAGMENT_ENC>> = uri_string:recompose(uri_string:parse(<<?FRAGMENT_ENC>>)),
+ ?FRAGMENT_ENC = uri_string:recompose(uri_string:parse(?FRAGMENT_ENC)).
+
+recompose_query(_Config) ->
+ <<?QUERY_ENC>> =
+ uri_string:recompose(#{query => <<?QUERY/utf8>>}),
+ <<?QUERY_ENC?FRAGMENT_ENC>> =
+ uri_string:recompose(#{query => <<?QUERY/utf8>>,
+ fragment => <<?FRAGMENT/utf8>>}),
+ "?name=%C3%B6rn" =
+ uri_string:recompose(#{query => "?name=örn"}),
+ "?name=%C3%B6rn#n%C3%A4sa" =
+ uri_string:recompose(#{query => "?name=örn",
+ fragment => "näsa"}).
+
+recompose_parse_query(_Config) ->
+ <<"?name=%C3%B6rn">> = uri_string:recompose(uri_string:parse(<<"?name=%C3%B6rn">>)),
+ <<"?name=%C3%B6rn#n%C3%A4sa">> =
+ uri_string:recompose(uri_string:parse(<<"?name=%C3%B6rn#n%C3%A4sa">>)),
+ "?name=%C3%B6rn" = uri_string:recompose(uri_string:parse("?name=%C3%B6rn")),
+ "?name=%C3%B6rn#n%C3%A4sa" = uri_string:recompose(uri_string:parse("?name=%C3%B6rn#n%C3%A4sa")).
+
+recompose_path(_Config) ->
+ <<"/d%C3%A4r">> =
+ uri_string:recompose(#{path => <<"/där"/utf8>>}),
+ <<"/d%C3%A4r#n%C3%A4sa">> =
+ uri_string:recompose(#{path => <<"/där"/utf8>>,
+ fragment => <<"näsa"/utf8>>}),
+ <<"/d%C3%A4r?name=%C3%B6rn">> =
+ uri_string:recompose(#{path => <<"/där"/utf8>>,
+ query => <<"?name=örn"/utf8>>}),
+ <<"/d%C3%A4r?name=%C3%B6rn#n%C3%A4sa">> =
+ uri_string:recompose(#{path => <<"/där"/utf8>>,
+ query => <<"?name=örn"/utf8>>,
+ fragment => <<"näsa"/utf8>>}),
+
+
+ "/d%C3%A4r" =
+ uri_string:recompose(#{path => "/där"}),
+ "/d%C3%A4r#n%C3%A4sa" =
+ uri_string:recompose(#{path => "/där",
+ fragment => "näsa"}),
+ "/d%C3%A4r?name=%C3%B6rn" =
+ uri_string:recompose(#{path => "/där",
+ query => "?name=örn"}),
+ "/d%C3%A4r?name=%C3%B6rn#n%C3%A4sa" =
+ uri_string:recompose(#{path => "/där",
+ query => "?name=örn",
+ fragment => "näsa"}).
+
+
+recompose_parse_path(_Config) ->
+ <<"/d%C3%A4r">> =
+ uri_string:recompose(uri_string:parse(<<"/d%C3%A4r">>)),
+ <<"/d%C3%A4r#n%C3%A4sa">> =
+ uri_string:recompose(uri_string:parse(<<"/d%C3%A4r#n%C3%A4sa">>)),
+ <<"/d%C3%A4r?name=%C3%B6rn">> =
+ uri_string:recompose(uri_string:parse(<<"/d%C3%A4r?name=%C3%B6rn">>)),
+
+ "/d%C3%A4r" =
+ uri_string:recompose(uri_string:parse("/d%C3%A4r")),
+ "/d%C3%A4r#n%C3%A4sa" =
+ uri_string:recompose(uri_string:parse("/d%C3%A4r#n%C3%A4sa")),
+ "/d%C3%A4r?name=%C3%B6rn" =
+ uri_string:recompose(uri_string:parse("/d%C3%A4r?name=%C3%B6rn")).
+
+
+recompose_autogen(_Config) ->
+ Tests = generate_test_vectors(uri_combinations()),
+ lists:map(fun run_test_recompose/1, Tests).
+
+parse_recompose_autogen(_Config) ->
+ Tests = generate_test_vectors(uri_combinations()),
+ lists:map(fun run_test_parse_recompose/1, Tests).