aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorPéter Dimitrov <[email protected]>2017-09-19 16:07:49 +0200
committerPéter Dimitrov <[email protected]>2017-10-23 15:53:28 +0200
commit6c0c11eeaf0649cfbca5e426263c7dc43b49feff (patch)
tree2470505a10e93628bbf03fe59e20a63a944b84e6 /lib
parentec3f0c7f96531b714082f5af694a7ed6a02769ce (diff)
downloadotp-6c0c11eeaf0649cfbca5e426263c7dc43b49feff.tar.gz
otp-6c0c11eeaf0649cfbca5e426263c7dc43b49feff.tar.bz2
otp-6c0c11eeaf0649cfbca5e426263c7dc43b49feff.zip
stdlib: Add support to parse percent-encoded URIs
Diffstat (limited to 'lib')
-rwxr-xr-xlib/stdlib/src/uri_string.erl198
-rw-r--r--lib/stdlib/test/uri_string_SUITE.erl78
2 files changed, 217 insertions, 59 deletions
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index 3656d561be..50e8a0bf5a 100755
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -223,9 +223,9 @@
%%
-module(uri_string).
-
-export([compose_query/1, create_uri_reference/2, dissect_query/1, normalize/1,
parse/1, recompose/1, resolve_uri_reference/2, transcode/2]).
+-export([is_host/1, is_path/1]). % suppress warnings
-export_type([uri_map/0, uri_string/0]).
-define(CHAR(Char), <<Char/utf8>>).
@@ -383,31 +383,31 @@ parse_relative_part(?STRING_REST("//", Rest), URI) ->
try parse_userinfo(Rest, URI) of
{T, URI1} ->
{Userinfo, _} = split_binary(Rest, byte_size(Rest) - byte_size(T) - 1),
- URI1#{userinfo => Userinfo}
+ URI1#{userinfo => decode_userinfo(Userinfo)}
catch
throw:uri_parse_error ->
{T, URI1} = parse_host(Rest, URI),
{Host, _} = split_binary(Rest, byte_size_exl_single_slash(Rest) - byte_size_exl_head(T)),
- URI1#{host => remove_brackets(Host)}
+ URI1#{host => decode_host(remove_brackets(Host))}
end;
parse_relative_part(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-absolute
{Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- URI1#{path => ?STRING_REST($/, Path)};
+ URI1#{path => decode_path(?STRING_REST($/, Path))};
parse_relative_part(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
{Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- URI1#{query => ?STRING_REST($?, Query)};
+ URI1#{query => decode_query(?STRING_REST($?, Query))};
parse_relative_part(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
{Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
- URI1#{fragment => Fragment};
+ URI1#{fragment => decode_fragment(Fragment)};
parse_relative_part(?STRING_REST(Char, Rest), URI) ->
case is_segment_nz_nc(Char) of
true ->
{T, URI1} = parse_segment_nz_nc(Rest, URI), % path-noscheme
{Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- URI1#{path => ?STRING_REST(Char, Path)};
+ URI1#{path => decode_path(?STRING_REST(Char, Path))};
false -> throw(uri_parse_error)
end.
@@ -491,11 +491,11 @@ parse_segment(?STRING_REST($/, Rest), URI) ->
parse_segment(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % ?query
{Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- {Rest, URI1#{query => ?STRING_REST($?, Query)}};
+ {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_segment(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI),
{Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
- {Rest, URI1#{fragment => Fragment}};
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_segment(?STRING_REST(Char, Rest), URI) ->
case is_pchar(Char) of
true -> parse_segment(Rest, URI);
@@ -515,16 +515,16 @@ parse_segment([H|Rest], Acc, URI) when is_list(H) ->
parse_segment([$/|Rest], Acc, URI) ->
parse_segment(Rest, [$/|Acc], URI); % segment
parse_segment([$?|Rest], Acc, URI) ->
- parse_query(Rest, [$?], URI#{path => lists:reverse(Acc)}); % ?query
+ parse_query(Rest, [$?], URI#{path => decode_path(lists:reverse(Acc))}); % ?query
parse_segment([$#|Rest], Acc, URI) ->
- parse_fragment(Rest, [], URI#{path => lists:reverse(Acc)});
+ parse_fragment(Rest, [], URI#{path => decode_path(lists:reverse(Acc))});
parse_segment([Char|Rest], Acc, URI) ->
case is_pchar(Char) of
true -> parse_segment(Rest, [Char|Acc], URI);
false -> throw(uri_parse_error)
end;
parse_segment([], Acc, URI) ->
- URI#{path => lists:reverse(Acc)}.
+ URI#{path => decode_path(lists:reverse(Acc))}.
%%-------------------------------------------------------------------------
%% path-noscheme
@@ -535,11 +535,11 @@ parse_segment_nz_nc(?STRING_REST($/, Rest), URI) ->
parse_segment_nz_nc(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % ?query
{Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- {Rest, URI1#{query => ?STRING_REST($?, Query)}};
+ {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_segment_nz_nc(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI),
{Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
- {Rest, URI1#{fragment => Fragment}};
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) ->
case is_segment_nz_nc(Char) of
true -> parse_segment_nz_nc(Rest, URI);
@@ -559,16 +559,16 @@ parse_segment_nz_nc([H|Rest], Acc, URI) when is_list(H) ->
parse_segment_nz_nc([$/|Rest], Acc, URI) ->
parse_segment(Rest, [$/|Acc], URI); % segment
parse_segment_nz_nc([$?|Rest], Acc, URI) ->
- parse_query(Rest, [$?], URI#{path => lists:reverse(Acc)}); % ?query
+ parse_query(Rest, [$?], URI#{path => decode_path(lists:reverse(Acc))}); % ?query
parse_segment_nz_nc([$#|Rest], Acc, URI) ->
- parse_fragment(Rest, [], URI#{path => lists:reverse(Acc)});
+ parse_fragment(Rest, [], URI#{path => decode_path(lists:reverse(Acc))});
parse_segment_nz_nc([Char|Rest], Acc, URI) ->
case is_segment_nz_nc(Char) of
true -> parse_segment_nz_nc(Rest, [Char|Acc], URI);
false -> throw(uri_parse_error)
end;
parse_segment_nz_nc([], Acc, URI) ->
- URI#{path => lists:reverse(Acc)}.
+ URI#{path => decode_path(lists:reverse(Acc))}.
%% Check if char is pchar.
-spec is_pchar(char()) -> boolean().
@@ -664,31 +664,31 @@ parse_hier(?STRING_REST("//", Rest), URI) ->
try parse_userinfo(Rest, URI) of
{T, URI1} ->
{Userinfo, _} = split_binary(Rest, byte_size(Rest) - byte_size(T) - 1),
- {Rest, URI1#{userinfo => Userinfo}}
+ {Rest, URI1#{userinfo => decode_userinfo(Userinfo)}}
catch
throw:uri_parse_error ->
{T, URI1} = parse_host(Rest, URI),
{Host, _} = split_binary(Rest, byte_size_exl_single_slash(Rest) - byte_size_exl_head(T)),
- {Rest, URI1#{host => remove_brackets(Host)}}
+ {Rest, URI1#{host => decode_host(remove_brackets(Host))}}
end;
parse_hier(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-absolute
{Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- {Rest, URI1#{path => ?STRING_REST($/, Path)}};
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_hier(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
{Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- {Rest, URI1#{query => ?STRING_REST($?, Query)}};
+ {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_hier(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
{Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
- {Rest, URI1#{fragment => Fragment}};
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless
case is_pchar(Char) of
true -> % segment_nz
{T, URI1} = parse_segment(Rest, URI),
{Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- {Rest, URI1#{path => ?STRING_REST(Char, Path)}};
+ {Rest, URI1#{path => decode_path(?STRING_REST(Char, Path))}};
false -> throw(uri_parse_error)
end;
parse_hier(?STRING_EMPTY, URI) ->
@@ -756,7 +756,7 @@ parse_userinfo(?CHAR($@), _URI) ->
parse_userinfo(?STRING_REST($@, Rest), URI) ->
{T, URI1} = parse_host(Rest, URI),
{Host, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- {Rest, URI1#{host => remove_brackets(Host)}};
+ {Rest, URI1#{host => decode_host(remove_brackets(Host))}};
parse_userinfo(?STRING_REST(Char, Rest), URI) ->
case is_userinfo(Char) of
true -> parse_userinfo(Rest, URI);
@@ -778,11 +778,11 @@ parse_userinfo([$@], _Acc, _URI) ->
%% URI cannot end in userinfo state
throw(uri_parse_error);
parse_userinfo([$@|Rest], Acc, URI) ->
- parse_host(Rest, [], URI#{userinfo => lists:reverse(Acc)});
+ parse_host(Rest, [], URI#{userinfo => decode_userinfo(lists:reverse(Acc))});
parse_userinfo([Char|Rest], Acc, URI) ->
case is_userinfo(Char) of
true -> parse_userinfo(Rest, [Char|Acc], URI);
- false -> throw(uri_parse_error) % URI#{userinfo => lists:reverse(Acc)}
+ false -> throw(uri_parse_error)
end;
parse_userinfo([], _Acc, _URI) ->
%% URI cannot end in userinfo state
@@ -843,17 +843,17 @@ parse_host(?STRING_REST($:, Rest), URI) ->
parse_host(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-abempty
{Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- {Rest, URI1#{path => ?STRING_REST($/, Path)}};
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_host(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
{Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- {Rest, URI1#{query => ?STRING_REST($?, Query)}};
+ {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_host(?STRING_REST($[, Rest), URI) ->
parse_ipv6_bin(Rest, [], URI);
parse_host(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
{Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
- {Rest, URI1#{fragment => Fragment}};
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_host(?STRING_REST(Char, Rest), URI) ->
case is_digit(Char) of
true -> parse_ipv4_bin(Rest, [Char], URI);
@@ -871,13 +871,13 @@ parse_host([H|Rest], Acc, URI) when is_binary(H) ->
parse_host([H|Rest], Acc, URI) when is_list(H) ->
parse_host(H ++ Rest, Acc, URI);
parse_host([$:|Rest], Acc, URI) ->
- parse_port(Rest, [], URI#{host => lists:reverse(Acc)});
+ parse_port(Rest, [], URI#{host => decode_host(lists:reverse(Acc))});
parse_host([$/|Rest], Acc, URI) ->
- parse_segment(Rest, [$/], URI#{host => lists:reverse(Acc)}); % path-abempty
+ parse_segment(Rest, [$/], URI#{host => decode_host(lists:reverse(Acc))}); % path-abempty
parse_host([$?|Rest], Acc, URI) ->
- parse_query(Rest, [$?], URI#{host => lists:reverse(Acc)}); % path-empty ?query
+ parse_query(Rest, [$?], URI#{host => decode_host(lists:reverse(Acc))}); % path-empty ?query
parse_host([$#|Rest], Acc, URI) ->
- parse_fragment(Rest, [], URI#{host => lists:reverse(Acc)}); % path-empty
+ parse_fragment(Rest, [], URI#{host => decode_host(lists:reverse(Acc))}); % path-empty
parse_host([$[|Rest], _Acc, URI) ->
parse_ipv6(Rest, [], URI);
parse_host([Char|Rest], Acc, URI) ->
@@ -886,7 +886,7 @@ parse_host([Char|Rest], Acc, URI) ->
false -> parse_reg_name([Char|Rest], Acc, URI)
end;
parse_host([], Acc, URI) ->
- URI#{host => lists:reverse(Acc)}.
+ URI#{host => decode_host(lists:reverse(Acc))}.
-spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}.
@@ -898,15 +898,15 @@ parse_reg_name(?STRING_REST($:, Rest), URI) ->
parse_reg_name(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-abempty
{Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- {Rest, URI1#{path => ?STRING_REST($/, Path)}};
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_reg_name(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
{Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- {Rest, URI1#{query => ?STRING_REST($?, Query)}};
+ {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_reg_name(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
{Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
- {Rest, URI1#{fragment => Fragment}};
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_reg_name(?STRING_REST(Char, Rest), URI) ->
case is_reg_name(Char) of
true -> parse_reg_name(Rest, URI);
@@ -924,20 +924,20 @@ parse_reg_name([H|Rest], Acc, URI) when is_binary(H) ->
parse_reg_name([H|Rest], Acc, URI) when is_list(H) ->
parse_reg_name(H ++ Rest, Acc, URI);
parse_reg_name([$:|Rest], Acc, URI) ->
- parse_port(Rest, [], URI#{host => lists:reverse(Acc)});
+ parse_port(Rest, [], URI#{host => decode_host(lists:reverse(Acc))});
parse_reg_name([$/|Rest], Acc, URI) ->
- parse_segment(Rest, [$/], URI#{host => lists:reverse(Acc)}); % path-abempty
+ parse_segment(Rest, [$/], URI#{host => decode_host(lists:reverse(Acc))}); % path-abempty
parse_reg_name([$?|Rest], Acc, URI) ->
- parse_query(Rest, [$?], URI#{host => lists:reverse(Acc)}); % path-empty ?query
+ parse_query(Rest, [$?], URI#{host => decode_host(lists:reverse(Acc))}); % path-empty ?query
parse_reg_name([$#|Rest], Acc, URI) ->
- parse_fragment(Rest, [], URI#{host => lists:reverse(Acc)}); % path-empty
+ parse_fragment(Rest, [], URI#{host => decode_host(lists:reverse(Acc))}); % path-empty
parse_reg_name([Char|Rest], Acc, URI) ->
case is_reg_name(Char) of
true -> parse_reg_name(Rest, [Char|Acc], URI);
false -> throw(uri_parse_error)
end;
parse_reg_name([], Acc, URI) ->
- URI#{host => lists:reverse(Acc)}.
+ URI#{host => decode_host(lists:reverse(Acc))}.
%% Check if char is allowed in reg-name
-spec is_reg_name(char()) -> boolean().
@@ -956,17 +956,17 @@ parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
{T, URI1} = parse_segment(Rest, URI), % path-abempty
{Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- {Rest, URI1#{path => ?STRING_REST($/, Path)}};
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
{Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- {Rest, URI1#{query => ?STRING_REST($?, Query)}};
+ {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
{T, URI1} = parse_fragment(Rest, URI), % path-empty
{Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
- {Rest, URI1#{fragment => Fragment}};
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) ->
case is_ipv4(Char) of
true -> parse_ipv4_bin(Rest, [Char|Acc], URI);
@@ -1062,15 +1062,15 @@ parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) ->
parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-abempty
{Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- {Rest, URI1#{path => ?STRING_REST($/, Path)}};
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
{Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- {Rest, URI1#{query => ?STRING_REST($?, Query)}};
+ {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
{Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
- {Rest, URI1#{fragment => Fragment}};
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) ->
case is_ipv6(Char) of
true -> parse_ipv6_bin_end(Rest, URI);
@@ -1120,15 +1120,15 @@ validate_ipv6_address(Addr) ->
parse_port(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-abempty
{Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- {Rest, URI1#{path => ?STRING_REST($/, Path)}};
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_port(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
{Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
- {Rest, URI1#{query => ?STRING_REST($?, Query)}};
+ {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_port(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
{Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
- {Rest, URI1#{fragment => Fragment}};
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_port(?STRING_REST(Char, Rest), URI) ->
case is_digit(Char) of
true -> parse_port(Rest, URI);
@@ -1180,7 +1180,7 @@ parse_port([], Acc, URI) ->
parse_query(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI),
{Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
- {Rest, URI1#{fragment => Fragment}};
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_query(?STRING_REST(Char, Rest), URI) ->
case is_query(Char) of
true -> parse_query(Rest, URI);
@@ -1198,18 +1198,19 @@ parse_query([H|Rest], Acc, URI) when is_binary(H) ->
parse_query([H|Rest], Acc, URI) when is_list(H) ->
parse_query(H ++ Rest, Acc, URI);
parse_query([$#|Rest], Acc, URI) ->
- parse_fragment(Rest, [], URI#{query => lists:reverse(Acc)});
+ parse_fragment(Rest, [], URI#{query => decode_query(lists:reverse(Acc))});
parse_query([Char|Rest], Acc, URI) ->
case is_query(Char) of
true -> parse_query(Rest, [Char|Acc], URI);
false -> throw(uri_parse_error)
end;
parse_query([], Acc, URI) ->
- URI#{query => lists:reverse(Acc)}.
+ URI#{query => decode_query(lists:reverse(Acc))}.
%% Check if char is allowed in query
-spec is_query(char()) -> boolean().
is_query($/) -> true;
+is_query($?) -> true;
is_query(Char) -> is_pchar(Char).
@@ -1245,7 +1246,7 @@ parse_fragment([Char|Rest], Acc, URI) ->
false -> throw(uri_parse_error)
end;
parse_fragment([], Acc, URI) ->
- URI#{fragment => lists:reverse(Acc)}.
+ URI#{fragment => decode_fragment(lists:reverse(Acc))}.
%% Check if char is allowed in fragment
-spec is_fragment(char()) -> boolean().
@@ -1339,3 +1340,90 @@ remove_brackets(?STRING_REST($[,Addr)) ->
A1 = binary:replace(Addr, <<$[>>, <<>>),
binary:replace(A1, <<$]>>, <<>>);
remove_brackets(Addr) -> Addr.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 2.1. Percent-Encoding]
+%%
+%% A percent-encoding mechanism is used to represent a data octet in a
+%% component when that octet's corresponding character is outside the
+%% allowed set or is being used as a delimiter of, or within, the
+%% component. A percent-encoded octet is encoded as a character
+%% triplet, consisting of the percent character "%" followed by the two
+%% hexadecimal digits representing that octet's numeric value. For
+%% example, "%20" is the percent-encoding for the binary octet
+%% "00100000" (ABNF: %x20), which in US-ASCII corresponds to the space
+%% character (SP). Section 2.4 describes when percent-encoding and
+%% decoding is applied.
+%%
+%% pct-encoded = "%" HEXDIG HEXDIG
+%%-------------------------------------------------------------------------
+-spec decode_userinfo(list()|binary()) -> list() | binary().
+decode_userinfo(Cs) ->
+ decode(Cs, fun is_userinfo/1, <<>>).
+
+
+-spec decode_host(list()|binary()) -> list() | binary().
+decode_host(Cs) ->
+ decode(Cs, fun is_host/1, <<>>).
+
+%% Check if char is allowed in host
+-spec is_host(char()) -> boolean().
+is_host($:) -> true;
+is_host(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+
+-spec decode_path(list()|binary()) -> list() | binary().
+decode_path(Cs) ->
+ decode(Cs, fun is_path/1, <<>>).
+
+%% Check if char is allowed in path
+-spec is_path(char()) -> boolean().
+is_path($/) -> true;
+
+is_path(Char) -> is_pchar(Char).
+
+
+-spec decode_query(list()|binary()) -> list() | binary().
+decode_query(Cs) ->
+ decode(Cs, fun is_query/1, <<>>).
+
+-spec decode_fragment(list()|binary()) -> list() | binary().
+decode_fragment(Cs) ->
+ decode(Cs, fun is_host/1, <<>>).
+
+
+-spec decode(list()|binary(), fun(), binary()) -> list() | binary().
+decode(<<$%,C0,C1,Cs/binary>>, Fun, Acc) ->
+ case is_hex_digit(C0) andalso is_hex_digit(C1) of
+ true ->
+ B = hex2dec(C0)*16+hex2dec(C1),
+ decode(Cs, Fun, <<Acc/binary, B>>);
+ false -> throw(uri_parse_error)
+ end;
+decode(<<C,Cs/binary>>, Fun, Acc) ->
+ case Fun(C) of
+ true -> decode(Cs, Fun, <<Acc/binary, C>>);
+ false -> throw(uri_parse_error)
+ end;
+decode(<<>>, _Fun, Acc) ->
+ Acc;
+decode([$%,C0,C1|Cs], Fun, Acc) ->
+ case is_hex_digit(C0) andalso is_hex_digit(C1) of
+ true ->
+ B = hex2dec(C0)*16+hex2dec(C1),
+ decode(Cs, Fun, <<Acc/binary, B>>);
+ false -> throw(uri_parse_error)
+ end;
+decode([C|Cs], Fun, Acc) ->
+ case Fun(C) of
+ true -> decode(Cs, Fun, <<Acc/binary, C>>);
+ false -> throw(uri_parse_error)
+ end;
+decode([], _Fun, Acc) ->
+ unicode:characters_to_list(Acc).
+
+
+hex2dec(X) when (X >= $0) andalso (X =< $9) -> X - $0;
+hex2dec(X) when (X >= $A) andalso (X =< $F) -> X - $A + 10;
+hex2dec(X) when (X >= $a) andalso (X =< $f) -> X - $a + 10.
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index 9b8e52f0b2..c379eeb15b 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -24,10 +24,12 @@
-export([all/0, suite/0,groups/0,
parse_binary_fragment/1, parse_binary_host/1, parse_binary_host_ipv4/1,
parse_binary_host_ipv6/1,
- parse_binary_path/1, parse_binary_port/1,
+ parse_binary_path/1, parse_binary_pct_encoded_fragment/1, parse_binary_pct_encoded_query/1,
+ parse_binary_pct_encoded_userinfo/1, parse_binary_port/1,
parse_binary_query/1, parse_binary_scheme/1, parse_binary_userinfo/1,
parse_fragment/1, parse_host/1, parse_host_ipv4/1, parse_host_ipv6/1,
- parse_path/1, parse_port/1,
+ parse_path/1, parse_pct_encoded_fragment/1, parse_pct_encoded_query/1,
+ parse_pct_encoded_userinfo/1, parse_port/1,
parse_query/1, parse_scheme/1, parse_userinfo/1,
parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1
]).
@@ -39,22 +41,28 @@ all() ->
[
parse_binary_scheme,
parse_binary_userinfo,
+ parse_binary_pct_encoded_userinfo,
parse_binary_host,
parse_binary_host_ipv4,
parse_binary_host_ipv6,
parse_binary_port,
parse_binary_path,
parse_binary_query,
+ parse_binary_pct_encoded_query,
parse_binary_fragment,
+ parse_binary_pct_encoded_fragment,
parse_scheme,
parse_userinfo,
+ parse_pct_encoded_userinfo,
parse_host,
parse_host_ipv4,
parse_host_ipv6,
parse_port,
parse_path,
parse_query,
+ parse_pct_encoded_query,
parse_fragment,
+ parse_pct_encoded_fragment,
parse_list,
parse_binary,
parse_mixed,
@@ -92,8 +100,27 @@ parse_binary_userinfo(_Config) ->
uri_string:parse(<<"foo://user@localhost">>),
#{scheme := <<"foo">>, userinfo := <<"user:password">>, host := <<"localhost">>} =
uri_string:parse(<<"foo://user:password@localhost">>),
- uri_parse_error =(catch uri_string:parse("//user@")),
- uri_parse_error = (catch uri_string:parse("foo://user@")).
+ uri_parse_error =(catch uri_string:parse(<<"//user@">>)),
+ uri_parse_error = (catch uri_string:parse(<<"foo://user@">>)).
+
+parse_binary_pct_encoded_userinfo(_Config) ->
+ #{scheme := <<"user">>, path := <<"合@気道"/utf8>>} =
+ uri_string:parse(<<"user:%E5%90%88@%E6%B0%97%E9%81%93">>),
+ #{path := <<"合気道@"/utf8>>} = uri_string:parse(<<"%E5%90%88%E6%B0%97%E9%81%93@">>),
+ #{path := <<"/合気道@"/utf8>>} = uri_string:parse(<<"/%E5%90%88%E6%B0%97%E9%81%93@">>),
+ #{path := <<"合@気道"/utf8>>} = uri_string:parse(<<"%E5%90%88@%E6%B0%97%E9%81%93">>),
+ #{userinfo := <<"合"/utf8>>, host := <<"気道"/utf8>>} =
+ uri_string:parse(<<"//%E5%90%88@%E6%B0%97%E9%81%93">>),
+ #{userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} =
+ uri_string:parse(<<"//%E5%90%88:%E6%B0%97@%E9%81%93">>),
+ #{scheme := <<"foo">>, path := <<"/合気道@"/utf8>>} =
+ uri_string:parse(<<"foo:/%E5%90%88%E6%B0%97%E9%81%93@">>),
+ #{scheme := <<"foo">>, userinfo := <<"合"/utf8>>, host := <<"気道"/utf8>>} =
+ uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93">>),
+ #{scheme := <<"foo">>, userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} =
+ uri_string:parse(<<"foo://%E5%90%88:%E6%B0%97@%E9%81%93">>),
+ uri_parse_error =(catch uri_string:parse(<<"//%E5%90%88@%E6%B0%97%E9%81%93@">>)),
+ uri_parse_error = (catch uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93@">>)).
parse_binary_host(_Config) ->
#{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>),
@@ -176,6 +203,12 @@ parse_binary_query(_Config) ->
#{host := <<"example.com">>, path := <<"/">>, query := <<"?name=ferret">>} =
uri_string:parse(<<"//example.com/?name=ferret">>).
+parse_binary_pct_encoded_query(_Config) ->
+ #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>,
+ query := <<"?name=合気道"/utf8>>} =
+ uri_string:parse(<<"foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>),
+ #{host := <<"example.com">>, path := <<"/">>, query := <<"?name=合気道"/utf8>>} =
+ uri_string:parse(<<"//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>).
parse_binary_fragment(_Config) ->
#{scheme := <<"foo">>, fragment := <<"nose">>} =
@@ -204,6 +237,12 @@ parse_binary_fragment(_Config) ->
#{host := <<"example.com">>, path := <<"/">>, fragment := <<"nose">>} =
uri_string:parse(<<"//example.com/#nose">>).
+parse_binary_pct_encoded_fragment(_Config) ->
+ #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"合気道"/utf8>>} =
+ uri_string:parse(<<"foo://example.com#%E5%90%88%E6%B0%97%E9%81%93">>),
+ #{host := <<"example.com">>, path := <<"/">>, fragment := <<"合気道"/utf8>>} =
+ uri_string:parse(<<"//example.com/#%E5%90%88%E6%B0%97%E9%81%93">>).
+
parse_scheme(_Config) ->
#{} = uri_string:parse(""),
#{path := "foo"} = uri_string:parse("foo"),
@@ -232,6 +271,26 @@ parse_userinfo(_Config) ->
#{scheme := "foo", userinfo := "user:password", host := "localhost"} =
uri_string:parse("foo://user:password@localhost").
+parse_pct_encoded_userinfo(_Config) ->
+ #{scheme := "user", path := "合@気道"} =
+ uri_string:parse("user:%E5%90%88@%E6%B0%97%E9%81%93"),
+ #{path := "合気道@"} = uri_string:parse("%E5%90%88%E6%B0%97%E9%81%93@"),
+ #{path := "/合気道@"} = uri_string:parse("/%E5%90%88%E6%B0%97%E9%81%93@"),
+ #{path := "合@気道"} = uri_string:parse("%E5%90%88@%E6%B0%97%E9%81%93"),
+ #{userinfo := "合", host := "気道"} =
+ uri_string:parse("//%E5%90%88@%E6%B0%97%E9%81%93"),
+ #{userinfo := "合:気", host := "道"} =
+ uri_string:parse("//%E5%90%88:%E6%B0%97@%E9%81%93"),
+ #{scheme := "foo", path := "/合気道@"} =
+ uri_string:parse("foo:/%E5%90%88%E6%B0%97%E9%81%93@"),
+ #{scheme := "foo", userinfo := "合", host := "気道"} =
+ uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93"),
+ #{scheme := "foo", userinfo := "合:気", host := "道"} =
+ uri_string:parse("foo://%E5%90%88:%E6%B0%97@%E9%81%93"),
+ uri_parse_error =(catch uri_string:parse("//%E5%90%88@%E6%B0%97%E9%81%93@")),
+ uri_parse_error = (catch uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93@")).
+
+
parse_host(_Config) ->
#{host := "hostname"} = uri_string:parse("//hostname"),
#{host := "hostname",scheme := "foo"} = uri_string:parse("foo://hostname"),
@@ -307,6 +366,12 @@ parse_query(_Config) ->
#{host := "example.com", path := "/", query := "?name=ferret"} =
uri_string:parse("//example.com/?name=ferret").
+parse_pct_encoded_query(_Config) ->
+ #{scheme := "foo", host := "example.com", path := "/",
+ query := "?name=合気道"} =
+ uri_string:parse("foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93"),
+ #{host := "example.com", path := "/", query := "?name=合気道"} =
+ uri_string:parse("//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93").
parse_fragment(_Config) ->
#{scheme := "foo", fragment := "nose"} =
@@ -335,6 +400,11 @@ parse_fragment(_Config) ->
#{host := "example.com", path := "/", fragment := "nose"} =
uri_string:parse("//example.com/#nose").
+parse_pct_encoded_fragment(_Config) ->
+ #{scheme := "foo", host := "example.com", fragment := "合気道"} =
+ uri_string:parse("foo://example.com#%E5%90%88%E6%B0%97%E9%81%93"),
+ #{host := "example.com", path := "/", fragment := "合気道"} =
+ uri_string:parse("//example.com/#%E5%90%88%E6%B0%97%E9%81%93").
parse_list(_Config) ->
#{scheme := "foo", path := "bar:nisse"} = uri_string:parse("foo:bar:nisse"),