From 1ff534f6e410c4904b6e65dbfc9135d34445685d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?= <peterdmv@erlang.org>
Date: Fri, 8 Jun 2018 13:24:02 +0200
Subject: stdlib: Fix normalization function in uri_string

- Fix parsing of hostnames that start with a number.
- Update uri_string:parse/1 to be only responsible for parsing
  input URIs into URI components. Implicit percent-encoding
  normalization has been removed.
- Implement percent-encoding normalization.
- Update uri_string:normalize/{1,2} to include percent-encoding
  normalization.
- Update test suites according to the new semantics.
- Add new property test: normalize

Change-Id: I6f37dcae2b3fcb4b29d286dbb0dfc563e8f211ae
---
 lib/stdlib/src/uri_string.erl                      | 206 ++++++++++------
 .../test/property_test/uri_string_recompose.erl    | 260 ++++++++++++++++++--
 lib/stdlib/test/uri_string_SUITE.erl               | 261 ++++++++++++++++++---
 lib/stdlib/test/uri_string_property_test_SUITE.erl |   7 +-
 4 files changed, 593 insertions(+), 141 deletions(-)

diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index 28d36ea229..48cce90d68 100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -297,7 +297,10 @@
       NormalizedURI :: uri_string()
                      | error().
 normalize(URIMap) ->
-    normalize(URIMap, []).
+    try normalize(URIMap, [])
+    catch
+        throw:{error, Atom, RestData} -> {error, Atom, RestData}
+    end.
 
 
 -spec normalize(URI, Options) -> NormalizedURI when
@@ -523,34 +526,34 @@ parse_relative_part(?STRING_REST("//", Rest), URI) ->
         {T, URI1} ->
             Userinfo = calculate_parsed_userinfo(Rest, T),
             URI2 = maybe_add_path(URI1),
-            URI2#{userinfo => decode_userinfo(Userinfo)}
+            URI2#{userinfo => Userinfo}
     catch
         throw:{_,_,_} ->
             {T, URI1} = parse_host(Rest, URI),
             Host = calculate_parsed_host_port(Rest, T),
             URI2 = maybe_add_path(URI1),
-            URI2#{host => decode_host(remove_brackets(Host))}
+            URI2#{host => remove_brackets(Host)}
     end;
 parse_relative_part(?STRING_REST($/, Rest), URI) ->
     {T, URI1} = parse_segment(Rest, URI),  % path-absolute
     Path = calculate_parsed_part(Rest, T),
-    URI1#{path => decode_path(?STRING_REST($/, Path))};
+    URI1#{path => ?STRING_REST($/, Path)};
 parse_relative_part(?STRING_REST($?, Rest), URI) ->
     {T, URI1} = parse_query(Rest, URI),  % path-empty ?query
     Query = calculate_parsed_query_fragment(Rest, T),
     URI2 = maybe_add_path(URI1),
-    URI2#{query => decode_query(Query)};
+    URI2#{query => Query};
 parse_relative_part(?STRING_REST($#, Rest), URI) ->
     {T, URI1} = parse_fragment(Rest, URI),  % path-empty
     Fragment = calculate_parsed_query_fragment(Rest, T),
     URI2 = maybe_add_path(URI1),
-    URI2#{fragment => decode_fragment(Fragment)};
+    URI2#{fragment => Fragment};
 parse_relative_part(?STRING_REST(Char, Rest), URI) ->
     case is_segment_nz_nc(Char) of
         true ->
             {T, URI1} = parse_segment_nz_nc(Rest, URI),  % path-noscheme
             Path = calculate_parsed_part(Rest, T),
-            URI1#{path => decode_path(?STRING_REST(Char, Path))};
+            URI1#{path => ?STRING_REST(Char, Path)};
         false -> throw({error,invalid_uri,[Char]})
     end.
 
@@ -593,11 +596,11 @@ parse_segment(?STRING_REST($/, Rest), URI) ->
 parse_segment(?STRING_REST($?, Rest), URI) ->
     {T, URI1} = parse_query(Rest, URI),  % ?query
     Query = calculate_parsed_query_fragment(Rest, T),
-    {Rest, URI1#{query => decode_query(Query)}};
+    {Rest, URI1#{query => Query}};
 parse_segment(?STRING_REST($#, Rest), URI) ->
     {T, URI1} = parse_fragment(Rest, URI),
     Fragment = calculate_parsed_query_fragment(Rest, T),
-    {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+    {Rest, URI1#{fragment => Fragment}};
 parse_segment(?STRING_REST(Char, Rest), URI) ->
     case is_pchar(Char) of
         true -> parse_segment(Rest, URI);
@@ -616,11 +619,11 @@ parse_segment_nz_nc(?STRING_REST($/, Rest), URI) ->
 parse_segment_nz_nc(?STRING_REST($?, Rest), URI) ->
     {T, URI1} = parse_query(Rest, URI),  % ?query
     Query = calculate_parsed_query_fragment(Rest, T),
-    {Rest, URI1#{query => decode_query(Query)}};
+    {Rest, URI1#{query => Query}};
 parse_segment_nz_nc(?STRING_REST($#, Rest), URI) ->
     {T, URI1} = parse_fragment(Rest, URI),
     Fragment = calculate_parsed_query_fragment(Rest, T),
-    {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+    {Rest, URI1#{fragment => Fragment}};
 parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) ->
     case is_segment_nz_nc(Char) of
         true -> parse_segment_nz_nc(Rest, URI);
@@ -709,31 +712,31 @@ parse_hier(?STRING_REST("//", Rest), URI) ->
     try parse_userinfo(Rest, URI) of
         {T, URI1} ->
             Userinfo = calculate_parsed_userinfo(Rest, T),
-	    {Rest, URI1#{userinfo => decode_userinfo(Userinfo)}}
+	    {Rest, URI1#{userinfo => Userinfo}}
     catch
         throw:{_,_,_} ->
             {T, URI1} = parse_host(Rest, URI),
             Host = calculate_parsed_host_port(Rest, T),
-	    {Rest, URI1#{host => decode_host(remove_brackets(Host))}}
+	    {Rest, URI1#{host => remove_brackets(Host)}}
     end;
 parse_hier(?STRING_REST($/, Rest), URI) ->
     {T, URI1} = parse_segment(Rest, URI),  % path-absolute
     Path = calculate_parsed_part(Rest, T),
-    {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+    {Rest, URI1#{path => ?STRING_REST($/, Path)}};
 parse_hier(?STRING_REST($?, Rest), URI) ->
     {T, URI1} = parse_query(Rest, URI),  % path-empty ?query
     Query = calculate_parsed_query_fragment(Rest, T),
-    {Rest, URI1#{query => decode_query(Query)}};
+    {Rest, URI1#{query => Query}};
 parse_hier(?STRING_REST($#, Rest), URI) ->
     {T, URI1} = parse_fragment(Rest, URI),  % path-empty
     Fragment = calculate_parsed_query_fragment(Rest, T),
-    {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+    {Rest, URI1#{fragment => Fragment}};
 parse_hier(?STRING_REST(Char, Rest), URI) ->  % path-rootless
     case is_pchar(Char) of
         true ->  % segment_nz
             {T, URI1} = parse_segment(Rest, URI),
             Path = calculate_parsed_part(Rest, T),
-            {Rest, URI1#{path => decode_path(?STRING_REST(Char, Path))}};
+            {Rest, URI1#{path => ?STRING_REST(Char, Path)}};
         false -> throw({error,invalid_uri,[Char]})
     end;
 parse_hier(?STRING_EMPTY, URI) ->
@@ -770,7 +773,7 @@ parse_userinfo(?CHAR($@), URI) ->
 parse_userinfo(?STRING_REST($@, Rest), URI) ->
     {T, URI1} = parse_host(Rest, URI),
     Host = calculate_parsed_host_port(Rest, T),
-    {Rest, URI1#{host => decode_host(remove_brackets(Host))}};
+    {Rest, URI1#{host => remove_brackets(Host)}};
 parse_userinfo(?STRING_REST(Char, Rest), URI) ->
     case is_userinfo(Char) of
         true -> parse_userinfo(Rest, URI);
@@ -836,20 +839,25 @@ parse_host(?STRING_REST($:, Rest), URI) ->
 parse_host(?STRING_REST($/, Rest), URI) ->
     {T, URI1} = parse_segment(Rest, URI),  % path-abempty
     Path = calculate_parsed_part(Rest, T),
-    {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+    {Rest, URI1#{path => ?STRING_REST($/, Path)}};
 parse_host(?STRING_REST($?, Rest), URI) ->
     {T, URI1} = parse_query(Rest, URI),  % path-empty ?query
     Query = calculate_parsed_query_fragment(Rest, T),
-    {Rest, URI1#{query => decode_query(Query)}};
+    {Rest, URI1#{query => Query}};
 parse_host(?STRING_REST($[, Rest), URI) ->
     parse_ipv6_bin(Rest, [], URI);
 parse_host(?STRING_REST($#, Rest), URI) ->
     {T, URI1} = parse_fragment(Rest, URI),  % path-empty
     Fragment = calculate_parsed_query_fragment(Rest, T),
-    {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+    {Rest, URI1#{fragment => Fragment}};
 parse_host(?STRING_REST(Char, Rest), URI) ->
     case is_digit(Char) of
-        true -> parse_ipv4_bin(Rest, [Char], URI);
+        true ->
+            try parse_ipv4_bin(Rest, [Char], URI)
+            catch
+                throw:{_,_,_} ->
+                    parse_reg_name(?STRING_REST(Char, Rest), URI)
+            end;
         false -> parse_reg_name(?STRING_REST(Char, Rest), URI)
     end;
 parse_host(?STRING_EMPTY, URI) ->
@@ -865,15 +873,15 @@ parse_reg_name(?STRING_REST($:, Rest), URI) ->
 parse_reg_name(?STRING_REST($/, Rest), URI) ->
     {T, URI1} = parse_segment(Rest, URI),  % path-abempty
     Path = calculate_parsed_part(Rest, T),
-    {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+    {Rest, URI1#{path => ?STRING_REST($/, Path)}};
 parse_reg_name(?STRING_REST($?, Rest), URI) ->
     {T, URI1} = parse_query(Rest, URI),  % path-empty ?query
     Query = calculate_parsed_query_fragment(Rest, T),
-    {Rest, URI1#{query => decode_query(Query)}};
+    {Rest, URI1#{query => Query}};
 parse_reg_name(?STRING_REST($#, Rest), URI) ->
     {T, URI1} = parse_fragment(Rest, URI),  % path-empty
     Fragment = calculate_parsed_query_fragment(Rest, T),
-    {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+    {Rest, URI1#{fragment => Fragment}};
 parse_reg_name(?STRING_REST(Char, Rest), URI) ->
     case is_reg_name(Char) of
         true -> parse_reg_name(Rest, URI);
@@ -899,17 +907,17 @@ parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) ->
     _ = validate_ipv4_address(lists:reverse(Acc)),
     {T, URI1} = parse_segment(Rest, URI),  % path-abempty
     Path = calculate_parsed_part(Rest, T),
-    {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+    {Rest, URI1#{path => ?STRING_REST($/, Path)}};
 parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) ->
     _ = validate_ipv4_address(lists:reverse(Acc)),
     {T, URI1} = parse_query(Rest, URI),  % path-empty ?query
     Query = calculate_parsed_query_fragment(Rest, T),
-    {Rest, URI1#{query => decode_query(Query)}};
+    {Rest, URI1#{query => Query}};
 parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) ->
     _ = validate_ipv4_address(lists:reverse(Acc)),
     {T, URI1} = parse_fragment(Rest, URI),  % path-empty
     Fragment = calculate_parsed_query_fragment(Rest, T),
-    {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+    {Rest, URI1#{fragment => Fragment}};
 parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) ->
     case is_ipv4(Char) of
         true -> parse_ipv4_bin(Rest, [Char|Acc], URI);
@@ -961,15 +969,15 @@ parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) ->
 parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) ->
     {T, URI1} = parse_segment(Rest, URI),  % path-abempty
     Path = calculate_parsed_part(Rest, T),
-    {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+    {Rest, URI1#{path => ?STRING_REST($/, Path)}};
 parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) ->
     {T, URI1} = parse_query(Rest, URI),  % path-empty ?query
     Query = calculate_parsed_query_fragment(Rest, T),
-    {Rest, URI1#{query => decode_query(Query)}};
+    {Rest, URI1#{query => Query}};
 parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) ->
     {T, URI1} = parse_fragment(Rest, URI),  % path-empty
     Fragment = calculate_parsed_query_fragment(Rest, T),
-    {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+    {Rest, URI1#{fragment => Fragment}};
 parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) ->
     case is_ipv6(Char) of
         true -> parse_ipv6_bin_end(Rest, URI);
@@ -999,15 +1007,15 @@ validate_ipv6_address(Addr) ->
 parse_port(?STRING_REST($/, Rest), URI) ->
     {T, URI1} = parse_segment(Rest, URI),  % path-abempty
     Path = calculate_parsed_part(Rest, T),
-    {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+    {Rest, URI1#{path => ?STRING_REST($/, Path)}};
 parse_port(?STRING_REST($?, Rest), URI) ->
     {T, URI1} = parse_query(Rest, URI),  % path-empty ?query
     Query = calculate_parsed_query_fragment(Rest, T),
-    {Rest, URI1#{query => decode_query(Query)}};
+    {Rest, URI1#{query => Query}};
 parse_port(?STRING_REST($#, Rest), URI) ->
     {T, URI1} = parse_fragment(Rest, URI),  % path-empty
     Fragment = calculate_parsed_query_fragment(Rest, T),
-    {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+    {Rest, URI1#{fragment => Fragment}};
 parse_port(?STRING_REST(Char, Rest), URI) ->
     case is_digit(Char) of
         true -> parse_port(Rest, URI);
@@ -1033,7 +1041,7 @@ parse_port(?STRING_EMPTY, URI) ->
 parse_query(?STRING_REST($#, Rest), URI) ->
     {T, URI1} = parse_fragment(Rest, URI),
     Fragment = calculate_parsed_query_fragment(Rest, T),
-    {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+    {Rest, URI1#{fragment => Fragment}};
 parse_query(?STRING_REST(Char, Rest), URI) ->
     case is_query(Char) of
         true -> parse_query(Rest, URI);
@@ -1088,6 +1096,31 @@ is_fragment(Char) -> is_pchar(Char).
 %%
 %%-------------------------------------------------------------------------
 
+%% Return true if input char is reserved.
+%%
+%% [RFC 3986, Chapter 2.2. Reserved Characters]
+%%
+%%   reserved    = gen-delims / sub-delims
+%%
+%%   gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+%%
+%%   sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
+%%               / "*" / "+" / "," / ";" / "="
+%%
+%% Membership is tested against the two delimiter sets spelled out as
+%% strings; every other character is reported as not reserved.
+-spec is_reserved(char()) -> boolean().
+is_reserved(Char) ->
+    %% gen-delims
+    GenDelims = ":/?#[]@",
+    %% sub-delims
+    SubDelims = "!$&'()*+,;=",
+    case lists:member(Char, GenDelims) of
+        true -> true;
+        false -> lists:member(Char, SubDelims)
+    end.
+
+
 %% Check if char is sub-delim.
 -spec is_sub_delim(char()) -> boolean().
 is_sub_delim($!) -> true;
@@ -1276,36 +1309,6 @@ byte_size_exl_head(Binary) -> byte_size(Binary) + 1.
 %%
 %%   pct-encoded = "%" HEXDIG HEXDIG
 %%-------------------------------------------------------------------------
--spec decode_userinfo(binary()) -> binary().
-decode_userinfo(Cs) ->
-    check_utf8(decode(Cs, fun is_userinfo/1, <<>>)).
-
--spec decode_host(binary()) -> binary().
-decode_host(Cs) ->
-    check_utf8(decode(Cs, fun is_host/1, <<>>)).
-
--spec decode_path(binary()) -> binary().
-decode_path(Cs) ->
-    check_utf8(decode(Cs, fun is_path/1, <<>>)).
-
--spec decode_query(binary()) -> binary().
-decode_query(Cs) ->
-    check_utf8(decode(Cs, fun is_query/1, <<>>)).
-
--spec decode_fragment(binary()) -> binary().
-decode_fragment(Cs) ->
-    check_utf8(decode(Cs, fun is_fragment/1, <<>>)).
-
-
-%% Returns Cs if it is utf8 encoded.
-check_utf8(Cs) ->
-    case unicode:characters_to_list(Cs) of
-        {incomplete,_,_} ->
-            throw({error,invalid_utf8,Cs});
-        {error,_,_} ->
-            throw({error,invalid_utf8,Cs});
-        _ -> Cs
-    end.
 
 %%-------------------------------------------------------------------------
 %% Percent-encode
@@ -1351,20 +1354,56 @@ encode_fragment(Cs) ->
 %%-------------------------------------------------------------------------
 %% Helper funtions for percent-decode
 %%-------------------------------------------------------------------------
-decode(<<$%,C0,C1,Cs/binary>>, Fun, Acc) ->
+
+-spec decode(list()|binary()) -> list() | binary().
+decode(Cs) ->
+    decode(Cs, <<>>).
+%%
+decode(L, Acc) when is_list(L) ->
+    B0 = unicode:characters_to_binary(L),
+    B1 = decode(B0, Acc),
+    unicode:characters_to_list(B1);
+decode(<<$%,C0,C1,Cs/binary>>, Acc) ->
     case is_hex_digit(C0) andalso is_hex_digit(C1) of
         true ->
             B = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
-            decode(Cs, Fun, <<Acc/binary, B>>);
+            case is_reserved(B) of
+                true ->
+                    %% [2.2] Characters in the reserved set are protected from
+                    %% normalization.
+                    %% [2.1] For consistency, URI producers and normalizers should
+                    %% use uppercase hexadecimal digits for all percent-
+                    %% encodings.
+                    H0 = hex_to_upper(C0),
+                    H1 = hex_to_upper(C1),
+                    decode(Cs, <<Acc/binary,$%,H0,H1>>);
+                false ->
+                    decode(Cs, <<Acc/binary, B>>)
+            end;
         false -> throw({error,invalid_percent_encoding,<<$%,C0,C1>>})
     end;
-decode(<<C,Cs/binary>>, Fun, Acc) ->
-    case Fun(C) of
-        true -> decode(Cs, Fun, <<Acc/binary, C>>);
-        false -> throw({error,invalid_percent_encoding,<<C,Cs/binary>>})
-    end;
-decode(<<>>, _Fun, Acc) ->
-    Acc.
+decode(<<C,Cs/binary>>, Acc) ->
+    decode(Cs, <<Acc/binary, C>>);
+decode(<<>>, Acc) ->
+    check_utf8(Acc).
+
+%% Returns Cs if it is utf8 encoded.
+%% Throws {error,invalid_utf8,Cs} when the conversion reports either an
+%% incomplete or an erroneous byte sequence.
+check_utf8(Cs) ->
+    case unicode:characters_to_list(Cs) of
+        {Tag,_,_} when Tag =:= incomplete; Tag =:= error ->
+            throw({error,invalid_utf8,Cs});
+        _ -> Cs
+    end.
+
+%% Map a hexadecimal digit to its uppercase form; other input is rejected.
+hex_to_upper(H) when H >= $a, H =< $f ->
+    H - ($a - $A);
+hex_to_upper(H) when H >= $0, H =< $9; H >= $A, H =< $F ->
+    H;
+hex_to_upper(H) ->
+    throw({error,invalid_input, H}).
 
 %% Check if char is allowed in host
 -spec is_host(char()) -> boolean().
@@ -1925,9 +1964,10 @@ base10_decode_unicode(<<H,_/binary>>, _, _) ->
 %%-------------------------------------------------------------------------
 
 normalize_map(URIMap) ->
-      normalize_path_segment(
-        normalize_scheme_based(
-          normalize_case(URIMap))).
+    normalize_path_segment(
+      normalize_scheme_based(
+        normalize_percent_encoding(
+          normalize_case(URIMap)))).
 
 
 %% 6.2.2.1.  Case Normalization
@@ -1942,6 +1982,18 @@ normalize_case(#{} = Map) ->
     Map.
 
 
+%% 6.2.2.2.  Percent-Encoding Normalization
+%% Only the textual components are decoded; port and scheme are kept as is.
+normalize_percent_encoding(Map) ->
+    Decodable = [userinfo, host, path, query, fragment],
+    maps:map(fun (Key, Value) ->
+                     case lists:member(Key, Decodable) of
+                         true -> decode(Value);
+                         false -> Value
+                     end
+             end, Map).
+
+
 to_lower(Cs) when is_list(Cs) ->
     B = convert_to_binary(Cs, utf8, utf8),
     convert_to_list(to_lower(B), utf8);
diff --git a/lib/stdlib/test/property_test/uri_string_recompose.erl b/lib/stdlib/test/property_test/uri_string_recompose.erl
index e51a671172..35b3a50b9c 100644
--- a/lib/stdlib/test/property_test/uri_string_recompose.erl
+++ b/lib/stdlib/test/property_test/uri_string_recompose.erl
@@ -65,15 +65,29 @@
 -define(QUERY, {query, query_map()}).
 -define(FRAGMENT, {fragment, fragment_map()}).
 
+%% Non-unicode
+-define(USER_NU, {userinfo, non_unicode()}).
+-define(HOST_NU, {host, host_map_nu()}).
+-define(PATH_ABE_NU, {path, path_abempty_map_nu()}).
+-define(PATH_ABS_NU, {path, path_absolute_map_nu()}).
+-define(PATH_NOS_NU, {path, path_noscheme_map_nu()}).
+-define(PATH_ROO_NU, {path, path_rootless_map_nu()}).
+-define(QUERY_NU, {query, query_map_nu()}).
+-define(FRAGMENT_NU, {fragment, fragment_map_nu()}).
 
 %%%========================================================================
 %%% Properties
 %%%========================================================================
 
 prop_recompose() ->
+    ?FORALL(Map, map_no_unicode(),
+           Map =:= uri_string:parse(uri_string:recompose(Map))).
+
+prop_normalize() ->
     ?FORALL(Map, map(),
-            Map =:= uri_string:parse(uri_string:recompose(Map))
-           ).
+            uri_string:normalize(Map, [return_map]) =:=
+                uri_string:normalize(uri_string:parse(uri_string:recompose(Map)),
+                                     [return_map])).
 
 %% Stats
 prop_map_key_length_collect() ->
@@ -96,6 +110,9 @@ prop_scheme_collect() ->
 map() ->
     ?LET(Gen, comp_proplist(), proplist_to_map(Gen)).
 
+map_no_unicode() ->
+    ?LET(Gen, comp_proplist_nu(), proplist_to_map(Gen)).
+
 comp_proplist() ->
     frequency([
                {2, [?SCHEME,?PATH_ABS]},
@@ -166,6 +183,76 @@ comp_proplist() ->
                {2, [?USER,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]}
               ]).
 
+comp_proplist_nu() ->  % component lists built from the non-unicode (_NU) macros
+    frequency([
+               {2, [?SCHEME,?PATH_ABS_NU]},
+               {2, [?SCHEME,?PATH_ROO_NU]},
+               {2, [?SCHEME,?PATH_EMP]},
+               {2, [?SCHEME,?HOST_NU,?PATH_ABE_NU]},
+               {2, [?SCHEME,?USER_NU,?HOST_NU,?PATH_ABE_NU]},
+               {2, [?SCHEME,?HOST_NU,?PORT,?PATH_ABE_NU]},
+               {2, [?SCHEME,?USER_NU,?HOST_NU,?PORT,?PATH_ABE_NU]},
+               %% Same shapes without a scheme
+               {2, [?PATH_ABS_NU]},
+               {2, [?PATH_NOS_NU]},
+               {2, [?PATH_EMP]},
+               {2, [?HOST_NU,?PATH_ABE_NU]},
+               {2, [?USER_NU,?HOST_NU,?PATH_ABE_NU]},
+               {2, [?HOST_NU,?PORT,?PATH_ABE_NU]},
+               {2, [?USER_NU,?HOST_NU,?PORT,?PATH_ABE_NU]},
+
+               %% With a query component
+               {2, [?SCHEME,?PATH_ABS_NU,?QUERY_NU]},
+               {2, [?SCHEME,?PATH_ROO_NU,?QUERY_NU]},
+               {2, [?SCHEME,?PATH_EMP,?QUERY_NU]},
+               {2, [?SCHEME,?HOST_NU,?PATH_ABE_NU,?QUERY_NU]},
+               {2, [?SCHEME,?USER_NU,?HOST_NU,?PATH_ABE_NU,?QUERY_NU]},
+               {2, [?SCHEME,?HOST_NU,?PORT,?PATH_ABE_NU,?QUERY_NU]},
+               {2, [?SCHEME,?USER_NU,?HOST_NU,?PORT,?PATH_ABE_NU,?QUERY_NU]},
+               %% With a query component, without a scheme
+               {2, [?PATH_ABS_NU,?QUERY_NU]},
+               {2, [?PATH_NOS_NU,?QUERY_NU]},
+               {2, [?PATH_EMP,?QUERY_NU]},
+               {2, [?HOST_NU,?PATH_ABE_NU,?QUERY_NU]},
+               {2, [?USER_NU,?HOST_NU,?PATH_ABE_NU,?QUERY_NU]},
+               {2, [?HOST_NU,?PORT,?PATH_ABE_NU,?QUERY_NU]},
+               {2, [?USER_NU,?HOST_NU,?PORT,?PATH_ABE_NU,?QUERY_NU]},
+
+               %% With a fragment component
+               {2, [?SCHEME,?PATH_ABS_NU,?FRAGMENT_NU]},
+               {2, [?SCHEME,?PATH_ROO_NU,?FRAGMENT_NU]},
+               {2, [?SCHEME,?PATH_EMP,?FRAGMENT_NU]},
+               {2, [?SCHEME,?HOST_NU,?PATH_ABE_NU,?FRAGMENT_NU]},
+               {2, [?SCHEME,?USER_NU,?HOST_NU,?PATH_ABE_NU,?FRAGMENT_NU]},
+               {2, [?SCHEME,?HOST_NU,?PORT,?PATH_ABE_NU,?FRAGMENT_NU]},
+               {2, [?SCHEME,?USER_NU,?HOST_NU,?PORT,?PATH_ABE_NU,?FRAGMENT_NU]},
+               %% With a fragment component, without a scheme
+               {2, [?PATH_ABS_NU,?FRAGMENT_NU]},
+               {2, [?PATH_NOS_NU,?FRAGMENT_NU]},
+               {2, [?PATH_EMP,?FRAGMENT_NU]},
+               {2, [?HOST_NU,?PATH_ABE_NU,?FRAGMENT_NU]},
+               {2, [?USER_NU,?HOST_NU,?PATH_ABE_NU,?FRAGMENT_NU]},
+               {2, [?HOST_NU,?PORT,?PATH_ABE_NU,?FRAGMENT_NU]},
+               {2, [?USER_NU,?HOST_NU,?PORT,?PATH_ABE_NU,?FRAGMENT_NU]},
+
+               %% With both query and fragment components
+               {2, [?SCHEME,?PATH_ABS_NU,?QUERY_NU,?FRAGMENT_NU]},
+               {2, [?SCHEME,?PATH_ROO_NU,?QUERY_NU,?FRAGMENT_NU]},
+               {2, [?SCHEME,?PATH_EMP,?QUERY_NU,?FRAGMENT_NU]},
+               {2, [?SCHEME,?HOST_NU,?PATH_ABE_NU,?QUERY_NU,?FRAGMENT_NU]},
+               {2, [?SCHEME,?USER_NU,?HOST_NU,?PATH_ABE_NU,?QUERY_NU,?FRAGMENT_NU]},
+               {2, [?SCHEME,?HOST_NU,?PORT,?PATH_ABE_NU,?QUERY_NU,?FRAGMENT_NU]},
+               {2, [?SCHEME,?USER_NU,?HOST_NU,?PORT,?PATH_ABE_NU,?QUERY_NU,?FRAGMENT_NU]},
+               %% With both query and fragment components, without a scheme
+               {2, [?PATH_ABS_NU,?QUERY_NU,?FRAGMENT_NU]},
+               {2, [?PATH_NOS_NU,?QUERY_NU,?FRAGMENT_NU]},
+               {2, [?PATH_EMP,?QUERY_NU,?FRAGMENT_NU]},
+               {2, [?HOST_NU,?PATH_ABE_NU,?QUERY_NU,?FRAGMENT_NU]},
+               {2, [?USER_NU,?HOST_NU,?PATH_ABE_NU,?QUERY_NU,?FRAGMENT_NU]},
+               {2, [?HOST_NU,?PORT,?PATH_ABE_NU,?QUERY_NU,?FRAGMENT_NU]},
+               {2, [?USER_NU,?HOST_NU,?PORT,?PATH_ABE_NU,?QUERY_NU,?FRAGMENT_NU]}
+              ]).
+
 
 %%-------------------------------------------------------------------------
 %% Path
@@ -174,6 +261,11 @@ path_abempty_map() ->
     frequency([{90, path_abe_map()},
                {10, path_empty_map()}]).
 
+path_abempty_map_nu() ->
+    frequency([{90, path_abe_map_nu()},
+               {10, path_empty_map()}]).
+
+
 path_abe_map() ->
     ?SIZED(Length, path_abe_map(Length, [])).
 %%
@@ -182,6 +274,14 @@ path_abe_map(0, Segments) ->
 path_abe_map(N, Segments) ->
     path_abe_map(N-1, [slash(),segment()|Segments]).
 
+path_abe_map_nu() ->
+    ?SIZED(Length, path_abe_map_nu(Length, [])).
+%%
+path_abe_map_nu(0, Segments) ->
+    ?LET(Gen, Segments, lists:append(Gen));
+path_abe_map_nu(N, Segments) ->
+    path_abe_map_nu(N-1, [slash(),segment_nu()|Segments]).
+
 
 path_absolute_map() ->
     ?SIZED(Length, path_absolute_map(Length, [])).
@@ -191,6 +291,14 @@ path_absolute_map(0, Segments) ->
 path_absolute_map(N, Segments) ->
     path_absolute_map(N-1, [slash(),segment()|Segments]).
 
+path_absolute_map_nu() ->
+    ?SIZED(Length, path_absolute_map_nu(Length, [])).
+%%
+path_absolute_map_nu(0, Segments) ->
+    ?LET(Gen, [slash(),segment_nz_nu()|Segments], lists:append(Gen));
+path_absolute_map_nu(N, Segments) ->
+    path_absolute_map_nu(N-1, [slash(),segment_nu()|Segments]).
+
 
 path_noscheme_map() ->
     ?SIZED(Length, path_noscheme_map(Length, [])).
@@ -200,6 +308,15 @@ path_noscheme_map(0, Segments) ->
 path_noscheme_map(N, Segments) ->
     path_noscheme_map(N-1, [slash(),segment()|Segments]).
 
+path_noscheme_map_nu() ->
+    ?SIZED(Length, path_noscheme_map_nu(Length, [])).
+%%
+path_noscheme_map_nu(0, Segments) ->
+    ?LET(Gen, [segment_nz_nc_nu()|Segments], lists:append(Gen));
+path_noscheme_map_nu(N, Segments) ->
+    path_noscheme_map_nu(N-1, [slash(),segment_nu()|Segments]).
+
+
 path_rootless_map() ->
     ?SIZED(Length, path_rootless_map(Length, [])).
 %%
@@ -208,24 +325,59 @@ path_rootless_map(0, Segments) ->
 path_rootless_map(N, Segments) ->
     path_rootless_map(N-1, [slash(),segment()|Segments]).
 
+path_rootless_map_nu() ->
+    ?SIZED(Length, path_rootless_map_nu(Length, [])).
+%%
+path_rootless_map_nu(0, Segments) ->
+    ?LET(Gen, [segment_nz_nu()|Segments], lists:append(Gen));
+path_rootless_map_nu(N, Segments) ->
+    path_rootless_map_nu(N-1, [slash(),segment_nu()|Segments]).
+
 
 segment_nz() ->
     non_empty(segment()).
 
-segment_nz_nc() ->
-    non_empty(list(frequency([{30, unreserved()},
-                              {10, sub_delims()},
-                              {10, unicode_char()},
-                              {5, oneof([$@])}
-                             ]))).
+segment_nz_nu() ->
+    non_empty(segment_nu()).
 
 
+segment_nz_nc() ->
+    ?LET(Gen,
+         non_empty(list(frequency([{30, unreserved()},
+                                   {10, ptc_encoded_reserved()},
+                                   {10, sub_delims()},
+                                   {10, unicode_char()},
+                                   {5, oneof([$@])}
+                                  ]))),
+         lists:flatten(Gen)).
+
+segment_nz_nc_nu() ->
+    ?LET(Gen,
+         non_empty(list(frequency([{30, unreserved()},
+                                   {10, ptc_encoded_reserved()},
+                                   {10, sub_delims()},
+                                   {5, oneof([$@])}
+                                  ]))),
+         lists:flatten(Gen)).
+
 segment() ->
-    list(frequency([{30, unreserved()},
-                    {10, sub_delims()},
-                    {10, unicode_char()},
-                    {5, oneof([$:, $@])}
-                   ])).
+    ?LET(Gen,
+         list(frequency([{30, unreserved()},
+                         {10, ptc_encoded_reserved()},
+                         {10, sub_delims()},
+                         {10, unicode_char()},
+                         {5, oneof([$:, $@])}
+                        ])),
+         lists:flatten(Gen)).
+
+segment_nu() ->
+    ?LET(Gen,
+         list(frequency([{30, unreserved()},
+                         {10, ptc_encoded_reserved()},
+                         {10, sub_delims()},
+                         {5, oneof([$:, $@])}
+                        ])),
+         lists:flatten(Gen)).
 
 slash() ->
     "/".
@@ -235,19 +387,35 @@ path_empty_map() ->
 
 
 %%-------------------------------------------------------------------------
-%% Path
+%% Host
 %%-------------------------------------------------------------------------
 host_map() ->
     frequency([{30, reg_name()},
                {30, ip_address()}
               ]).
 
+host_map_nu() ->
+    frequency([{30, reg_name_nu()},
+               {30, ip_address()}
+              ]).
 
 reg_name() ->
-    list(frequency([{30, alpha()},
-                              {10, sub_delims()},
-                              {10, unicode_char()}
-                             ])).
+    ?LET(Gen,
+         list(frequency([{30, alpha()},
+                         {10, sub_delims()},
+                         {10, ptc_encoded_reserved()},
+                         {10, unicode_char()}
+                        ])),
+         lists:flatten(Gen)).
+
+reg_name_nu() ->
+    ?LET(Gen,
+         list(frequency([{30, alpha()},
+                         {10, sub_delims()},
+                         {10, ptc_encoded_reserved()}
+                        ])),
+         lists:flatten(Gen)).
+
 
 ip_address() ->
     oneof(["127.0.0.1", "::127.0.0.1",
@@ -258,10 +426,13 @@ ip_address() ->
 
 %% Generating only reg-names
 host_uri() ->
-    non_empty(list(frequency([{30, unreserved()},
-                              {10, sub_delims()},
-                              {10, pct_encoded()}
-                             ]))).
+    ?LET(Gen,
+         non_empty(list(frequency([{30, unreserved()},
+                                   {10, sub_delims()},
+                                   {10, ptc_encoded_reserved()},
+                                   {10, pct_encoded()}
+                                  ]))),
+         lists:flatten(Gen)).
 
 %%-------------------------------------------------------------------------
 %% Port, Query, Fragment
@@ -274,6 +445,9 @@ port() ->
 query_map() ->
     unicode().
 
+query_map_nu() ->
+    non_unicode().
+
 
 query_uri() ->
     [$?| non_empty(list(frequency([{20, pchar()},
@@ -283,6 +457,10 @@ query_uri() ->
 fragment_map() ->
     unicode().
 
+fragment_map_nu() ->
+    non_unicode().
+
+
 fragment_uri() ->
     [$?| non_empty(list(frequency([{20, pchar()},
                                    {5, oneof([$/, $?])} % punctuation
@@ -311,9 +489,14 @@ scheme(N, L) ->
 %%-------------------------------------------------------------------------
 unicode() ->
     list(frequency([{20, alpha()},                    % alpha
-               {10, digit()},                    % digit
-               {10, unicode_char()}              % unicode
-              ])).
+                    {10, digit()},                    % digit
+                    {10, unicode_char()}              % unicode
+                   ])).
+
+non_unicode() ->
+    list(frequency([{20, alpha()},                   % alpha
+                    {10, digit()}                    % digit
+                   ])).
 
 scheme_char() ->
     frequency([{20, alpha()},                    % alpha
@@ -327,6 +510,7 @@ sub_delims() ->
 
 pchar() ->
     frequency([{20, unreserved()},
+               {5, ptc_encoded_reserved()},
                {5, pct_encoded()},
                {5, sub_delims()},
                {1, oneof([$:, $@])}              % punctuation
@@ -351,6 +535,22 @@ digit() ->
 pct_encoded() ->
     oneof(["%C3%A4", "%C3%A5", "%C3%B6"]).
 
+%%-------------------------------------------------------------------------
+%% Picks one percent-encoded ("%XX") form of a reserved character.
+%% [RFC 3986, Chapter 2.2. Reserved Characters]
+%%
+%%   reserved    = gen-delims / sub-delims
+%%   gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+%%                 3A    2F    3F    23    5B    5D    40
+%%   sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
+%%                 21    24    26    27    28    29
+%%               / "*" / "+" / "," / ";" / "="
+%%                 2A    2B    2C    3B    3D
+%%-------------------------------------------------------------------------
+ptc_encoded_reserved() ->
+    oneof(["%3A","%2F","%3F","%23","%5B","%5D","%40",
+           "%21","%24","%26","%27","%28","%29",
+           "%2A","%2B","%2C","%3B","%3D"]).
 
 %%%========================================================================
 %%% Helpers
@@ -359,3 +559,13 @@ proplist_to_map(L) ->
     lists:foldl(fun({K,V},M) -> M#{K => V};
                   (_,M) -> M
                end, #{}, L).
+
+map_scheme_host_to_lower(Map) ->
+    %% Lower-case the values stored under the scheme and host keys;
+    %% the value of every other key is returned untouched.
+    Lower = fun (Key, Value) when Key =:= scheme; Key =:= host ->
+                    string:to_lower(Value);
+                (_Key, Value) ->
+                    Value
+            end,
+    maps:map(Lower, Map).
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index 92f8bb3292..2aa399525d 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -23,6 +23,12 @@
 
 -export([all/0, suite/0,groups/0,
          normalize/1, normalize_map/1, normalize_return_map/1, normalize_negative/1,
+         normalize_binary_pct_encoded_userinfo/1,
+         normalize_binary_pct_encoded_query/1,
+         normalize_binary_pct_encoded_fragment/1,
+         normalize_pct_encoded_userinfo/1,
+         normalize_pct_encoded_query/1,
+         normalize_pct_encoded_fragment/1,
          parse_binary_fragment/1, parse_binary_host/1, parse_binary_host_ipv4/1,
          parse_binary_host_ipv6/1,
          parse_binary_path/1, parse_binary_pct_encoded_fragment/1, parse_binary_pct_encoded_query/1,
@@ -41,7 +47,8 @@
          transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1,
          compose_query/1, compose_query_latin1/1, compose_query_negative/1,
          dissect_query/1, dissect_query_negative/1,
-         interop_query_latin1/1, interop_query_utf8/1
+         interop_query_latin1/1, interop_query_utf8/1,
+         regression_parse/1, regression_recompose/1, regression_normalize/1
         ]).
 
 
@@ -71,6 +78,12 @@ all() ->
      normalize_map,
      normalize_return_map,
      normalize_negative,
+     normalize_binary_pct_encoded_userinfo,
+     normalize_binary_pct_encoded_query,
+     normalize_binary_pct_encoded_fragment,
+     normalize_pct_encoded_userinfo,
+     normalize_pct_encoded_query,
+     normalize_pct_encoded_fragment,
      parse_binary_scheme,
      parse_binary_userinfo,
      parse_binary_pct_encoded_userinfo,
@@ -120,7 +133,10 @@ all() ->
      dissect_query,
      dissect_query_negative,
      interop_query_latin1,
-     interop_query_utf8
+     interop_query_utf8,
+     regression_parse,
+     regression_recompose,
+     regression_normalize
     ].
 
 groups() ->
@@ -338,20 +354,23 @@ parse_binary_userinfo(_Config) ->
         uri_string:parse(<<"foo://user:password@localhost">>).
 
 parse_binary_pct_encoded_userinfo(_Config) ->
-    #{scheme := <<"user">>, path := <<"合@気道"/utf8>>} =
+    #{scheme := <<"user">>, path := <<"%E5%90%88@%E6%B0%97%E9%81%93">>} =
         uri_string:parse(<<"user:%E5%90%88@%E6%B0%97%E9%81%93">>),
-    #{path := <<"合気道@"/utf8>>} = uri_string:parse(<<"%E5%90%88%E6%B0%97%E9%81%93@">>),
-    #{path := <<"/合気道@"/utf8>>} = uri_string:parse(<<"/%E5%90%88%E6%B0%97%E9%81%93@">>),
-    #{path := <<"合@気道"/utf8>>} = uri_string:parse(<<"%E5%90%88@%E6%B0%97%E9%81%93">>),
-    #{userinfo := <<"合"/utf8>>, host := <<"気道"/utf8>>} =
+    #{path := <<"%E5%90%88%E6%B0%97%E9%81%93@">>} =
+        uri_string:parse(<<"%E5%90%88%E6%B0%97%E9%81%93@">>),
+    #{path := <<"/%E5%90%88%E6%B0%97%E9%81%93@">>} =
+        uri_string:parse(<<"/%E5%90%88%E6%B0%97%E9%81%93@">>),
+    #{path := <<"%E5%90%88@%E6%B0%97%E9%81%93">>} =
+        uri_string:parse(<<"%E5%90%88@%E6%B0%97%E9%81%93">>),
+    #{userinfo := <<"%E5%90%88">>, host := <<"%E6%B0%97%E9%81%93">>} =
         uri_string:parse(<<"//%E5%90%88@%E6%B0%97%E9%81%93">>),
-    #{userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} =
+    #{userinfo := <<"%E5%90%88:%E6%B0%97">>, host := <<"%E9%81%93">>} =
         uri_string:parse(<<"//%E5%90%88:%E6%B0%97@%E9%81%93">>),
-    #{scheme := <<"foo">>, path := <<"/合気道@"/utf8>>} =
+    #{scheme := <<"foo">>, path := <<"/%E5%90%88%E6%B0%97%E9%81%93@">>} =
         uri_string:parse(<<"foo:/%E5%90%88%E6%B0%97%E9%81%93@">>),
-    #{scheme := <<"foo">>, userinfo := <<"合"/utf8>>, host := <<"気道"/utf8>>} =
+    #{scheme := <<"foo">>, userinfo := <<"%E5%90%88">>, host := <<"%E6%B0%97%E9%81%93">>} =
         uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93">>),
-    #{scheme := <<"foo">>, userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} =
+    #{scheme := <<"foo">>, userinfo := <<"%E5%90%88:%E6%B0%97">>, host := <<"%E9%81%93">>} =
         uri_string:parse(<<"foo://%E5%90%88:%E6%B0%97@%E9%81%93">>),
     {error,invalid_uri,"@"} = uri_string:parse(<<"//%E5%90%88@%E6%B0%97%E9%81%93@">>),
     {error,invalid_uri,":"} = uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93@">>).
@@ -369,8 +388,8 @@ parse_binary_host_ipv4(_Config) ->
     #{host := <<"127.0.0.1">>, query := <<"name=ferret">>} =
         uri_string:parse(<<"//127.0.0.1?name=ferret">>),
     #{host := <<"127.0.0.1">>, fragment := <<"nose">>} = uri_string:parse(<<"//127.0.0.1#nose">>),
-    {error,invalid_uri,"x"} = uri_string:parse(<<"//127.0.0.x">>),
-    {error,invalid_uri,"1227.0.0.1"} = uri_string:parse(<<"//1227.0.0.1">>).
+    #{host := <<"127.0.0.x">>,path := <<>>} = uri_string:parse(<<"//127.0.0.x">>),
+    #{host := <<"1227.0.0.1">>,path := <<>>} = uri_string:parse(<<"//1227.0.0.1">>).
 
 parse_binary_host_ipv6(_Config) ->
     #{host := <<"::127.0.0.1">>} = uri_string:parse(<<"//[::127.0.0.1]">>),
@@ -439,9 +458,9 @@ parse_binary_query(_Config) ->
 
 parse_binary_pct_encoded_query(_Config) ->
     #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>,
-      query := <<"name=合気道"/utf8>>} =
+      query := <<"name=%E5%90%88%E6%B0%97%E9%81%93">>} =
         uri_string:parse(<<"foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>),
-    #{host := <<"example.com">>, path := <<"/">>, query := <<"name=合気道"/utf8>>} =
+    #{host := <<"example.com">>, path := <<"/">>, query := <<"name=%E5%90%88%E6%B0%97%E9%81%93">>} =
         uri_string:parse(<<"//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>).
 
 parse_binary_fragment(_Config) ->
@@ -472,9 +491,11 @@ parse_binary_fragment(_Config) ->
         uri_string:parse(<<"//example.com/#nose">>).
 
 parse_binary_pct_encoded_fragment(_Config) ->
-    #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"合気道"/utf8>>} =
+    #{scheme := <<"foo">>, host := <<"example.com">>,
+      fragment := <<"%E5%90%88%E6%B0%97%E9%81%93">>} =
         uri_string:parse(<<"foo://example.com#%E5%90%88%E6%B0%97%E9%81%93">>),
-    #{host := <<"example.com">>, path := <<"/">>, fragment := <<"合気道"/utf8>>} =
+    #{host := <<"example.com">>, path := <<"/">>,
+      fragment := <<"%E5%90%88%E6%B0%97%E9%81%93">>} =
         uri_string:parse(<<"//example.com/#%E5%90%88%E6%B0%97%E9%81%93">>).
 
 parse_scheme(_Config) ->
@@ -506,25 +527,27 @@ parse_userinfo(_Config) ->
         uri_string:parse("foo://user:password@localhost").
 
 parse_pct_encoded_userinfo(_Config) ->
-    #{scheme := "user", path := "合@気道"} =
+    #{scheme := "user", path := "%E5%90%88@%E6%B0%97%E9%81%93"} =
         uri_string:parse("user:%E5%90%88@%E6%B0%97%E9%81%93"),
-    #{path := "合気道@"} = uri_string:parse("%E5%90%88%E6%B0%97%E9%81%93@"),
-    #{path := "/合気道@"} = uri_string:parse("/%E5%90%88%E6%B0%97%E9%81%93@"),
-    #{path := "合@気道"} = uri_string:parse("%E5%90%88@%E6%B0%97%E9%81%93"),
-    #{userinfo := "合", host := "気道"} =
+    #{path := "%E5%90%88%E6%B0%97%E9%81%93@"} =
+        uri_string:parse("%E5%90%88%E6%B0%97%E9%81%93@"),
+    #{path := "/%E5%90%88%E6%B0%97%E9%81%93@"} =
+        uri_string:parse("/%E5%90%88%E6%B0%97%E9%81%93@"),
+    #{path := "%E5%90%88@%E6%B0%97%E9%81%93"} =
+        uri_string:parse("%E5%90%88@%E6%B0%97%E9%81%93"),
+    #{userinfo := "%E5%90%88", host := "%E6%B0%97%E9%81%93"} =
         uri_string:parse("//%E5%90%88@%E6%B0%97%E9%81%93"),
-    #{userinfo := "合:気", host := "道"} =
+    #{userinfo := "%E5%90%88:%E6%B0%97", host := "%E9%81%93"} =
         uri_string:parse("//%E5%90%88:%E6%B0%97@%E9%81%93"),
-    #{scheme := "foo", path := "/合気道@"} =
+    #{scheme := "foo", path := "/%E5%90%88%E6%B0%97%E9%81%93@"} =
         uri_string:parse("foo:/%E5%90%88%E6%B0%97%E9%81%93@"),
-    #{scheme := "foo", userinfo := "合", host := "気道"} =
+    #{scheme := "foo", userinfo := "%E5%90%88", host := "%E6%B0%97%E9%81%93"} =
         uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93"),
-    #{scheme := "foo", userinfo := "合:気", host := "道"} =
+    #{scheme := "foo", userinfo := "%E5%90%88:%E6%B0%97", host := "%E9%81%93"} =
         uri_string:parse("foo://%E5%90%88:%E6%B0%97@%E9%81%93"),
     {error,invalid_uri,"@"} = uri_string:parse("//%E5%90%88@%E6%B0%97%E9%81%93@"),
     {error,invalid_uri,":"} = uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93@").
 
-
 parse_host(_Config) ->
     #{host := "hostname"} = uri_string:parse("//hostname"),
     #{host := "hostname",scheme := "foo"} = uri_string:parse("foo://hostname"),
@@ -538,8 +561,8 @@ parse_host_ipv4(_Config) ->
     #{host := "127.0.0.1", path := "/over/there"} = uri_string:parse("//127.0.0.1/over/there"),
     #{host := "127.0.0.1", query := "name=ferret"} = uri_string:parse("//127.0.0.1?name=ferret"),
     #{host := "127.0.0.1", fragment := "nose"} = uri_string:parse("//127.0.0.1#nose"),
-    {error,invalid_uri,"x"} = uri_string:parse("//127.0.0.x"),
-    {error,invalid_uri,"1227.0.0.1"} = uri_string:parse("//1227.0.0.1").
+    #{host := "127.0.0.x",path := []} = uri_string:parse("//127.0.0.x"),
+    #{host := "1227.0.0.1",path := []} = uri_string:parse("//1227.0.0.1").
 
 parse_host_ipv6(_Config) ->
     #{host := "::127.0.0.1"} = uri_string:parse("//[::127.0.0.1]"),
@@ -602,9 +625,9 @@ parse_query(_Config) ->
 
 parse_pct_encoded_query(_Config) ->
     #{scheme := "foo", host := "example.com", path := "/",
-      query := "name=合気道"} =
+      query := "name=%E5%90%88%E6%B0%97%E9%81%93"} =
         uri_string:parse("foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93"),
-    #{host := "example.com", path := "/", query := "name=合気道"} =
+    #{host := "example.com", path := "/", query := "name=%E5%90%88%E6%B0%97%E9%81%93"} =
         uri_string:parse("//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93").
 
 parse_fragment(_Config) ->
@@ -635,9 +658,11 @@ parse_fragment(_Config) ->
         uri_string:parse("//example.com/#nose").
 
 parse_pct_encoded_fragment(_Config) ->
-    #{scheme := "foo", host := "example.com", fragment := "合気道"} =
+    #{scheme := "foo", host := "example.com",
+      fragment := "%E5%90%88%E6%B0%97%E9%81%93"} =
         uri_string:parse("foo://example.com#%E5%90%88%E6%B0%97%E9%81%93"),
-    #{host := "example.com", path := "/", fragment := "合気道"} =
+    #{host := "example.com", path := "/",
+      fragment := "%E5%90%88%E6%B0%97%E9%81%93"} =
         uri_string:parse("//example.com/#%E5%90%88%E6%B0%97%E9%81%93").
 
 parse_list(_Config) ->
@@ -711,9 +736,7 @@ parse_negative(_Config) ->
     {error,invalid_uri,":"} = uri_string:parse("foo://usär@host"),
     {error,invalid_uri,"ö"} = uri_string:parse("//host/path?foö=bar"),
     {error,invalid_uri,"ö"} = uri_string:parse("//host/path#foö"),
-    {error,invalid_uri,"127.256.0.1"} = uri_string:parse("//127.256.0.1"),
     {error,invalid_uri,":::127.0.0.1"} = uri_string:parse("//[:::127.0.0.1]"),
-    {error,invalid_utf8,<<0,0,0,246>>} = uri_string:parse("//%00%00%00%F6"),
     {error,invalid_uri,"A"} = uri_string:parse("//localhost:A8").
 
 
@@ -913,7 +936,9 @@ normalize(_Config) ->
     <<"sftp://localhost">> =
         uri_string:normalize(<<"sftp://localhost:22">>),
     <<"tftp://localhost">> =
-        uri_string:normalize(<<"tftp://localhost:69">>).
+        uri_string:normalize(<<"tftp://localhost:69">>),
+    <<"/foo/%2F/bar">> =
+        uri_string:normalize(<<"/foo/%2f/%62ar">>).
 
 normalize_map(_Config) ->
     "/a/g" = uri_string:normalize(#{path => "/a/b/c/./../../g"}),
@@ -942,7 +967,9 @@ normalize_map(_Config) ->
                                host => <<"localhost">>}),
     <<"tftp://localhost">> =
         uri_string:normalize(#{scheme => <<"tftp">>,port => 69,path => <<>>,
-                               host => <<"localhost">>}).
+                               host => <<"localhost">>}),
+    "/foo/%2F/bar" =
+        uri_string:normalize(#{path => "/foo/%2f/%62ar"}).
 
 normalize_return_map(_Config) ->
     #{scheme := "http",path := "/a/g",host := "localhost-örebro"} =
@@ -963,7 +990,82 @@ normalize_negative(_Config) ->
     {error,invalid_uri,":"} =
         uri_string:normalize("http://[192.168.0.1]", [return_map]),
     {error,invalid_uri,":"} =
-        uri_string:normalize(<<"http://[192.168.0.1]">>, [return_map]).
+        uri_string:normalize(<<"http://[192.168.0.1]">>, [return_map]),
+    {error,invalid_utf8,<<0,0,0,246>>} = uri_string:normalize("//%00%00%00%F6").
+
+normalize_binary_pct_encoded_userinfo(_Config) ->
+    #{scheme := <<"user">>, path := <<"合@気道"/utf8>>} =
+        uri_string:normalize(<<"user:%E5%90%88@%E6%B0%97%E9%81%93">>, [return_map]),
+    #{path := <<"合気道@"/utf8>>} =
+        uri_string:normalize(<<"%E5%90%88%E6%B0%97%E9%81%93@">>, [return_map]),
+    #{path := <<"/合気道@"/utf8>>} =
+        uri_string:normalize(<<"/%E5%90%88%E6%B0%97%E9%81%93@">>, [return_map]),
+    #{path := <<"合@気道"/utf8>>} =
+        uri_string:normalize(<<"%E5%90%88@%E6%B0%97%E9%81%93">>, [return_map]),
+    #{userinfo := <<"合"/utf8>>, host := <<"気道"/utf8>>} =
+        uri_string:normalize(<<"//%E5%90%88@%E6%B0%97%E9%81%93">>, [return_map]),
+    #{userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} =
+        uri_string:normalize(<<"//%E5%90%88:%E6%B0%97@%E9%81%93">>, [return_map]),
+    #{scheme := <<"foo">>, path := <<"/合気道@"/utf8>>} =
+        uri_string:normalize(<<"foo:/%E5%90%88%E6%B0%97%E9%81%93@">>, [return_map]),
+    #{scheme := <<"foo">>, userinfo := <<"合"/utf8>>, host := <<"気道"/utf8>>} =
+        uri_string:normalize(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93">>, [return_map]),
+    #{scheme := <<"foo">>, userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} =
+        uri_string:normalize(<<"foo://%E5%90%88:%E6%B0%97@%E9%81%93">>, [return_map]),
+    {error,invalid_uri,"@"} =
+        uri_string:normalize(<<"//%E5%90%88@%E6%B0%97%E9%81%93@">>, [return_map]),
+    {error,invalid_uri,":"} =
+        uri_string:normalize(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93@">>, [return_map]).
+
+normalize_binary_pct_encoded_query(_Config) ->
+    #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>,
+      query := <<"name=合気道"/utf8>>} =
+        uri_string:normalize(<<"foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>, [return_map]),
+    #{host := <<"example.com">>, path := <<"/">>, query := <<"name=合気道"/utf8>>} =
+        uri_string:normalize(<<"//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>, [return_map]).
+
+normalize_binary_pct_encoded_fragment(_Config) ->
+    #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"合気道"/utf8>>} =
+        uri_string:normalize(<<"foo://example.com#%E5%90%88%E6%B0%97%E9%81%93">>, [return_map]),
+    #{host := <<"example.com">>, path := <<"/">>, fragment := <<"合気道"/utf8>>} =
+        uri_string:normalize(<<"//example.com/#%E5%90%88%E6%B0%97%E9%81%93">>, [return_map]).
+
+normalize_pct_encoded_userinfo(_Config) ->
+    #{scheme := "user", path := "合@気道"} =
+        uri_string:normalize("user:%E5%90%88@%E6%B0%97%E9%81%93", [return_map]),
+    #{path := "合気道@"} =
+        uri_string:normalize("%E5%90%88%E6%B0%97%E9%81%93@", [return_map]),
+    #{path := "/合気道@"} =
+        uri_string:normalize("/%E5%90%88%E6%B0%97%E9%81%93@", [return_map]),
+    #{path := "合@気道"} =
+        uri_string:normalize("%E5%90%88@%E6%B0%97%E9%81%93", [return_map]),
+    #{userinfo := "合", host := "気道"} =
+        uri_string:normalize("//%E5%90%88@%E6%B0%97%E9%81%93", [return_map]),
+    #{userinfo := "合:気", host := "道"} =
+        uri_string:normalize("//%E5%90%88:%E6%B0%97@%E9%81%93", [return_map]),
+    #{scheme := "foo", path := "/合気道@"} =
+        uri_string:normalize("foo:/%E5%90%88%E6%B0%97%E9%81%93@", [return_map]),
+    #{scheme := "foo", userinfo := "合", host := "気道"} =
+        uri_string:normalize("foo://%E5%90%88@%E6%B0%97%E9%81%93", [return_map]),
+    #{scheme := "foo", userinfo := "合:気", host := "道"} =
+        uri_string:normalize("foo://%E5%90%88:%E6%B0%97@%E9%81%93", [return_map]),
+    {error,invalid_uri,"@"} =
+        uri_string:normalize("//%E5%90%88@%E6%B0%97%E9%81%93@", [return_map]),
+    {error,invalid_uri,":"} =
+        uri_string:normalize("foo://%E5%90%88@%E6%B0%97%E9%81%93@", [return_map]).
+
+normalize_pct_encoded_query(_Config) ->
+    #{scheme := "foo", host := "example.com", path := "/",
+      query := "name=合気道"} =
+        uri_string:normalize("foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93", [return_map]),
+    #{host := "example.com", path := "/", query := "name=合気道"} =
+        uri_string:normalize("//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93", [return_map]).
+
+normalize_pct_encoded_fragment(_Config) ->
+    #{scheme := "foo", host := "example.com", fragment := "合気道"} =
+        uri_string:normalize("foo://example.com#%E5%90%88%E6%B0%97%E9%81%93", [return_map]),
+    #{host := "example.com", path := "/", fragment := "合気道"} =
+        uri_string:normalize("//example.com/#%E5%90%88%E6%B0%97%E9%81%93", [return_map]).
 
 interop_query_utf8(_Config) ->
     Q = uri_string:compose_query([{"foo bar","1"}, {"合", "2"}]),
@@ -977,3 +1079,86 @@ interop_query_latin1(_Config) ->
     Uri1 = uri_string:transcode(Uri, [{in_encoding, latin1}]),
     #{query := Q1} = uri_string:parse(Uri1),
     [{"foo bar","1"}, {"合", "2"}] = uri_string:dissect_query(Q1).
+
+regression_parse(_Config) ->
+    #{host := "Bar",path := [],scheme := "FOo"} =
+        uri_string:parse("FOo://Bar"),
+    #{host := "bar",path := [],scheme := "foo"} =
+        uri_string:parse("foo://bar"),
+    #{host := "A%2f",path := "/%62ar",scheme := "foo"} =
+        uri_string:parse("foo://A%2f/%62ar"),
+    #{host := "a%2F",path := "/bar",scheme := "foo"} =
+        uri_string:parse("foo://a%2F/bar"),
+    #{host := "%C3%B6",path := [],scheme := "FOo"} =
+        uri_string:parse("FOo://%C3%B6").
+
+regression_recompose(_Config) ->
+    "FOo://Bar" =
+        uri_string:recompose(#{host => "Bar",path => [],scheme => "FOo"}),
+    "foo://bar" =
+        uri_string:recompose(#{host => "bar",path => [],scheme => "foo"}),
+    "foo://A%2f/%62ar" =
+        uri_string:recompose(#{host => "A%2f",path => "/%62ar",scheme => "foo"}),
+    "foo://a%2F/bar" =
+        uri_string:recompose(#{host => "a%2F",path => "/bar",scheme => "foo"}),
+    "FOo://%C3%B6" =
+        uri_string:recompose(#{host => "%C3%B6",path => [],scheme => "FOo"}),
+    "FOo://%C3%B6" =
+        uri_string:recompose(#{host => "ö",path => [],scheme => "FOo"}).
+
+regression_normalize(_Config) ->
+    "foo://bar" =
+        uri_string:normalize("FOo://Bar"),
+    #{host := "bar",path := [],scheme := "foo"} =
+        uri_string:normalize("FOo://Bar", [return_map]),
+
+    "foo://bar" =
+        uri_string:normalize("foo://bar"),
+    #{host := "bar",path := [],scheme := "foo"} =
+        uri_string:normalize("foo://bar", [return_map]),
+
+    "foo://a%2F/bar" =
+        uri_string:normalize("foo://A%2f/%62ar"),
+    #{host := "a%2F",path := "/bar",scheme := "foo"} =
+        uri_string:normalize("foo://A%2f/%62ar", [return_map]),
+
+    "foo://a%2F/bar" =
+        uri_string:normalize("foo://a%2F/bar"),
+    #{host := "a%2F",path := "/bar",scheme := "foo"} =
+        uri_string:normalize("foo://a%2F/bar", [return_map]),
+
+    "foo://%C3%B6" =
+        uri_string:normalize("FOo://%C3%B6"),
+    #{host := "ö",path := [],scheme := "foo"} =
+        uri_string:normalize("FOo://%C3%B6", [return_map]),
+
+
+    "foo://bar" =
+        uri_string:normalize(#{host => "Bar",path => [],scheme => "FOo"}),
+    #{host := "bar",path := [],scheme := "foo"} =
+        uri_string:normalize(#{host => "Bar",path => [],scheme => "FOo"}, [return_map]),
+
+    "foo://bar" =
+        uri_string:normalize(#{host => "bar",path => [],scheme => "foo"}),
+    #{host := "bar",path := [],scheme := "foo"} =
+        uri_string:normalize(#{host => "bar",path => [],scheme => "foo"}, [return_map]),
+
+    "foo://a%2F/bar" =
+        uri_string:normalize(#{host => "A%2f",path => "/%62ar",scheme => "foo"}),
+    #{host := "a%2F",path := "/bar",scheme := "foo"} =
+        uri_string:normalize(#{host => "A%2f",path => "/%62ar",scheme => "foo"}, [return_map]),
+
+    "foo://a%2F/bar" =
+        uri_string:normalize(#{host => "a%2F",path => "/bar",scheme => "foo"}),
+    #{host := "a%2F",path := "/bar",scheme := "foo"} =
+        uri_string:normalize(#{host => "a%2F",path => "/bar",scheme => "foo"}, [return_map]),
+
+    "foo://%C3%B6" =
+        uri_string:normalize(#{host => "%C3%B6",path => [],scheme => "FOo"}),
+    #{host := "ö",path := [],scheme := "foo"} =
+        uri_string:normalize(#{host => "%C3%B6",path => [],scheme => "FOo"}, [return_map]),
+
+    "foo://%C3%B6" =
+        uri_string:normalize(#{host => "ö",path => [],scheme => "FOo"}),
+    #{host := "ö",path := [],scheme := "foo"} =
+        uri_string:normalize(#{host => "ö",path => [],scheme => "FOo"}, [return_map]).
diff --git a/lib/stdlib/test/uri_string_property_test_SUITE.erl b/lib/stdlib/test/uri_string_property_test_SUITE.erl
index ae2c61c7aa..b01dd9bf65 100644
--- a/lib/stdlib/test/uri_string_property_test_SUITE.erl
+++ b/lib/stdlib/test/uri_string_property_test_SUITE.erl
@@ -22,7 +22,7 @@
 -include_lib("common_test/include/ct.hrl").
 -compile(export_all).
 
-all() -> [recompose].
+all() -> [recompose, normalize].
 
 init_per_suite(Config) ->
     ct_property_test:init_per_suite(Config).
@@ -37,3 +37,8 @@ recompose(Config) ->
     ct_property_test:quickcheck(
       uri_string_recompose:prop_recompose(),
       Config).
+
+normalize(Config) ->
+    ct_property_test:quickcheck(
+      uri_string_recompose:prop_normalize(),
+      Config).
-- 
cgit v1.2.3