diff options
-rwxr-xr-x | lib/stdlib/src/uri_string.erl | 224 | ||||
-rw-r--r-- | lib/stdlib/test/property_test/uri_string_decode.erl | 55 | ||||
-rw-r--r-- | lib/stdlib/test/property_test/uri_string_recompose.erl | 360 | ||||
-rw-r--r-- | lib/stdlib/test/uri_string_SUITE.erl | 36 | ||||
-rw-r--r-- | lib/stdlib/test/uri_string_property_test_SUITE.erl | 15 |
5 files changed, 566 insertions, 124 deletions
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index bb7079c193..893ba4c6bf 100755 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -304,8 +304,6 @@ parse(URIString) when is_list(URIString) -> -spec recompose(URIMap) -> URIString when URIMap :: uri_map(), URIString :: uri_string(). -recompose(Map) when map_size(Map) =:= 0 -> - ""; recompose(Map) -> case is_valid_map(Map) of false -> @@ -405,7 +403,7 @@ convert_mapfields_to_list(Map) -> %% URI-reference = URI / relative-ref %%------------------------------------------------------------------------- -spec parse_uri_reference(binary(), uri_map()) -> uri_map(). -parse_uri_reference(<<>>, _) -> #{}; +parse_uri_reference(<<>>, _) -> #{path => <<>>}; parse_uri_reference(URIString, URI) -> try parse_scheme_start(URIString, URI) of Res -> Res @@ -434,13 +432,15 @@ parse_relative_part(?STRING_REST("//", Rest), URI) -> %% Parse userinfo - "//" is NOT part of authority try parse_userinfo(Rest, URI) of {T, URI1} -> - Userinfo = calculate_parsed_part(Rest, T), - URI1#{userinfo => decode_userinfo(Userinfo)} + Userinfo = calculate_parsed_userinfo(Rest, T), + URI2 = maybe_add_path(URI1), + URI2#{userinfo => decode_userinfo(Userinfo)} catch throw:uri_parse_error -> {T, URI1} = parse_host(Rest, URI), Host = calculate_parsed_part_sl(Rest, T), - URI1#{host => decode_host(remove_brackets(Host))} + URI2 = maybe_add_path(URI1), + URI2#{host => decode_host(remove_brackets(Host))} end; parse_relative_part(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-absolute @@ -449,11 +449,13 @@ parse_relative_part(?STRING_REST($/, Rest), URI) -> parse_relative_part(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query Query = calculate_parsed_part(Rest, T), - URI1#{query => decode_query(?STRING_REST($?, Query))}; + URI2 = maybe_add_path(URI1), + URI2#{query => decode_query(?STRING_REST($?, Query))}; parse_relative_part(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_part(Rest, T), - URI1#{fragment => decode_fragment(Fragment)}; + Fragment = calculate_parsed_fragment(Rest, T), + URI2 = maybe_add_path(URI1), + URI2#{fragment => decode_fragment(Fragment)}; parse_relative_part(?STRING_REST(Char, Rest), URI) -> case is_segment_nz_nc(Char) of true -> @@ -505,7 +507,7 @@ parse_segment(?STRING_REST($?, Rest), URI) -> {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}}; parse_segment(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), - Fragment = calculate_parsed_part(Rest, T), + Fragment = calculate_parsed_fragment(Rest, T), {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_segment(?STRING_REST(Char, Rest), URI) -> case is_pchar(Char) of @@ -528,7 +530,7 @@ parse_segment_nz_nc(?STRING_REST($?, Rest), URI) -> {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}}; parse_segment_nz_nc(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), - Fragment = calculate_parsed_part(Rest, T), + Fragment = calculate_parsed_fragment(Rest, T), {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) -> case is_segment_nz_nc(Char) of @@ -566,10 +568,32 @@ parse_scheme_start(?STRING_REST(Char, Rest), URI) -> case is_alpha(Char) of true -> {T, URI1} = parse_scheme(Rest, URI), Scheme = calculate_parsed_scheme(Rest, T), - URI1#{scheme => ?STRING_REST(Char, Scheme)}; + URI2 = maybe_add_path(URI1), + URI2#{scheme => ?STRING_REST(Char, Scheme)}; false -> throw(uri_parse_error) end. +%% Add path component if it missing after parsing the URI. +%% According to the URI specification there is always a +%% path component in every URI-reference and it can be +%% empty. + +%% maybe_add_path(Map) -> +%% case length(maps:keys(Map)) of +%% 0 -> +%% Map#{path => <<>>}; +%% _Else -> +%% Map +%% end. +maybe_add_path(Map) -> + case maps:is_key(path, Map) of + false -> + Map#{path => <<>>}; + _Else -> + Map + end. + + -spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}. parse_scheme(?STRING_REST($:, Rest), URI) -> @@ -603,7 +627,7 @@ parse_hier(?STRING_REST("//", Rest), URI) -> % Parse userinfo - "//" is NOT part of authority try parse_userinfo(Rest, URI) of {T, URI1} -> - Userinfo = calculate_parsed_part(Rest, T), + Userinfo = calculate_parsed_userinfo(Rest, T), {Rest, URI1#{userinfo => decode_userinfo(Userinfo)}} catch throw:uri_parse_error -> @@ -621,7 +645,7 @@ parse_hier(?STRING_REST($?, Rest), URI) -> {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}}; parse_hier(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_part(Rest, T), + Fragment = calculate_parsed_fragment(Rest, T), {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless case is_pchar(Char) of @@ -660,12 +684,11 @@ parse_hier(?STRING_EMPTY, URI) -> %% userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) %%------------------------------------------------------------------------- -spec parse_userinfo(binary(), uri_map()) -> {binary(), uri_map()}. -parse_userinfo(?CHAR($@), _URI) -> - %% URI cannot end in userinfo state - throw(uri_parse_error); +parse_userinfo(?CHAR($@), URI) -> + {?STRING_EMPTY, URI#{host => <<>>}}; parse_userinfo(?STRING_REST($@, Rest), URI) -> {T, URI1} = parse_host(Rest, URI), - Host = calculate_parsed_part(Rest, T), + Host = calculate_parsed_host(Rest, T), {Rest, URI1#{host => decode_host(remove_brackets(Host))}}; parse_userinfo(?STRING_REST(Char, Rest), URI) -> case is_userinfo(Char) of @@ -726,7 +749,7 @@ is_userinfo(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). -spec parse_host(binary(), uri_map()) -> {binary(), uri_map()}. parse_host(?STRING_REST($:, Rest), URI) -> {T, URI1} = parse_port(Rest, URI), - H = calculate_parsed_part(Rest, T), + H = calculate_parsed_port(Rest, T), Port = binary_to_integer(H), {Rest, URI1#{port => Port}}; parse_host(?STRING_REST($/, Rest), URI) -> @@ -741,7 +764,7 @@ parse_host(?STRING_REST($[, Rest), URI) -> parse_ipv6_bin(Rest, [], URI); parse_host(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_part(Rest, T), + Fragment = calculate_parsed_fragment(Rest, T), {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_host(?STRING_REST(Char, Rest), URI) -> case is_digit(Char) of @@ -755,7 +778,7 @@ parse_host(?STRING_EMPTY, URI) -> -spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}. parse_reg_name(?STRING_REST($:, Rest), URI) -> {T, URI1} = parse_port(Rest, URI), - H = calculate_parsed_part(Rest, T), + H = calculate_parsed_port(Rest, T), Port = binary_to_integer(H), {Rest, URI1#{port => Port}}; parse_reg_name(?STRING_REST($/, Rest), URI) -> @@ -768,7 +791,7 @@ parse_reg_name(?STRING_REST($?, Rest), URI) -> {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}}; parse_reg_name(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_part(Rest, T), + Fragment = calculate_parsed_fragment(Rest, T), {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_reg_name(?STRING_REST(Char, Rest), URI) -> case is_reg_name(Char) of @@ -788,7 +811,7 @@ is_reg_name(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). parse_ipv4_bin(?STRING_REST($:, Rest), Acc, URI) -> _ = validate_ipv4_address(lists:reverse(Acc)), {T, URI1} = parse_port(Rest, URI), - H = calculate_parsed_part(Rest, T), + H = calculate_parsed_port(Rest, T), Port = binary_to_integer(H), {Rest, URI1#{port => Port}}; parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) -> @@ -804,7 +827,7 @@ parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) -> parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) -> _ = validate_ipv4_address(lists:reverse(Acc)), {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_part(Rest, T), + Fragment = calculate_parsed_fragment(Rest, T), {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) -> case is_ipv4(Char) of @@ -851,7 +874,7 @@ is_ipv6(Char) -> is_hex_digit(Char). -spec parse_ipv6_bin_end(binary(), uri_map()) -> {binary(), uri_map()}. parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) -> {T, URI1} = parse_port(Rest, URI), - H = calculate_parsed_part(Rest, T), + H = calculate_parsed_port(Rest, T), Port = binary_to_integer(H), {Rest, URI1#{port => Port}}; parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) -> @@ -864,7 +887,7 @@ parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) -> {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}}; parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_part(Rest, T), + Fragment = calculate_parsed_fragment(Rest, T), {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) -> case is_ipv6(Char) of @@ -902,7 +925,7 @@ parse_port(?STRING_REST($?, Rest), URI) -> {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}}; parse_port(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_part(Rest, T), + Fragment = calculate_parsed_fragment(Rest, T), {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_port(?STRING_REST(Char, Rest), URI) -> case is_digit(Char) of @@ -928,7 +951,7 @@ parse_port(?STRING_EMPTY, URI) -> -spec parse_query(binary(), uri_map()) -> {binary(), uri_map()}. parse_query(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), - Fragment = calculate_parsed_part(Rest, T), + Fragment = calculate_parsed_fragment(Rest, T), {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_query(?STRING_REST(Char, Rest), URI) -> case is_query(Char) of @@ -1055,11 +1078,88 @@ remove_brackets(Addr) -> Addr. -spec calculate_parsed_part(binary(), binary()) -> binary(). calculate_parsed_part(<<$?>>, _) -> <<>>; calculate_parsed_part(<<$#>>, _) -> <<>>; +calculate_parsed_part(<<>>, _) -> <<>>; +calculate_parsed_part(Input, <<>>) -> + case binary:last(Input) of + $? -> + init_binary(Input); + $# -> + init_binary(Input); + _Else -> + Input + end; calculate_parsed_part(Input, Unparsed) -> {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)), First. +-spec calculate_parsed_userinfo(binary(), binary()) -> binary(). +calculate_parsed_userinfo(<<$?>>, _) -> <<>>; +calculate_parsed_userinfo(<<$#>>, _) -> <<>>; +calculate_parsed_userinfo(<<>>, _) -> <<>>; +calculate_parsed_userinfo(Input, <<>>) -> + case binary:last(Input) of + $? -> + init_binary(Input); + $# -> + init_binary(Input); + $@ -> + init_binary(Input); + _Else -> + Input + end; +calculate_parsed_userinfo(Input, Unparsed) -> + {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)), + First. + + +-spec calculate_parsed_host(binary(), binary()) -> binary(). +calculate_parsed_host(<<$?>>, _) -> <<>>; +calculate_parsed_host(<<$#>>, _) -> <<>>; +calculate_parsed_host(<<>>, _) -> <<>>; +calculate_parsed_host(Input, <<>>) -> + case binary:last(Input) of + $? -> + init_binary(Input); + $# -> + init_binary(Input); + $/ -> + init_binary(Input); + _Else -> + Input + end; +calculate_parsed_host(Input, Unparsed) -> + {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)), + First. + + +-spec calculate_parsed_port(binary(), binary()) -> binary(). +calculate_parsed_port(<<$?>>, _) -> <<>>; +calculate_parsed_port(<<$#>>, _) -> <<>>; +calculate_parsed_port(<<>>, _) -> <<>>; +calculate_parsed_port(Input, <<>>) -> + case binary:last(Input) of + $? -> + init_binary(Input); + $# -> + init_binary(Input); + $/ -> + init_binary(Input); + _Else -> + Input + end; +calculate_parsed_port(Input, Unparsed) -> + {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)), + First. + + +-spec calculate_parsed_fragment(binary(), binary()) -> binary(). +calculate_parsed_fragment(<<$#>>, _) -> <<>>; +calculate_parsed_fragment(Input, Unparsed) -> + {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)), + First. + + %% Returns the parsed binary based on Input and the Unparsed part. %% Used when parsing authority. %% @@ -1079,28 +1179,25 @@ calculate_parsed_part_sl(<<>>, _) -> <<>>; calculate_parsed_part_sl(Input, <<>>) -> case binary:last(Input) of $? -> - {First, _} = - split_binary(Input, byte_size(Input) - 1), - First; - + init_binary(Input); $# -> - {First, _} = - split_binary(Input, byte_size(Input) - 1), - First; + init_binary(Input); $/ -> - {First, _} = - split_binary(Input, byte_size(Input) - 1), - First; + init_binary(Input); _Else -> - {First, _} = - split_binary(Input, byte_size_exl_single_slash(Input)), - First + Input end; calculate_parsed_part_sl(Input, Unparsed) -> {First, _} = split_binary(Input, byte_size_exl_single_slash(Input) - byte_size_exl_head(Unparsed)), First. +%% Return all bytes of the binary except the last one. The binary must be non-empty. +init_binary(B) -> + {Init, _} = + split_binary(B, byte_size(B) - 1), + Init. + %% Returns the parsed binary based on Input and the Unparsed part. %% Used when parsing scheme. @@ -1109,6 +1206,7 @@ calculate_parsed_scheme(Input, Unparsed) -> {First, _} = split_binary(Input, byte_size(Input) - byte_size(Unparsed) - 1), First. + %% Returns the size of a binary exluding the first element. %% Used in calls to split_binary(). -spec byte_size_exl_head(binary()) -> number(). @@ -1149,25 +1247,35 @@ byte_size_exl_single_slash(Rest) -> byte_size(Rest). %%------------------------------------------------------------------------- -spec decode_userinfo(binary()) -> binary(). decode_userinfo(Cs) -> - decode(Cs, fun is_userinfo/1, <<>>). + check_utf8(decode(Cs, fun is_userinfo/1, <<>>)). -spec decode_host(binary()) -> binary(). decode_host(Cs) -> - decode(Cs, fun is_host/1, <<>>). + check_utf8(decode(Cs, fun is_host/1, <<>>)). -spec decode_path(binary()) -> binary(). decode_path(Cs) -> - decode(Cs, fun is_path/1, <<>>). + check_utf8(decode(Cs, fun is_path/1, <<>>)). -spec decode_query(binary()) -> binary(). decode_query(Cs) -> - decode(Cs, fun is_query/1, <<>>). + check_utf8(decode(Cs, fun is_query/1, <<>>)). -spec decode_fragment(binary()) -> binary(). decode_fragment(Cs) -> - decode(Cs, fun is_fragment/1, <<>>). + check_utf8(decode(Cs, fun is_fragment/1, <<>>)). +%% Returns Cs if it is utf8 encoded. +check_utf8(Cs) -> + case unicode:characters_to_list(Cs) of + {incomplete,_,_} -> + throw(uri_parse_error); + {error,_,_} -> + throw(uri_parse_error); + _ -> Cs + end. + %%------------------------------------------------------------------------- %% Percent-encode %%------------------------------------------------------------------------- @@ -1368,10 +1476,15 @@ bracket_ipv6(Addr) when is_list(Addr) -> %% E.g. "//user@:8080" => #{host => [],port => 8080,userinfo => "user"} %% There is always at least an empty host when both userinfo and port %% are present. +%% - #{path => "///"} otherwise the following would be true: +%% "/////" = uri_string:recompose(#{host => "", path => "///"}) +%% "/////" = uri_string:recompose(#{path => "/////"}) +%% AND +%% path-absolute = "/" [ segment-nz *( "/" segment ) ] %%------------------------------------------------------------------------- is_valid_map(Map) -> case - (not maps:is_key(userinfo, Map) andalso + ((not maps:is_key(userinfo, Map) andalso not maps:is_key(host, Map) andalso maps:is_key(port, Map)) orelse @@ -1381,7 +1494,9 @@ is_valid_map(Map) -> orelse (maps:is_key(userinfo, Map) andalso not maps:is_key(host, Map) andalso - maps:is_key(port, Map)) + maps:is_key(port, Map))) orelse + not maps:is_key(path, Map) orelse + not is_host_and_path_valid(Map) of true -> false; @@ -1390,6 +1505,19 @@ is_valid_map(Map) -> end. +is_host_and_path_valid(Map) -> + Host = maps:get(host, Map, undefined), + Path = maps:get(path, Map, undefined), + not (Host =:= undefined andalso starts_with_two_slash(Path)). + + +starts_with_two_slash([$/,$/|_]) -> + true; +starts_with_two_slash(?STRING_REST("//", _)) -> + true; +starts_with_two_slash(_) -> false. + + update_scheme(#{scheme := Scheme}, _) -> add_colon_postfix(encode_scheme(Scheme)); update_scheme(#{}, _) -> diff --git a/lib/stdlib/test/property_test/uri_string_decode.erl b/lib/stdlib/test/property_test/uri_string_decode.erl deleted file mode 100644 index 137a649cf1..0000000000 --- a/lib/stdlib/test/property_test/uri_string_decode.erl +++ /dev/null @@ -1,55 +0,0 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2008-2017. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% %CopyrightEnd% -%% --module(uri_string_decode). - --compile(export_all). - --proptest(eqc). --proptest([triq,proper]). - --ifndef(EQC). --ifndef(PROPER). --ifndef(TRIQ). --define(EQC,true). --endif. --endif. --endif. - --ifdef(EQC). --include_lib("eqc/include/eqc.hrl"). --define(MOD_eqc,eqc). - --else. --ifdef(PROPER). --include_lib("proper/include/proper.hrl"). --define(MOD_eqc,proper). - --else. --ifdef(TRIQ). --define(MOD_eqc,triq). --include_lib("triq/include/triq.hrl"). - --endif. --endif. --endif. - - -prop_uri_string_decode() -> - ok. diff --git a/lib/stdlib/test/property_test/uri_string_recompose.erl b/lib/stdlib/test/property_test/uri_string_recompose.erl new file mode 100644 index 0000000000..dad67cd4c1 --- /dev/null +++ b/lib/stdlib/test/property_test/uri_string_recompose.erl @@ -0,0 +1,360 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(uri_string_recompose). + +-compile(export_all). + +-proptest(eqc). +-proptest([triq,proper]). + +-ifndef(EQC). +-ifndef(PROPER). +-ifndef(TRIQ). +-define(EQC,true). +-endif. +-endif. +-endif. + +-ifdef(EQC). +-include_lib("eqc/include/eqc.hrl"). +-define(MOD_eqc,eqc). + +-else. +-ifdef(PROPER). +-include_lib("proper/include/proper.hrl"). +-define(MOD_eqc,proper). + +-else. +-ifdef(TRIQ). +-define(MOD_eqc,triq). +-include_lib("triq/include/triq.hrl"). + +-endif. +-endif. +-endif. + + +-define(STRING_REST(MatchStr, Rest), <<MatchStr/utf8, Rest/binary>>). + +-define(SCHEME, {scheme, scheme()}). +-define(USER, {userinfo, unicode()}). +-define(HOST, {host, host_map()}). +-define(PORT, {port, port()}). +-define(PATH_ABE, {path, path_abempty_map()}). +-define(PATH_ABS, {path, path_absolute_map()}). +-define(PATH_NOS, {path, path_noscheme_map()}). +-define(PATH_ROO, {path, path_rootless_map()}). +-define(PATH_EMP, {path, path_empty_map()}). +-define(QUERY, {query, query_map()}). +-define(FRAGMENT, {fragment, fragment_map()}). + + +%%%======================================================================== +%%% Properties +%%%======================================================================== + +prop_recompose() -> + ?FORALL(Map, map(), + Map =:= uri_string:parse(uri_string:recompose(Map)) + ). + +%% Stats +prop_map_key_length_collect() -> + ?FORALL(List, map(), + collect(length(maps:keys(List)), true)). + +prop_map_collect() -> + ?FORALL(List, map(), + collect(lists:sort(maps:keys(List)), true)). + +prop_scheme_collect() -> + ?FORALL(List, scheme(), + collect(length(List), true)). + + +%%%======================================================================== +%%% Generators +%%%======================================================================== + +map() -> + ?LET(Gen, comp_proplist(), proplist_to_map(Gen)). + +comp_proplist() -> + frequency([ + {2, [?SCHEME,?PATH_ABS]}, + {2, [?SCHEME,?PATH_ROO]}, + {2, [?SCHEME,?PATH_EMP]}, + {2, [?SCHEME,?HOST,?PATH_ABE]}, + {2, [?SCHEME,?USER,?HOST,?PATH_ABE]}, + {2, [?SCHEME,?HOST,?PORT,?PATH_ABE]}, + {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE]}, + + {2, [?PATH_ABS]}, + {2, [?PATH_NOS]}, + {2, [?PATH_EMP]}, + {2, [?HOST,?PATH_ABE]}, + {2, [?USER,?HOST,?PATH_ABE]}, + {2, [?HOST,?PORT,?PATH_ABE]}, + {2, [?USER,?HOST,?PORT,?PATH_ABE]}, + + + {2, [?SCHEME,?PATH_ABS,?QUERY]}, + {2, [?SCHEME,?PATH_ROO,?QUERY]}, + {2, [?SCHEME,?PATH_EMP,?QUERY]}, + {2, [?SCHEME,?HOST,?PATH_ABE,?QUERY]}, + {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?QUERY]}, + {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?QUERY]}, + {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?QUERY]}, + + {2, [?PATH_ABS,?QUERY]}, + {2, [?PATH_NOS,?QUERY]}, + {2, [?PATH_EMP,?QUERY]}, + {2, [?HOST,?PATH_ABE,?QUERY]}, + {2, [?USER,?HOST,?PATH_ABE,?QUERY]}, + {2, [?HOST,?PORT,?PATH_ABE,?QUERY]}, + {2, [?USER,?HOST,?PORT,?PATH_ABE,?QUERY]}, + + + {2, [?SCHEME,?PATH_ABS,?FRAGMENT]}, + {2, [?SCHEME,?PATH_ROO,?FRAGMENT]}, + {2, [?SCHEME,?PATH_EMP,?FRAGMENT]}, + {2, [?SCHEME,?HOST,?PATH_ABE,?FRAGMENT]}, + {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?FRAGMENT]}, + {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?FRAGMENT]}, + {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?FRAGMENT]}, + + {2, [?PATH_ABS,?FRAGMENT]}, + {2, [?PATH_NOS,?FRAGMENT]}, + {2, [?PATH_EMP,?FRAGMENT]}, + {2, [?HOST,?PATH_ABE,?FRAGMENT]}, + {2, [?USER,?HOST,?PATH_ABE,?FRAGMENT]}, + {2, [?HOST,?PORT,?PATH_ABE,?FRAGMENT]}, + {2, [?USER,?HOST,?PORT,?PATH_ABE,?FRAGMENT]}, + + + {2, [?SCHEME,?PATH_ABS,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?PATH_ROO,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?PATH_EMP,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]}, + + {2, [?PATH_ABS,?QUERY,?FRAGMENT]}, + {2, [?PATH_NOS,?QUERY,?FRAGMENT]}, + {2, [?PATH_EMP,?QUERY,?FRAGMENT]}, + {2, [?HOST,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?USER,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?USER,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]} + ]). + + +%%------------------------------------------------------------------------- +%% Path +%%------------------------------------------------------------------------- +path_abempty_map() -> + frequency([{90, path_abe_map()}, + {10, path_empty_map()}]). + +path_abe_map() -> + ?SIZED(Length, path_abe_map(Length, [])). +%% +path_abe_map(0, Segments) -> + ?LET(Gen, Segments, lists:append(Gen)); +path_abe_map(N, Segments) -> + path_abe_map(N-1, [slash(),segment()|Segments]). + + +path_absolute_map() -> + ?SIZED(Length, path_absolute_map(Length, [])). +%% +path_absolute_map(0, Segments) -> + ?LET(Gen, [slash(),segment_nz()|Segments], lists:append(Gen)); +path_absolute_map(N, Segments) -> + path_absolute_map(N-1, [slash(),segment()|Segments]). + + +path_noscheme_map() -> + ?SIZED(Length, path_noscheme_map(Length, [])). +%% +path_noscheme_map(0, Segments) -> + ?LET(Gen, [segment_nz_nc()|Segments], lists:append(Gen)); +path_noscheme_map(N, Segments) -> + path_noscheme_map(N-1, [slash(),segment()|Segments]). + +path_rootless_map() -> + ?SIZED(Length, path_rootless_map(Length, [])). +%% +path_rootless_map(0, Segments) -> + ?LET(Gen, [segment_nz()|Segments], lists:append(Gen)); +path_rootless_map(N, Segments) -> + path_rootless_map(N-1, [slash(),segment()|Segments]). + + +segment_nz() -> + non_empty(segment()). + +segment_nz_nc() -> + non_empty(list(frequency([{30, unreserved()}, + {10, sub_delims()}, + {10, unicode_char()}, + {5, oneof([$@])} + ]))). + + +segment() -> + list(frequency([{30, unreserved()}, + {10, sub_delims()}, + {10, unicode_char()}, + {5, oneof([$:, $@])} + ])). + +slash() -> + "/". + +path_empty_map() -> + "". + + +%%------------------------------------------------------------------------- +%% Path +%%------------------------------------------------------------------------- +host_map() -> + frequency([{30, reg_name()}, + {30, ip_address()} + ]). + + +reg_name() -> + list(frequency([{30, alpha()}, + {10, sub_delims()}, + {10, unicode_char()} + ])). + +ip_address() -> + oneof(["127.0.0.1", "::127.0.0.1", + "2001:0db8:0000:0000:0000:0000:1428:07ab", + "2001:0db8:0000:0000:0000::1428:07ab", + "2001:0db8:0:0:0:0:1428:07ab", + "2001:0db8:0::0:1428:07ab"]). + +%% Generating only reg-names +host_uri() -> + non_empty(list(frequency([{30, unreserved()}, + {10, sub_delims()}, + {10, pct_encoded()} + ]))). + +%%------------------------------------------------------------------------- +%% Port, Query, Fragment +%%------------------------------------------------------------------------- +port() -> + range(1,65535). + + +query_map() -> + [$?| unicode()]. + + +query_uri() -> + [$?| non_empty(list(frequency([{20, pchar()}, + {5, oneof([$/, $?])} % punctuation + ])))]. + +fragment_map() -> + unicode(). + +fragment_uri() -> + [$?| non_empty(list(frequency([{20, pchar()}, + {5, oneof([$/, $?])} % punctuation + ])))]. + + +%%------------------------------------------------------------------------- +%% Scheme +%%------------------------------------------------------------------------- +scheme() -> + ?SIZED(Length, scheme_start(Length, [])). +%% +scheme_start(0, L) -> + ?LET(Gen, L, lists:reverse(Gen)); +scheme_start(N, L) -> + scheme(N-1,[alpha()|L]). + +scheme(0, L) -> + ?LET(Gen, L, lists:reverse(Gen)); +scheme(N, L) -> + scheme(N-1, [scheme_char()|L]). + + +%%------------------------------------------------------------------------- +%% Misc +%%------------------------------------------------------------------------- +unicode() -> + list(frequency([{20, alpha()}, % alpha + {10, digit()}, % digit + {10, unicode_char()} % unicode + ])). + +scheme_char() -> + frequency([{20, alpha()}, % alpha + {20, digit()}, % digit + {5, oneof([$+, $-, $.])} % punctuation + ]). + +sub_delims() -> + oneof([$!, $$, $&, $', $(, $), + $*, $+, $,,$;, $=]). + +pchar() -> + frequency([{20, unreserved()}, + {5, pct_encoded()}, + {5, sub_delims()}, + {1, oneof([$:, $@])} % punctuation + ]). + +unreserved() -> + frequency([{20, alpha()}, + {5, digit()}, + {1, oneof([$-, $., $_, $~])} % punctuation + ]). + +unicode_char() -> + range(913, 1023). + +alpha() -> + frequency([{20, range($a, $z)}, % letters + {20, range($A, $Z)}]). % letters + +digit() -> + range($0, $9). % numbers + +pct_encoded() -> + oneof(["%C3%A4", "%C3%A5", "%C3%B6"]). + + +%%%======================================================================== +%%% Helpers +%%%======================================================================== +proplist_to_map(L) -> + lists:foldl(fun({K,V},M) -> M#{K => V}; + (_,M) -> M + end, #{}, L). diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl index 0eb5105c35..cd2e003d02 100644 --- a/lib/stdlib/test/uri_string_SUITE.erl +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -31,7 +31,8 @@ parse_path/1, parse_pct_encoded_fragment/1, parse_pct_encoded_query/1, parse_pct_encoded_userinfo/1, parse_port/1, parse_query/1, parse_scheme/1, parse_userinfo/1, - parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1, parse_special/1, + parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1, + parse_special/1, parse_special2/1, recompose_fragment/1, recompose_parse_fragment/1, recompose_query/1, recompose_parse_query/1, recompose_path/1, recompose_parse_path/1, @@ -90,6 +91,7 @@ all() -> parse_mixed, parse_relative, parse_special, + parse_special2, recompose_fragment, recompose_parse_fragment, recompose_query, @@ -114,7 +116,7 @@ uri_combinations() -> Hst <- [fun update_host/1, fun update_host_binary/1, fun update_ipv6/1, fun update_ipv6_binary/1, none], Prt <- [fun update_port/1, none], - Pat <- [fun update_path/1, fun update_path_binary/1, none], + Pat <- [fun update_path/1, fun update_path_binary/1], Qry <- [fun update_query/1,fun update_query_binary/1, none], Frg <- [fun update_fragment/1, fun update_fragment_binary/1, none], not (Usr =:= none andalso Hst =:= none andalso Prt =/= none), @@ -312,9 +314,7 @@ parse_binary_userinfo(_Config) -> #{scheme := <<"foo">>, userinfo := <<"user">>, host := <<"localhost">>} = uri_string:parse(<<"foo://user@localhost">>), #{scheme := <<"foo">>, userinfo := <<"user:password">>, host := <<"localhost">>} = - uri_string:parse(<<"foo://user:password@localhost">>), - uri_parse_error =(catch uri_string:parse(<<"//user@">>)), - uri_parse_error = (catch uri_string:parse(<<"foo://user@">>)). + uri_string:parse(<<"foo://user:password@localhost">>). parse_binary_pct_encoded_userinfo(_Config) -> #{scheme := <<"user">>, path := <<"合@気道"/utf8>>} = @@ -667,14 +667,24 @@ parse_special(_Config) -> #{host := "foo",path := "/"} = uri_string:parse("//foo/"), #{host := "foo",query := "?",scheme := "http"} = uri_string:parse("http://foo?"), #{fragment := [],host := "foo",scheme := "http"} = uri_string:parse("http://foo#"), - #{host := "foo",path := "/",scheme := "http"} = uri_string:parse("http://foo/"). + #{host := "foo",path := "/",scheme := "http"} = uri_string:parse("http://foo/"), + #{fragment := [],host := "host",port := 80,scheme := "http"} = uri_string:parse("http://host:80#"), + #{host := "host",port := 80,query := "?",scheme := "http"} = uri_string:parse("http://host:80?"). + +parse_special2(_Config) -> + #{host := [],path := "/",port := 1,scheme := "a"} = uri_string:parse("a://:1/"), + #{path := "/a/",scheme := "a"} = uri_string:parse("a:/a/"), + #{host := [],path := [],userinfo := []} = uri_string:parse("//@"), + #{host := [],path := [],scheme := "foo",userinfo := []} = uri_string:parse("foo://@"), + #{host := [],path := "/",userinfo := []} = uri_string:parse("//@/"), + #{host := [],path := "/",scheme := "foo",userinfo := []} = uri_string:parse("foo://@/"). %%------------------------------------------------------------------------- %% Recompose tests %%------------------------------------------------------------------------- recompose_fragment(_Config) -> - <<?FRAGMENT_ENC>> = uri_string:recompose(#{fragment => <<?FRAGMENT/utf8>>}), - ?FRAGMENT_ENC = uri_string:recompose(#{fragment => ?FRAGMENT}). + <<?FRAGMENT_ENC>> = uri_string:recompose(#{fragment => <<?FRAGMENT/utf8>>, path => <<>>}), + ?FRAGMENT_ENC = uri_string:recompose(#{fragment => ?FRAGMENT, path => ""}). recompose_parse_fragment(_Config) -> <<?FRAGMENT_ENC>> = uri_string:recompose(uri_string:parse(<<?FRAGMENT_ENC>>)), @@ -682,15 +692,17 @@ recompose_parse_fragment(_Config) -> recompose_query(_Config) -> <<?QUERY_ENC>> = - uri_string:recompose(#{query => <<?QUERY/utf8>>}), + uri_string:recompose(#{query => <<?QUERY/utf8>>, path => <<>>}), <<?QUERY_ENC?FRAGMENT_ENC>> = uri_string:recompose(#{query => <<?QUERY/utf8>>, - fragment => <<?FRAGMENT/utf8>>}), + fragment => <<?FRAGMENT/utf8>>, + path => <<>>}), "?name=%C3%B6rn" = - uri_string:recompose(#{query => "?name=örn"}), + uri_string:recompose(#{query => "?name=örn", path => ""}), "?name=%C3%B6rn#n%C3%A4sa" = uri_string:recompose(#{query => "?name=örn", - fragment => "näsa"}). + fragment => "näsa", + path => ""}). recompose_parse_query(_Config) -> <<"?name=%C3%B6rn">> = uri_string:recompose(uri_string:parse(<<"?name=%C3%B6rn">>)), diff --git a/lib/stdlib/test/uri_string_property_test_SUITE.erl b/lib/stdlib/test/uri_string_property_test_SUITE.erl index de5edf54aa..ae2c61c7aa 100644 --- a/lib/stdlib/test/uri_string_property_test_SUITE.erl +++ b/lib/stdlib/test/uri_string_property_test_SUITE.erl @@ -20,10 +20,9 @@ -module(uri_string_property_test_SUITE). -include_lib("common_test/include/ct.hrl"). - -compile(export_all). -all() -> [decode]. +all() -> [recompose]. init_per_suite(Config) -> ct_property_test:init_per_suite(Config). @@ -31,12 +30,10 @@ init_per_suite(Config) -> end_per_suite(Config) -> Config. -%%%================================================================ +%%%======================================================================== %%% Test suites -%%% - -decode(Config) -> +%%%======================================================================== +recompose(Config) -> ct_property_test:quickcheck( - uri_string_decode:prop_uri_string_decode(), - Config - ). + uri_string_recompose:prop_recompose(), + Config). |