aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib/src/uri_string.erl
diff options
context:
space:
mode:
authorPéter Dimitrov <[email protected]>2017-10-04 16:45:51 +0200
committerPéter Dimitrov <[email protected]>2017-10-23 15:53:28 +0200
commit1335e59a60d5e195baf519d2c52b0ca0aa96831f (patch)
tree736019933c52c183229dd3e4b7801ef3c08e26e4 /lib/stdlib/src/uri_string.erl
parent505579acda74b9281c965488f86cbd6c83254a57 (diff)
downloadotp-1335e59a60d5e195baf519d2c52b0ca0aa96831f.tar.gz
otp-1335e59a60d5e195baf519d2c52b0ca0aa96831f.tar.bz2
otp-1335e59a60d5e195baf519d2c52b0ca0aa96831f.zip
stdlib: Add property tests, bugfixes
- Add property tests using PropEr. - Add new testcases to uri_string_SUITE. - Improve calculation of parsed binary. - Verify if input to parse() is UTF8 encoded. - Update is_valid_map(): added check for path and host.
Diffstat (limited to 'lib/stdlib/src/uri_string.erl')
-rwxr-xr-xlib/stdlib/src/uri_string.erl224
1 files changed, 176 insertions, 48 deletions
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index bb7079c193..893ba4c6bf 100755
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -304,8 +304,6 @@ parse(URIString) when is_list(URIString) ->
-spec recompose(URIMap) -> URIString when
URIMap :: uri_map(),
URIString :: uri_string().
-recompose(Map) when map_size(Map) =:= 0 ->
- "";
recompose(Map) ->
case is_valid_map(Map) of
false ->
@@ -405,7 +403,7 @@ convert_mapfields_to_list(Map) ->
%% URI-reference = URI / relative-ref
%%-------------------------------------------------------------------------
-spec parse_uri_reference(binary(), uri_map()) -> uri_map().
-parse_uri_reference(<<>>, _) -> #{};
+parse_uri_reference(<<>>, _) -> #{path => <<>>};
parse_uri_reference(URIString, URI) ->
try parse_scheme_start(URIString, URI) of
Res -> Res
@@ -434,13 +432,15 @@ parse_relative_part(?STRING_REST("//", Rest), URI) ->
%% Parse userinfo - "//" is NOT part of authority
try parse_userinfo(Rest, URI) of
{T, URI1} ->
- Userinfo = calculate_parsed_part(Rest, T),
- URI1#{userinfo => decode_userinfo(Userinfo)}
+ Userinfo = calculate_parsed_userinfo(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{userinfo => decode_userinfo(Userinfo)}
catch
throw:uri_parse_error ->
{T, URI1} = parse_host(Rest, URI),
Host = calculate_parsed_part_sl(Rest, T),
- URI1#{host => decode_host(remove_brackets(Host))}
+ URI2 = maybe_add_path(URI1),
+ URI2#{host => decode_host(remove_brackets(Host))}
end;
parse_relative_part(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-absolute
@@ -449,11 +449,13 @@ parse_relative_part(?STRING_REST($/, Rest), URI) ->
parse_relative_part(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
Query = calculate_parsed_part(Rest, T),
- URI1#{query => decode_query(?STRING_REST($?, Query))};
+ URI2 = maybe_add_path(URI1),
+ URI2#{query => decode_query(?STRING_REST($?, Query))};
parse_relative_part(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- Fragment = calculate_parsed_part(Rest, T),
- URI1#{fragment => decode_fragment(Fragment)};
+ Fragment = calculate_parsed_fragment(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{fragment => decode_fragment(Fragment)};
parse_relative_part(?STRING_REST(Char, Rest), URI) ->
case is_segment_nz_nc(Char) of
true ->
@@ -505,7 +507,7 @@ parse_segment(?STRING_REST($?, Rest), URI) ->
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_segment(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI),
- Fragment = calculate_parsed_part(Rest, T),
+ Fragment = calculate_parsed_fragment(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_segment(?STRING_REST(Char, Rest), URI) ->
case is_pchar(Char) of
@@ -528,7 +530,7 @@ parse_segment_nz_nc(?STRING_REST($?, Rest), URI) ->
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_segment_nz_nc(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI),
- Fragment = calculate_parsed_part(Rest, T),
+ Fragment = calculate_parsed_fragment(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) ->
case is_segment_nz_nc(Char) of
@@ -566,10 +568,32 @@ parse_scheme_start(?STRING_REST(Char, Rest), URI) ->
case is_alpha(Char) of
true -> {T, URI1} = parse_scheme(Rest, URI),
Scheme = calculate_parsed_scheme(Rest, T),
- URI1#{scheme => ?STRING_REST(Char, Scheme)};
+ URI2 = maybe_add_path(URI1),
+ URI2#{scheme => ?STRING_REST(Char, Scheme)};
false -> throw(uri_parse_error)
end.
+%% Add path component if it is missing after parsing the URI.
+%% According to the URI specification there is always a
+%% path component in every URI-reference and it can be
+%% empty.
+
+%% maybe_add_path(Map) ->
+%% case length(maps:keys(Map)) of
+%% 0 ->
+%% Map#{path => <<>>};
+%% _Else ->
+%% Map
+%% end.
+maybe_add_path(Map) ->
+ case maps:is_key(path, Map) of
+ false ->
+ Map#{path => <<>>};
+ _Else ->
+ Map
+ end.
+
+
-spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}.
parse_scheme(?STRING_REST($:, Rest), URI) ->
@@ -603,7 +627,7 @@ parse_hier(?STRING_REST("//", Rest), URI) ->
% Parse userinfo - "//" is NOT part of authority
try parse_userinfo(Rest, URI) of
{T, URI1} ->
- Userinfo = calculate_parsed_part(Rest, T),
+ Userinfo = calculate_parsed_userinfo(Rest, T),
{Rest, URI1#{userinfo => decode_userinfo(Userinfo)}}
catch
throw:uri_parse_error ->
@@ -621,7 +645,7 @@ parse_hier(?STRING_REST($?, Rest), URI) ->
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_hier(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- Fragment = calculate_parsed_part(Rest, T),
+ Fragment = calculate_parsed_fragment(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless
case is_pchar(Char) of
@@ -660,12 +684,11 @@ parse_hier(?STRING_EMPTY, URI) ->
%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
%%-------------------------------------------------------------------------
-spec parse_userinfo(binary(), uri_map()) -> {binary(), uri_map()}.
-parse_userinfo(?CHAR($@), _URI) ->
- %% URI cannot end in userinfo state
- throw(uri_parse_error);
+parse_userinfo(?CHAR($@), URI) ->
+ {?STRING_EMPTY, URI#{host => <<>>}};
parse_userinfo(?STRING_REST($@, Rest), URI) ->
{T, URI1} = parse_host(Rest, URI),
- Host = calculate_parsed_part(Rest, T),
+ Host = calculate_parsed_host(Rest, T),
{Rest, URI1#{host => decode_host(remove_brackets(Host))}};
parse_userinfo(?STRING_REST(Char, Rest), URI) ->
case is_userinfo(Char) of
@@ -726,7 +749,7 @@ is_userinfo(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
-spec parse_host(binary(), uri_map()) -> {binary(), uri_map()}.
parse_host(?STRING_REST($:, Rest), URI) ->
{T, URI1} = parse_port(Rest, URI),
- H = calculate_parsed_part(Rest, T),
+ H = calculate_parsed_port(Rest, T),
Port = binary_to_integer(H),
{Rest, URI1#{port => Port}};
parse_host(?STRING_REST($/, Rest), URI) ->
@@ -741,7 +764,7 @@ parse_host(?STRING_REST($[, Rest), URI) ->
parse_ipv6_bin(Rest, [], URI);
parse_host(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- Fragment = calculate_parsed_part(Rest, T),
+ Fragment = calculate_parsed_fragment(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_host(?STRING_REST(Char, Rest), URI) ->
case is_digit(Char) of
@@ -755,7 +778,7 @@ parse_host(?STRING_EMPTY, URI) ->
-spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}.
parse_reg_name(?STRING_REST($:, Rest), URI) ->
{T, URI1} = parse_port(Rest, URI),
- H = calculate_parsed_part(Rest, T),
+ H = calculate_parsed_port(Rest, T),
Port = binary_to_integer(H),
{Rest, URI1#{port => Port}};
parse_reg_name(?STRING_REST($/, Rest), URI) ->
@@ -768,7 +791,7 @@ parse_reg_name(?STRING_REST($?, Rest), URI) ->
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_reg_name(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- Fragment = calculate_parsed_part(Rest, T),
+ Fragment = calculate_parsed_fragment(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_reg_name(?STRING_REST(Char, Rest), URI) ->
case is_reg_name(Char) of
@@ -788,7 +811,7 @@ is_reg_name(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
parse_ipv4_bin(?STRING_REST($:, Rest), Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
{T, URI1} = parse_port(Rest, URI),
- H = calculate_parsed_part(Rest, T),
+ H = calculate_parsed_port(Rest, T),
Port = binary_to_integer(H),
{Rest, URI1#{port => Port}};
parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) ->
@@ -804,7 +827,7 @@ parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) ->
parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- Fragment = calculate_parsed_part(Rest, T),
+ Fragment = calculate_parsed_fragment(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) ->
case is_ipv4(Char) of
@@ -851,7 +874,7 @@ is_ipv6(Char) -> is_hex_digit(Char).
-spec parse_ipv6_bin_end(binary(), uri_map()) -> {binary(), uri_map()}.
parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) ->
{T, URI1} = parse_port(Rest, URI),
- H = calculate_parsed_part(Rest, T),
+ H = calculate_parsed_port(Rest, T),
Port = binary_to_integer(H),
{Rest, URI1#{port => Port}};
parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) ->
@@ -864,7 +887,7 @@ parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) ->
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- Fragment = calculate_parsed_part(Rest, T),
+ Fragment = calculate_parsed_fragment(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) ->
case is_ipv6(Char) of
@@ -902,7 +925,7 @@ parse_port(?STRING_REST($?, Rest), URI) ->
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_port(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- Fragment = calculate_parsed_part(Rest, T),
+ Fragment = calculate_parsed_fragment(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_port(?STRING_REST(Char, Rest), URI) ->
case is_digit(Char) of
@@ -928,7 +951,7 @@ parse_port(?STRING_EMPTY, URI) ->
-spec parse_query(binary(), uri_map()) -> {binary(), uri_map()}.
parse_query(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI),
- Fragment = calculate_parsed_part(Rest, T),
+ Fragment = calculate_parsed_fragment(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_query(?STRING_REST(Char, Rest), URI) ->
case is_query(Char) of
@@ -1055,11 +1078,88 @@ remove_brackets(Addr) -> Addr.
-spec calculate_parsed_part(binary(), binary()) -> binary().
calculate_parsed_part(<<$?>>, _) -> <<>>;
calculate_parsed_part(<<$#>>, _) -> <<>>;
+calculate_parsed_part(<<>>, _) -> <<>>;
+calculate_parsed_part(Input, <<>>) ->
+ case binary:last(Input) of
+ $? ->
+ init_binary(Input);
+ $# ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
calculate_parsed_part(Input, Unparsed) ->
{First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)),
First.
+-spec calculate_parsed_userinfo(binary(), binary()) -> binary().
+calculate_parsed_userinfo(<<$?>>, _) -> <<>>;
+calculate_parsed_userinfo(<<$#>>, _) -> <<>>;
+calculate_parsed_userinfo(<<>>, _) -> <<>>;
+calculate_parsed_userinfo(Input, <<>>) ->
+ case binary:last(Input) of
+ $? ->
+ init_binary(Input);
+ $# ->
+ init_binary(Input);
+ $@ ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
+calculate_parsed_userinfo(Input, Unparsed) ->
+ {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)),
+ First.
+
+
+-spec calculate_parsed_host(binary(), binary()) -> binary().
+calculate_parsed_host(<<$?>>, _) -> <<>>;
+calculate_parsed_host(<<$#>>, _) -> <<>>;
+calculate_parsed_host(<<>>, _) -> <<>>;
+calculate_parsed_host(Input, <<>>) ->
+ case binary:last(Input) of
+ $? ->
+ init_binary(Input);
+ $# ->
+ init_binary(Input);
+ $/ ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
+calculate_parsed_host(Input, Unparsed) ->
+ {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)),
+ First.
+
+
+-spec calculate_parsed_port(binary(), binary()) -> binary().
+calculate_parsed_port(<<$?>>, _) -> <<>>;
+calculate_parsed_port(<<$#>>, _) -> <<>>;
+calculate_parsed_port(<<>>, _) -> <<>>;
+calculate_parsed_port(Input, <<>>) ->
+ case binary:last(Input) of
+ $? ->
+ init_binary(Input);
+ $# ->
+ init_binary(Input);
+ $/ ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
+calculate_parsed_port(Input, Unparsed) ->
+ {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)),
+ First.
+
+
+-spec calculate_parsed_fragment(binary(), binary()) -> binary().
+calculate_parsed_fragment(<<$#>>, _) -> <<>>;
+calculate_parsed_fragment(Input, Unparsed) ->
+ {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)),
+ First.
+
+
%% Returns the parsed binary based on Input and the Unparsed part.
%% Used when parsing authority.
%%
@@ -1079,28 +1179,25 @@ calculate_parsed_part_sl(<<>>, _) -> <<>>;
calculate_parsed_part_sl(Input, <<>>) ->
case binary:last(Input) of
$? ->
- {First, _} =
- split_binary(Input, byte_size(Input) - 1),
- First;
-
+ init_binary(Input);
$# ->
- {First, _} =
- split_binary(Input, byte_size(Input) - 1),
- First;
+ init_binary(Input);
$/ ->
- {First, _} =
- split_binary(Input, byte_size(Input) - 1),
- First;
+ init_binary(Input);
_Else ->
- {First, _} =
- split_binary(Input, byte_size_exl_single_slash(Input)),
- First
+ Input
end;
calculate_parsed_part_sl(Input, Unparsed) ->
{First, _} =
split_binary(Input, byte_size_exl_single_slash(Input) - byte_size_exl_head(Unparsed)),
First.
+%% Return all bytes of the binary except the last one. The binary must be non-empty.
+init_binary(B) ->
+ {Init, _} =
+ split_binary(B, byte_size(B) - 1),
+ Init.
+
%% Returns the parsed binary based on Input and the Unparsed part.
%% Used when parsing scheme.
@@ -1109,6 +1206,7 @@ calculate_parsed_scheme(Input, Unparsed) ->
{First, _} = split_binary(Input, byte_size(Input) - byte_size(Unparsed) - 1),
First.
+
%% Returns the size of a binary excluding the first element.
%% Used in calls to split_binary().
-spec byte_size_exl_head(binary()) -> number().
@@ -1149,25 +1247,35 @@ byte_size_exl_single_slash(Rest) -> byte_size(Rest).
%%-------------------------------------------------------------------------
-spec decode_userinfo(binary()) -> binary().
decode_userinfo(Cs) ->
- decode(Cs, fun is_userinfo/1, <<>>).
+ check_utf8(decode(Cs, fun is_userinfo/1, <<>>)).
-spec decode_host(binary()) -> binary().
decode_host(Cs) ->
- decode(Cs, fun is_host/1, <<>>).
+ check_utf8(decode(Cs, fun is_host/1, <<>>)).
-spec decode_path(binary()) -> binary().
decode_path(Cs) ->
- decode(Cs, fun is_path/1, <<>>).
+ check_utf8(decode(Cs, fun is_path/1, <<>>)).
-spec decode_query(binary()) -> binary().
decode_query(Cs) ->
- decode(Cs, fun is_query/1, <<>>).
+ check_utf8(decode(Cs, fun is_query/1, <<>>)).
-spec decode_fragment(binary()) -> binary().
decode_fragment(Cs) ->
- decode(Cs, fun is_fragment/1, <<>>).
+ check_utf8(decode(Cs, fun is_fragment/1, <<>>)).
+%% Returns Cs if it is valid UTF-8; otherwise throws uri_parse_error.
+check_utf8(Cs) ->
+ case unicode:characters_to_list(Cs) of
+ {incomplete,_,_} ->
+ throw(uri_parse_error);
+ {error,_,_} ->
+ throw(uri_parse_error);
+ _ -> Cs
+ end.
+
%%-------------------------------------------------------------------------
%% Percent-encode
%%-------------------------------------------------------------------------
@@ -1368,10 +1476,15 @@ bracket_ipv6(Addr) when is_list(Addr) ->
%% E.g. "//user@:8080" => #{host => [],port => 8080,userinfo => "user"}
%% There is always at least an empty host when both userinfo and port
%% are present.
+%% - #{path => "///"} otherwise the following would be true:
+%% "/////" = uri_string:recompose(#{host => "", path => "///"})
+%% "/////" = uri_string:recompose(#{path => "/////"})
+%% AND
+%% path-absolute = "/" [ segment-nz *( "/" segment ) ]
%%-------------------------------------------------------------------------
is_valid_map(Map) ->
case
- (not maps:is_key(userinfo, Map) andalso
+ ((not maps:is_key(userinfo, Map) andalso
not maps:is_key(host, Map) andalso
maps:is_key(port, Map))
orelse
@@ -1381,7 +1494,9 @@ is_valid_map(Map) ->
orelse
(maps:is_key(userinfo, Map) andalso
not maps:is_key(host, Map) andalso
- maps:is_key(port, Map))
+ maps:is_key(port, Map))) orelse
+ not maps:is_key(path, Map) orelse
+ not is_host_and_path_valid(Map)
of
true ->
false;
@@ -1390,6 +1505,19 @@ is_valid_map(Map) ->
end.
+is_host_and_path_valid(Map) ->
+ Host = maps:get(host, Map, undefined),
+ Path = maps:get(path, Map, undefined),
+ not (Host =:= undefined andalso starts_with_two_slash(Path)).
+
+
+starts_with_two_slash([$/,$/|_]) ->
+ true;
+starts_with_two_slash(?STRING_REST("//", _)) ->
+ true;
+starts_with_two_slash(_) -> false.
+
+
update_scheme(#{scheme := Scheme}, _) ->
add_colon_postfix(encode_scheme(Scheme));
update_scheme(#{}, _) ->