aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib
diff options
context:
space:
mode:
authorPéter Dimitrov <[email protected]>2017-09-29 16:54:50 +0200
committerPéter Dimitrov <[email protected]>2017-10-23 15:53:28 +0200
commit505579acda74b9281c965488f86cbd6c83254a57 (patch)
tree281f4adfc53cc0d99e590cd5a0e4186f2e286846 /lib/stdlib
parent892bf58ee115a7e56ff38083afd85702bb8e14d3 (diff)
downloadotp-505579acda74b9281c965488f86cbd6c83254a57.tar.gz
otp-505579acda74b9281c965488f86cbd6c83254a57.tar.bz2
otp-505579acda74b9281c965488f86cbd6c83254a57.zip
stdlib: Improve calculation of parsed binary
- Improved calculation of parsed binary. - Added tests for special corner cases. - Fixed dialyzer warnings.
Diffstat (limited to 'lib/stdlib')
-rwxr-xr-xlib/stdlib/src/uri_string.erl246
-rw-r--r--lib/stdlib/test/uri_string_SUITE.erl19
2 files changed, 164 insertions, 101 deletions
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index 89a2c21518..bb7079c193 100755
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -434,51 +434,36 @@ parse_relative_part(?STRING_REST("//", Rest), URI) ->
%% Parse userinfo - "//" is NOT part of authority
try parse_userinfo(Rest, URI) of
{T, URI1} ->
- {Userinfo, _} = split_binary(Rest, byte_size(Rest) - byte_size(T) - 1),
+ Userinfo = calculate_parsed_part(Rest, T),
URI1#{userinfo => decode_userinfo(Userinfo)}
catch
throw:uri_parse_error ->
{T, URI1} = parse_host(Rest, URI),
- {Host, _} = split_binary(Rest, byte_size_exl_single_slash(Rest) - byte_size_exl_head(T)),
+ Host = calculate_parsed_part_sl(Rest, T),
URI1#{host => decode_host(remove_brackets(Host))}
end;
parse_relative_part(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-absolute
- {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Path = calculate_parsed_part(Rest, T),
URI1#{path => decode_path(?STRING_REST($/, Path))};
parse_relative_part(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Query = calculate_parsed_part(Rest, T),
URI1#{query => decode_query(?STRING_REST($?, Query))};
parse_relative_part(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
URI1#{fragment => decode_fragment(Fragment)};
parse_relative_part(?STRING_REST(Char, Rest), URI) ->
case is_segment_nz_nc(Char) of
true ->
{T, URI1} = parse_segment_nz_nc(Rest, URI), % path-noscheme
- {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Path = calculate_parsed_part(Rest, T),
URI1#{path => decode_path(?STRING_REST(Char, Path))};
false -> throw(uri_parse_error)
end.
-%% Returns size of 'Rest' for proper calculation of splitting position.
-%% Solves the following special case:
-%%
-%% #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>).
-%%
-%% While keeping the following true:
-%%
-%% #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>).
-%% #{host := <<>>, path := <<"/hostname">>} = uri_string:parse(<<"///hostname">>).
-%%
--spec byte_size_exl_single_slash(uri_string()) -> number().
-byte_size_exl_single_slash(<<$/>>) -> 0;
-byte_size_exl_single_slash(Rest) -> byte_size(Rest).
-
-
%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 3.3. Path]
%%
@@ -516,11 +501,11 @@ parse_segment(?STRING_REST($/, Rest), URI) ->
parse_segment(Rest, URI); % segment
parse_segment(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % ?query
- {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Query = calculate_parsed_part(Rest, T),
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_segment(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI),
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_segment(?STRING_REST(Char, Rest), URI) ->
case is_pchar(Char) of
@@ -539,11 +524,11 @@ parse_segment_nz_nc(?STRING_REST($/, Rest), URI) ->
parse_segment(Rest, URI); % segment
parse_segment_nz_nc(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % ?query
- {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Query = calculate_parsed_part(Rest, T),
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_segment_nz_nc(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI),
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) ->
case is_segment_nz_nc(Char) of
@@ -580,7 +565,7 @@ is_segment_nz_nc(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
parse_scheme_start(?STRING_REST(Char, Rest), URI) ->
case is_alpha(Char) of
true -> {T, URI1} = parse_scheme(Rest, URI),
- {Scheme, _} = split_binary(Rest, byte_size(Rest) - byte_size(T) - 1),
+ Scheme = calculate_parsed_scheme(Rest, T),
URI1#{scheme => ?STRING_REST(Char, Scheme)};
false -> throw(uri_parse_error)
end.
@@ -618,31 +603,31 @@ parse_hier(?STRING_REST("//", Rest), URI) ->
% Parse userinfo - "//" is NOT part of authority
try parse_userinfo(Rest, URI) of
{T, URI1} ->
- {Userinfo, _} = split_binary(Rest, byte_size(Rest) - byte_size(T) - 1),
+ Userinfo = calculate_parsed_part(Rest, T),
{Rest, URI1#{userinfo => decode_userinfo(Userinfo)}}
catch
throw:uri_parse_error ->
{T, URI1} = parse_host(Rest, URI),
- {Host, _} = split_binary(Rest, byte_size_exl_single_slash(Rest) - byte_size_exl_head(T)),
+ Host = calculate_parsed_part_sl(Rest, T),
{Rest, URI1#{host => decode_host(remove_brackets(Host))}}
end;
parse_hier(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-absolute
- {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Path = calculate_parsed_part(Rest, T),
{Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_hier(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Query = calculate_parsed_part(Rest, T),
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_hier(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless
case is_pchar(Char) of
true -> % segment_nz
{T, URI1} = parse_segment(Rest, URI),
- {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Path = calculate_parsed_part(Rest, T),
{Rest, URI1#{path => decode_path(?STRING_REST(Char, Path))}};
false -> throw(uri_parse_error)
end;
@@ -680,7 +665,7 @@ parse_userinfo(?CHAR($@), _URI) ->
throw(uri_parse_error);
parse_userinfo(?STRING_REST($@, Rest), URI) ->
{T, URI1} = parse_host(Rest, URI),
- {Host, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Host = calculate_parsed_part(Rest, T),
{Rest, URI1#{host => decode_host(remove_brackets(Host))}};
parse_userinfo(?STRING_REST(Char, Rest), URI) ->
case is_userinfo(Char) of
@@ -741,22 +726,22 @@ is_userinfo(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
-spec parse_host(binary(), uri_map()) -> {binary(), uri_map()}.
parse_host(?STRING_REST($:, Rest), URI) ->
{T, URI1} = parse_port(Rest, URI),
- {H, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ H = calculate_parsed_part(Rest, T),
Port = binary_to_integer(H),
{Rest, URI1#{port => Port}};
parse_host(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-abempty
- {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Path = calculate_parsed_part(Rest, T),
{Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_host(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Query = calculate_parsed_part(Rest, T),
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_host(?STRING_REST($[, Rest), URI) ->
parse_ipv6_bin(Rest, [], URI);
parse_host(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_host(?STRING_REST(Char, Rest), URI) ->
case is_digit(Char) of
@@ -770,20 +755,20 @@ parse_host(?STRING_EMPTY, URI) ->
-spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}.
parse_reg_name(?STRING_REST($:, Rest), URI) ->
{T, URI1} = parse_port(Rest, URI),
- {H, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ H = calculate_parsed_part(Rest, T),
Port = binary_to_integer(H),
{Rest, URI1#{port => Port}};
parse_reg_name(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-abempty
- {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Path = calculate_parsed_part(Rest, T),
{Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_reg_name(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Query = calculate_parsed_part(Rest, T),
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_reg_name(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_reg_name(?STRING_REST(Char, Rest), URI) ->
case is_reg_name(Char) of
@@ -803,23 +788,23 @@ is_reg_name(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
parse_ipv4_bin(?STRING_REST($:, Rest), Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
{T, URI1} = parse_port(Rest, URI),
- {H, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ H = calculate_parsed_part(Rest, T),
Port = binary_to_integer(H),
{Rest, URI1#{port => Port}};
parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
{T, URI1} = parse_segment(Rest, URI), % path-abempty
- {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Path = calculate_parsed_part(Rest, T),
{Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Query = calculate_parsed_part(Rest, T),
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) ->
case is_ipv4(Char) of
@@ -866,20 +851,20 @@ is_ipv6(Char) -> is_hex_digit(Char).
-spec parse_ipv6_bin_end(binary(), uri_map()) -> {binary(), uri_map()}.
parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) ->
{T, URI1} = parse_port(Rest, URI),
- {H, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ H = calculate_parsed_part(Rest, T),
Port = binary_to_integer(H),
{Rest, URI1#{port => Port}};
parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-abempty
- {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Path = calculate_parsed_part(Rest, T),
{Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Query = calculate_parsed_part(Rest, T),
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) ->
case is_ipv6(Char) of
@@ -909,15 +894,15 @@ validate_ipv6_address(Addr) ->
-spec parse_port(binary(), uri_map()) -> {binary(), uri_map()}.
parse_port(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-abempty
- {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Path = calculate_parsed_part(Rest, T),
{Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_port(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Query = calculate_parsed_part(Rest, T),
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_port(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_port(?STRING_REST(Char, Rest), URI) ->
case is_digit(Char) of
@@ -943,7 +928,7 @@ parse_port(?STRING_EMPTY, URI) ->
-spec parse_query(binary(), uri_map()) -> {binary(), uri_map()}.
parse_query(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI),
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_query(?STRING_REST(Char, Rest), URI) ->
case is_query(Char) of
@@ -1046,13 +1031,6 @@ is_hex_digit(C)
is_hex_digit(_) -> false.
-%% Returns the size of a binary exluding the first element.
-%% Used in calls to split_binary().
--spec byte_size_exl_head(binary()) -> number().
-byte_size_exl_head(<<>>) -> 0;
-byte_size_exl_head(Binary) -> byte_size(Binary) + 1.
-
-
%% Remove enclosing brackets from binary
-spec remove_brackets(binary()) -> binary().
remove_brackets(<<$[/utf8, Rest/binary>>) ->
@@ -1065,6 +1043,95 @@ remove_brackets(Addr) -> Addr.
%%-------------------------------------------------------------------------
+%% Helper functions for calculating the parsed binary.
+%%-------------------------------------------------------------------------
+
+%% Returns the parsed binary based on Input and the Unparsed part.
+%% Handles the following special cases:
+%%
+%% #{host => [],path => "/",query => "?"} = uri_string:parse("///?")
+%% #{fragment => [],host => [],path => "/"} = uri_string:parse("///#")
+%%
+-spec calculate_parsed_part(binary(), binary()) -> binary().
+calculate_parsed_part(<<$?>>, _) -> <<>>;
+calculate_parsed_part(<<$#>>, _) -> <<>>;
+calculate_parsed_part(Input, Unparsed) ->
+ {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)),
+ First.
+
+
+%% Returns the parsed binary based on Input and the Unparsed part.
+%% Used when parsing authority.
+%%
+%% Handles the following special cases:
+%%
+%% #{host => "foo",query => "?"} = uri_string:parse("//foo?")
+%% #{fragment => [],host => "foo"} = uri_string:parse("//foo#")
+%% #{host => "foo",path => "/"} = uri_string:parse("//foo/")
+%% #{host => "foo",query => "?",scheme => "http"} = uri_string:parse("http://foo?")
+%% #{fragment => [],host => "foo",scheme => "http"} = uri_string:parse("http://foo#")
+%% #{host => "foo",path => "/",scheme => "http"} = uri_string:parse("http://foo/")
+%%
+-spec calculate_parsed_part_sl(binary(), binary()) -> binary().
+calculate_parsed_part_sl(<<$?>>, _) -> <<>>;
+calculate_parsed_part_sl(<<$#>>, _) -> <<>>;
+calculate_parsed_part_sl(<<>>, _) -> <<>>;
+calculate_parsed_part_sl(Input, <<>>) ->
+ case binary:last(Input) of
+ $? ->
+ {First, _} =
+ split_binary(Input, byte_size(Input) - 1),
+ First;
+
+ $# ->
+ {First, _} =
+ split_binary(Input, byte_size(Input) - 1),
+ First;
+ $/ ->
+ {First, _} =
+ split_binary(Input, byte_size(Input) - 1),
+ First;
+ _Else ->
+ {First, _} =
+ split_binary(Input, byte_size_exl_single_slash(Input)),
+ First
+ end;
+calculate_parsed_part_sl(Input, Unparsed) ->
+ {First, _} =
+ split_binary(Input, byte_size_exl_single_slash(Input) - byte_size_exl_head(Unparsed)),
+ First.
+
+
+%% Returns the parsed binary based on Input and the Unparsed part.
+%% Used when parsing scheme.
+-spec calculate_parsed_scheme(binary(), binary()) -> binary().
+calculate_parsed_scheme(Input, Unparsed) ->
+ {First, _} = split_binary(Input, byte_size(Input) - byte_size(Unparsed) - 1),
+ First.
+
+%% Returns the size of a binary exluding the first element.
+%% Used in calls to split_binary().
+-spec byte_size_exl_head(binary()) -> number().
+byte_size_exl_head(<<>>) -> 0;
+byte_size_exl_head(Binary) -> byte_size(Binary) + 1.
+
+
+%% Returns size of 'Rest' for proper calculation of splitting position.
+%% Solves the following special case:
+%%
+%% #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>).
+%%
+%% While keeping the following true:
+%%
+%% #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>).
+%% #{host := <<>>, path := <<"/hostname">>} = uri_string:parse(<<"///hostname">>).
+%%
+-spec byte_size_exl_single_slash(uri_string()) -> number().
+byte_size_exl_single_slash(<<$/>>) -> 0;
+byte_size_exl_single_slash(Rest) -> byte_size(Rest).
+
+
+%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 2.1. Percent-Encoding]
%%
%% A percent-encoding mechanism is used to represent a data octet in a
@@ -1080,23 +1147,23 @@ remove_brackets(Addr) -> Addr.
%%
%% pct-encoded = "%" HEXDIG HEXDIG
%%-------------------------------------------------------------------------
--spec decode_userinfo(list()|binary()) -> list() | binary().
+-spec decode_userinfo(binary()) -> binary().
decode_userinfo(Cs) ->
decode(Cs, fun is_userinfo/1, <<>>).
--spec decode_host(list()|binary()) -> list() | binary().
+-spec decode_host(binary()) -> binary().
decode_host(Cs) ->
decode(Cs, fun is_host/1, <<>>).
--spec decode_path(list()|binary()) -> list() | binary().
+-spec decode_path(binary()) -> binary().
decode_path(Cs) ->
decode(Cs, fun is_path/1, <<>>).
--spec decode_query(list()|binary()) -> list() | binary().
+-spec decode_query(binary()) -> binary().
decode_query(Cs) ->
decode(Cs, fun is_query/1, <<>>).
--spec decode_fragment(list()|binary()) -> list() | binary().
+-spec decode_fragment(binary()) -> binary().
decode_fragment(Cs) ->
decode(Cs, fun is_fragment/1, <<>>).
@@ -1136,7 +1203,10 @@ encode_path(Cs) ->
-spec encode_query(list()|binary()) -> list() | binary().
encode_query(Cs) ->
- encode(Cs, fun is_query/1).
+ case validate_query(Cs) of
+ true -> encode(Cs, fun is_query/1);
+ false -> throw(uri_parse_error)
+ end.
-spec encode_fragment(list()|binary()) -> list() | binary().
encode_fragment(Cs) ->
@@ -1145,7 +1215,6 @@ encode_fragment(Cs) ->
%%-------------------------------------------------------------------------
%% Helper funtions for percent-decode
%%-------------------------------------------------------------------------
--spec decode(list()|binary(), fun(), binary()) -> list() | binary().
decode(<<$%,C0,C1,Cs/binary>>, Fun, Acc) ->
case is_hex_digit(C0) andalso is_hex_digit(C1) of
true ->
@@ -1159,21 +1228,7 @@ decode(<<C,Cs/binary>>, Fun, Acc) ->
false -> throw(uri_parse_error)
end;
decode(<<>>, _Fun, Acc) ->
- Acc;
-decode([$%,C0,C1|Cs], Fun, Acc) ->
- case is_hex_digit(C0) andalso is_hex_digit(C1) of
- true ->
- B = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
- decode(Cs, Fun, <<Acc/binary, B>>);
- false -> throw(uri_parse_error)
- end;
-decode([C|Cs], Fun, Acc) ->
- case Fun(C) of
- true -> decode(Cs, Fun, <<Acc/binary, C>>);
- false -> throw(uri_parse_error)
- end;
-decode([], _Fun, Acc) ->
- unicode:characters_to_list(Acc).
+ Acc.
%% Check if char is allowed in host
-spec is_host(char()) -> boolean().
@@ -1186,7 +1241,6 @@ is_path($/) -> true;
is_path(Char) -> is_pchar(Char).
-
%%-------------------------------------------------------------------------
%% Helper functions for percent-encode
%%-------------------------------------------------------------------------
@@ -1206,7 +1260,7 @@ encode(<<>>, _Fun, Acc) ->
Acc.
--spec encode_codepoint_binary(integer(), fun()) -> list().
+-spec encode_codepoint_binary(integer(), fun()) -> binary().
encode_codepoint_binary(C, Fun) ->
case Fun(C) of
false -> percent_encode_binary(C);
@@ -1240,6 +1294,11 @@ validate_scheme(<<H, Rest/binary>>) ->
false -> false
end.
+validate_query([$?|_]) -> true;
+validate_query(<<$?/utf8, _/binary>>) -> true;
+validate_query(_) -> false.
+
+
%%-------------------------------------------------------------------------
%% Classifies hostname into the following categories:
%% regname, ipv4 - address does not contain reserved characters to be
@@ -1248,7 +1307,7 @@ validate_scheme(<<H, Rest/binary>>) ->
%% encolsed in brackets
%% other - address shall be percent-encoded
%%-------------------------------------------------------------------------
-classify_host([]) -> false;
+classify_host([]) -> other;
classify_host(Addr) when is_binary(Addr) ->
A = unicode:characters_to_list(Addr),
classify_host_ipv6(A);
@@ -1272,12 +1331,6 @@ classify_host_regname([H|T]) ->
case is_reg_name(H) of
true -> classify_host_regname(T);
false -> other
- end;
-classify_host_regname(<<>>) -> regname;
-classify_host_regname(<<H, Rest/binary>>) ->
- case is_reg_name(H) of
- true -> classify_host_regname(Rest);
- false -> other
end.
is_ipv4_address(Addr) ->
@@ -1391,7 +1444,7 @@ update_query(#{}, URI) ->
update_fragment(#{fragment := Fragment}, empty) ->
- add_hashmark(encode_query(Fragment));
+ add_hashmark(encode_fragment(Fragment));
update_fragment(#{fragment := Fragment}, URI) ->
concat(URI,add_hashmark(encode_fragment(Fragment)));
update_fragment(#{}, empty) ->
@@ -1411,31 +1464,24 @@ concat(A, B) when is_binary(A), is_list(B) ->
concat(A, B) when is_list(A) ->
A ++ maybe_to_list(B).
-add_hashmark(empty) -> empty;
add_hashmark(Comp) when is_binary(Comp) ->
<<$#, Comp/binary>>;
add_hashmark(Comp) when is_list(Comp) ->
[$#|Comp].
-add_colon(empty) -> empty;
add_colon(Comp) when is_binary(Comp) ->
- <<$:, Comp/binary>>;
-add_colon(Comp) when is_list(Comp) ->
- [$:|Comp].
+ <<$:, Comp/binary>>.
-add_colon_postfix(empty) -> empty;
add_colon_postfix(Comp) when is_binary(Comp) ->
<<Comp/binary,$:>>;
add_colon_postfix(Comp) when is_list(Comp) ->
Comp ++ ":".
-add_auth_prefix(empty) -> empty;
add_auth_prefix(Comp) when is_binary(Comp) ->
<<"//", Comp/binary>>;
add_auth_prefix(Comp) when is_list(Comp) ->
[$/,$/|Comp].
-add_host_prefix(_, empty) -> empty;
add_host_prefix(#{userinfo := _}, Host) when is_binary(Host) ->
<<$@,Host/binary>>;
add_host_prefix(#{}, Host) when is_binary(Host) ->
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index 1859a25a18..0eb5105c35 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -31,7 +31,7 @@
parse_path/1, parse_pct_encoded_fragment/1, parse_pct_encoded_query/1,
parse_pct_encoded_userinfo/1, parse_port/1,
parse_query/1, parse_scheme/1, parse_userinfo/1,
- parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1,
+ parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1, parse_special/1,
recompose_fragment/1, recompose_parse_fragment/1,
recompose_query/1, recompose_parse_query/1,
recompose_path/1, recompose_parse_path/1,
@@ -89,6 +89,7 @@ all() ->
parse_binary,
parse_mixed,
parse_relative,
+ parse_special,
recompose_fragment,
recompose_parse_fragment,
recompose_query,
@@ -651,6 +652,22 @@ parse_relative(_Config) ->
#{path := "foo"} =
uri_string:parse(lists:append("fo",<<"o">>)).
+parse_special(_Config) ->
+ #{host := [],query := "?"} = uri_string:parse("//?"),
+ #{fragment := [],host := []} = uri_string:parse("//#"),
+ #{host := [],query := "?",scheme := "foo"} = uri_string:parse("foo://?"),
+ #{fragment := [],host := [],scheme := "foo"} = uri_string:parse("foo://#"),
+ #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>),
+ #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>),
+ #{host := <<>>, path := <<"/hostname">>} = uri_string:parse(<<"///hostname">>),
+ #{host := [],path := "/",query := "?"} = uri_string:parse("///?"),
+ #{fragment := [],host := [],path := "/"} = uri_string:parse("///#"),
+ #{host := "foo",query := "?"} = uri_string:parse("//foo?"),
+ #{fragment := [],host := "foo"} = uri_string:parse("//foo#"),
+ #{host := "foo",path := "/"} = uri_string:parse("//foo/"),
+ #{host := "foo",query := "?",scheme := "http"} = uri_string:parse("http://foo?"),
+ #{fragment := [],host := "foo",scheme := "http"} = uri_string:parse("http://foo#"),
+ #{host := "foo",path := "/",scheme := "http"} = uri_string:parse("http://foo/").
%%-------------------------------------------------------------------------
%% Recompose tests