diff options
author | Péter Dimitrov <[email protected]> | 2017-09-19 16:07:49 +0200 |
---|---|---|
committer | Péter Dimitrov <[email protected]> | 2017-10-23 15:53:28 +0200 |
commit | 6c0c11eeaf0649cfbca5e426263c7dc43b49feff (patch) | |
tree | 2470505a10e93628bbf03fe59e20a63a944b84e6 /lib | |
parent | ec3f0c7f96531b714082f5af694a7ed6a02769ce (diff) | |
download | otp-6c0c11eeaf0649cfbca5e426263c7dc43b49feff.tar.gz otp-6c0c11eeaf0649cfbca5e426263c7dc43b49feff.tar.bz2 otp-6c0c11eeaf0649cfbca5e426263c7dc43b49feff.zip |
stdlib: Add support to parse percent-encoded URIs
Diffstat (limited to 'lib')
-rwxr-xr-x | lib/stdlib/src/uri_string.erl | 198 | ||||
-rw-r--r-- | lib/stdlib/test/uri_string_SUITE.erl | 78 |
2 files changed, 217 insertions, 59 deletions
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index 3656d561be..50e8a0bf5a 100755 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -223,9 +223,9 @@ %% -module(uri_string). - -export([compose_query/1, create_uri_reference/2, dissect_query/1, normalize/1, parse/1, recompose/1, resolve_uri_reference/2, transcode/2]). +-export([is_host/1, is_path/1]). % suppress warnings -export_type([uri_map/0, uri_string/0]). -define(CHAR(Char), <<Char/utf8>>). @@ -383,31 +383,31 @@ parse_relative_part(?STRING_REST("//", Rest), URI) -> try parse_userinfo(Rest, URI) of {T, URI1} -> {Userinfo, _} = split_binary(Rest, byte_size(Rest) - byte_size(T) - 1), - URI1#{userinfo => Userinfo} + URI1#{userinfo => decode_userinfo(Userinfo)} catch throw:uri_parse_error -> {T, URI1} = parse_host(Rest, URI), {Host, _} = split_binary(Rest, byte_size_exl_single_slash(Rest) - byte_size_exl_head(T)), - URI1#{host => remove_brackets(Host)} + URI1#{host => decode_host(remove_brackets(Host))} end; parse_relative_part(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-absolute {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - URI1#{path => ?STRING_REST($/, Path)}; + URI1#{path => decode_path(?STRING_REST($/, Path))}; parse_relative_part(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - URI1#{query => ?STRING_REST($?, Query)}; + URI1#{query => decode_query(?STRING_REST($?, Query))}; parse_relative_part(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), - URI1#{fragment => Fragment}; + URI1#{fragment => decode_fragment(Fragment)}; parse_relative_part(?STRING_REST(Char, Rest), URI) -> case is_segment_nz_nc(Char) of true -> {T, URI1} = parse_segment_nz_nc(Rest, URI), % path-noscheme {Path, _} = 
split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - URI1#{path => ?STRING_REST(Char, Path)}; + URI1#{path => decode_path(?STRING_REST(Char, Path))}; false -> throw(uri_parse_error) end. @@ -491,11 +491,11 @@ parse_segment(?STRING_REST($/, Rest), URI) -> parse_segment(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % ?query {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - {Rest, URI1#{query => ?STRING_REST($?, Query)}}; + {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}}; parse_segment(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), - {Rest, URI1#{fragment => Fragment}}; + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_segment(?STRING_REST(Char, Rest), URI) -> case is_pchar(Char) of true -> parse_segment(Rest, URI); @@ -515,16 +515,16 @@ parse_segment([H|Rest], Acc, URI) when is_list(H) -> parse_segment([$/|Rest], Acc, URI) -> parse_segment(Rest, [$/|Acc], URI); % segment parse_segment([$?|Rest], Acc, URI) -> - parse_query(Rest, [$?], URI#{path => lists:reverse(Acc)}); % ?query + parse_query(Rest, [$?], URI#{path => decode_path(lists:reverse(Acc))}); % ?query parse_segment([$#|Rest], Acc, URI) -> - parse_fragment(Rest, [], URI#{path => lists:reverse(Acc)}); + parse_fragment(Rest, [], URI#{path => decode_path(lists:reverse(Acc))}); parse_segment([Char|Rest], Acc, URI) -> case is_pchar(Char) of true -> parse_segment(Rest, [Char|Acc], URI); false -> throw(uri_parse_error) end; parse_segment([], Acc, URI) -> - URI#{path => lists:reverse(Acc)}. + URI#{path => decode_path(lists:reverse(Acc))}. 
%%------------------------------------------------------------------------- %% path-noscheme @@ -535,11 +535,11 @@ parse_segment_nz_nc(?STRING_REST($/, Rest), URI) -> parse_segment_nz_nc(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % ?query {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - {Rest, URI1#{query => ?STRING_REST($?, Query)}}; + {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}}; parse_segment_nz_nc(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), - {Rest, URI1#{fragment => Fragment}}; + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) -> case is_segment_nz_nc(Char) of true -> parse_segment_nz_nc(Rest, URI); @@ -559,16 +559,16 @@ parse_segment_nz_nc([H|Rest], Acc, URI) when is_list(H) -> parse_segment_nz_nc([$/|Rest], Acc, URI) -> parse_segment(Rest, [$/|Acc], URI); % segment parse_segment_nz_nc([$?|Rest], Acc, URI) -> - parse_query(Rest, [$?], URI#{path => lists:reverse(Acc)}); % ?query + parse_query(Rest, [$?], URI#{path => decode_path(lists:reverse(Acc))}); % ?query parse_segment_nz_nc([$#|Rest], Acc, URI) -> - parse_fragment(Rest, [], URI#{path => lists:reverse(Acc)}); + parse_fragment(Rest, [], URI#{path => decode_path(lists:reverse(Acc))}); parse_segment_nz_nc([Char|Rest], Acc, URI) -> case is_segment_nz_nc(Char) of true -> parse_segment_nz_nc(Rest, [Char|Acc], URI); false -> throw(uri_parse_error) end; parse_segment_nz_nc([], Acc, URI) -> - URI#{path => lists:reverse(Acc)}. + URI#{path => decode_path(lists:reverse(Acc))}. %% Check if char is pchar. -spec is_pchar(char()) -> boolean(). 
@@ -664,31 +664,31 @@ parse_hier(?STRING_REST("//", Rest), URI) -> try parse_userinfo(Rest, URI) of {T, URI1} -> {Userinfo, _} = split_binary(Rest, byte_size(Rest) - byte_size(T) - 1), - {Rest, URI1#{userinfo => Userinfo}} + {Rest, URI1#{userinfo => decode_userinfo(Userinfo)}} catch throw:uri_parse_error -> {T, URI1} = parse_host(Rest, URI), {Host, _} = split_binary(Rest, byte_size_exl_single_slash(Rest) - byte_size_exl_head(T)), - {Rest, URI1#{host => remove_brackets(Host)}} + {Rest, URI1#{host => decode_host(remove_brackets(Host))}} end; parse_hier(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-absolute {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - {Rest, URI1#{path => ?STRING_REST($/, Path)}}; + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; parse_hier(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - {Rest, URI1#{query => ?STRING_REST($?, Query)}}; + {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}}; parse_hier(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), - {Rest, URI1#{fragment => Fragment}}; + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless case is_pchar(Char) of true -> % segment_nz {T, URI1} = parse_segment(Rest, URI), {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - {Rest, URI1#{path => ?STRING_REST(Char, Path)}}; + {Rest, URI1#{path => decode_path(?STRING_REST(Char, Path))}}; false -> throw(uri_parse_error) end; parse_hier(?STRING_EMPTY, URI) -> @@ -756,7 +756,7 @@ parse_userinfo(?CHAR($@), _URI) -> parse_userinfo(?STRING_REST($@, Rest), URI) -> {T, URI1} = parse_host(Rest, URI), {Host, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - {Rest, 
URI1#{host => remove_brackets(Host)}}; + {Rest, URI1#{host => decode_host(remove_brackets(Host))}}; parse_userinfo(?STRING_REST(Char, Rest), URI) -> case is_userinfo(Char) of true -> parse_userinfo(Rest, URI); @@ -778,11 +778,11 @@ parse_userinfo([$@], _Acc, _URI) -> %% URI cannot end in userinfo state throw(uri_parse_error); parse_userinfo([$@|Rest], Acc, URI) -> - parse_host(Rest, [], URI#{userinfo => lists:reverse(Acc)}); + parse_host(Rest, [], URI#{userinfo => decode_userinfo(lists:reverse(Acc))}); parse_userinfo([Char|Rest], Acc, URI) -> case is_userinfo(Char) of true -> parse_userinfo(Rest, [Char|Acc], URI); - false -> throw(uri_parse_error) % URI#{userinfo => lists:reverse(Acc)} + false -> throw(uri_parse_error) end; parse_userinfo([], _Acc, _URI) -> %% URI cannot end in userinfo state @@ -843,17 +843,17 @@ parse_host(?STRING_REST($:, Rest), URI) -> parse_host(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-abempty {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - {Rest, URI1#{path => ?STRING_REST($/, Path)}}; + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; parse_host(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - {Rest, URI1#{query => ?STRING_REST($?, Query)}}; + {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}}; parse_host(?STRING_REST($[, Rest), URI) -> parse_ipv6_bin(Rest, [], URI); parse_host(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), - {Rest, URI1#{fragment => Fragment}}; + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_host(?STRING_REST(Char, Rest), URI) -> case is_digit(Char) of true -> parse_ipv4_bin(Rest, [Char], URI); @@ -871,13 +871,13 @@ parse_host([H|Rest], Acc, URI) when is_binary(H) -> parse_host([H|Rest], Acc, URI) when 
is_list(H) -> parse_host(H ++ Rest, Acc, URI); parse_host([$:|Rest], Acc, URI) -> - parse_port(Rest, [], URI#{host => lists:reverse(Acc)}); + parse_port(Rest, [], URI#{host => decode_host(lists:reverse(Acc))}); parse_host([$/|Rest], Acc, URI) -> - parse_segment(Rest, [$/], URI#{host => lists:reverse(Acc)}); % path-abempty + parse_segment(Rest, [$/], URI#{host => decode_host(lists:reverse(Acc))}); % path-abempty parse_host([$?|Rest], Acc, URI) -> - parse_query(Rest, [$?], URI#{host => lists:reverse(Acc)}); % path-empty ?query + parse_query(Rest, [$?], URI#{host => decode_host(lists:reverse(Acc))}); % path-empty ?query parse_host([$#|Rest], Acc, URI) -> - parse_fragment(Rest, [], URI#{host => lists:reverse(Acc)}); % path-empty + parse_fragment(Rest, [], URI#{host => decode_host(lists:reverse(Acc))}); % path-empty parse_host([$[|Rest], _Acc, URI) -> parse_ipv6(Rest, [], URI); parse_host([Char|Rest], Acc, URI) -> @@ -886,7 +886,7 @@ parse_host([Char|Rest], Acc, URI) -> false -> parse_reg_name([Char|Rest], Acc, URI) end; parse_host([], Acc, URI) -> - URI#{host => lists:reverse(Acc)}. + URI#{host => decode_host(lists:reverse(Acc))}. -spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}. 
@@ -898,15 +898,15 @@ parse_reg_name(?STRING_REST($:, Rest), URI) -> parse_reg_name(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-abempty {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - {Rest, URI1#{path => ?STRING_REST($/, Path)}}; + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; parse_reg_name(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - {Rest, URI1#{query => ?STRING_REST($?, Query)}}; + {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}}; parse_reg_name(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), - {Rest, URI1#{fragment => Fragment}}; + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_reg_name(?STRING_REST(Char, Rest), URI) -> case is_reg_name(Char) of true -> parse_reg_name(Rest, URI); @@ -924,20 +924,20 @@ parse_reg_name([H|Rest], Acc, URI) when is_binary(H) -> parse_reg_name([H|Rest], Acc, URI) when is_list(H) -> parse_reg_name(H ++ Rest, Acc, URI); parse_reg_name([$:|Rest], Acc, URI) -> - parse_port(Rest, [], URI#{host => lists:reverse(Acc)}); + parse_port(Rest, [], URI#{host => decode_host(lists:reverse(Acc))}); parse_reg_name([$/|Rest], Acc, URI) -> - parse_segment(Rest, [$/], URI#{host => lists:reverse(Acc)}); % path-abempty + parse_segment(Rest, [$/], URI#{host => decode_host(lists:reverse(Acc))}); % path-abempty parse_reg_name([$?|Rest], Acc, URI) -> - parse_query(Rest, [$?], URI#{host => lists:reverse(Acc)}); % path-empty ?query + parse_query(Rest, [$?], URI#{host => decode_host(lists:reverse(Acc))}); % path-empty ?query parse_reg_name([$#|Rest], Acc, URI) -> - parse_fragment(Rest, [], URI#{host => lists:reverse(Acc)}); % path-empty + parse_fragment(Rest, [], URI#{host => decode_host(lists:reverse(Acc))}); % path-empty 
parse_reg_name([Char|Rest], Acc, URI) -> case is_reg_name(Char) of true -> parse_reg_name(Rest, [Char|Acc], URI); false -> throw(uri_parse_error) end; parse_reg_name([], Acc, URI) -> - URI#{host => lists:reverse(Acc)}. + URI#{host => decode_host(lists:reverse(Acc))}. %% Check if char is allowed in reg-name -spec is_reg_name(char()) -> boolean(). @@ -956,17 +956,17 @@ parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) -> _ = validate_ipv4_address(lists:reverse(Acc)), {T, URI1} = parse_segment(Rest, URI), % path-abempty {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - {Rest, URI1#{path => ?STRING_REST($/, Path)}}; + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) -> _ = validate_ipv4_address(lists:reverse(Acc)), {T, URI1} = parse_query(Rest, URI), % path-empty ?query {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - {Rest, URI1#{query => ?STRING_REST($?, Query)}}; + {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}}; parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) -> _ = validate_ipv4_address(lists:reverse(Acc)), {T, URI1} = parse_fragment(Rest, URI), % path-empty {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), - {Rest, URI1#{fragment => Fragment}}; + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) -> case is_ipv4(Char) of true -> parse_ipv4_bin(Rest, [Char|Acc], URI); @@ -1062,15 +1062,15 @@ parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) -> parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-abempty {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - {Rest, URI1#{path => ?STRING_REST($/, Path)}}; + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query {Query, _} = split_binary(Rest, 
byte_size(Rest) - byte_size_exl_head(T)), - {Rest, URI1#{query => ?STRING_REST($?, Query)}}; + {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}}; parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), - {Rest, URI1#{fragment => Fragment}}; + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) -> case is_ipv6(Char) of true -> parse_ipv6_bin_end(Rest, URI); @@ -1120,15 +1120,15 @@ validate_ipv6_address(Addr) -> parse_port(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-abempty {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - {Rest, URI1#{path => ?STRING_REST($/, Path)}}; + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; parse_port(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), - {Rest, URI1#{query => ?STRING_REST($?, Query)}}; + {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}}; parse_port(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), - {Rest, URI1#{fragment => Fragment}}; + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_port(?STRING_REST(Char, Rest), URI) -> case is_digit(Char) of true -> parse_port(Rest, URI); @@ -1180,7 +1180,7 @@ parse_port([], Acc, URI) -> parse_query(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), - {Rest, URI1#{fragment => Fragment}}; + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_query(?STRING_REST(Char, Rest), URI) -> case is_query(Char) of true -> parse_query(Rest, URI); @@ -1198,18 +1198,19 @@ parse_query([H|Rest], Acc, URI) when is_binary(H) -> 
parse_query([H|Rest], Acc, URI) when is_list(H) -> parse_query(H ++ Rest, Acc, URI); parse_query([$#|Rest], Acc, URI) -> - parse_fragment(Rest, [], URI#{query => lists:reverse(Acc)}); + parse_fragment(Rest, [], URI#{query => decode_query(lists:reverse(Acc))}); parse_query([Char|Rest], Acc, URI) -> case is_query(Char) of true -> parse_query(Rest, [Char|Acc], URI); false -> throw(uri_parse_error) end; parse_query([], Acc, URI) -> - URI#{query => lists:reverse(Acc)}. + URI#{query => decode_query(lists:reverse(Acc))}. %% Check if char is allowed in query -spec is_query(char()) -> boolean(). is_query($/) -> true; +is_query($?) -> true; is_query(Char) -> is_pchar(Char). @@ -1245,7 +1246,7 @@ parse_fragment([Char|Rest], Acc, URI) -> false -> throw(uri_parse_error) end; parse_fragment([], Acc, URI) -> - URI#{fragment => lists:reverse(Acc)}. + URI#{fragment => decode_fragment(lists:reverse(Acc))}. %% Check if char is allowed in fragment -spec is_fragment(char()) -> boolean(). @@ -1339,3 +1340,90 @@ remove_brackets(?STRING_REST($[,Addr)) -> A1 = binary:replace(Addr, <<$[>>, <<>>), binary:replace(A1, <<$]>>, <<>>); remove_brackets(Addr) -> Addr. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 2.1. Percent-Encoding] +%% +%% A percent-encoding mechanism is used to represent a data octet in a +%% component when that octet's corresponding character is outside the +%% allowed set or is being used as a delimiter of, or within, the +%% component. A percent-encoded octet is encoded as a character +%% triplet, consisting of the percent character "%" followed by the two +%% hexadecimal digits representing that octet's numeric value. For +%% example, "%20" is the percent-encoding for the binary octet +%% "00100000" (ABNF: %x20), which in US-ASCII corresponds to the space +%% character (SP). Section 2.4 describes when percent-encoding and +%% decoding is applied. 
%%
%% pct-encoded = "%" HEXDIG HEXDIG
%%-------------------------------------------------------------------------

%% Percent-decode the userinfo component. Every character that is not part
%% of a "%XY" triplet must satisfy is_userinfo/1, otherwise decode/3 throws
%% uri_parse_error.
-spec decode_userinfo(list()|binary()) -> list() | binary().
decode_userinfo(Cs) ->
    decode(Cs, fun is_userinfo/1, <<>>).


%% Percent-decode the host component; literal characters are validated
%% with is_host/1.
-spec decode_host(list()|binary()) -> list() | binary().
decode_host(Cs) ->
    decode(Cs, fun is_host/1, <<>>).

%% Check if char is allowed in host: $: plus whatever is_unreserved/1 and
%% is_sub_delim/1 accept.
-spec is_host(char()) -> boolean().
is_host($:) -> true;
is_host(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).


%% Percent-decode the path component; literal characters are validated
%% with is_path/1.
-spec decode_path(list()|binary()) -> list() | binary().
decode_path(Cs) ->
    decode(Cs, fun is_path/1, <<>>).

%% Check if char is allowed in path: the segment separator $/ plus
%% whatever is_pchar/1 accepts.
-spec is_path(char()) -> boolean().
is_path($/) -> true;
is_path(Char) -> is_pchar(Char).


%% Percent-decode the query component; literal characters are validated
%% with is_query/1.
-spec decode_query(list()|binary()) -> list() | binary().
decode_query(Cs) ->
    decode(Cs, fun is_query/1, <<>>).

%% Percent-decode the fragment component.
%% Literal characters are validated with is_fragment/1, the module's own
%% fragment predicate. The previous version passed is_host/1 here, which
%% only admits $:, unreserved and sub-delim characters, so a fragment that
%% had already been accepted by the parser could still be rejected while
%% it was being decoded.
-spec decode_fragment(list()|binary()) -> list() | binary().
decode_fragment(Cs) ->
    decode(Cs, fun is_fragment/1, <<>>).


%% Generic percent-decoder shared by the decode_* wrappers above.
%%
%%   Cs  - the still-encoded component, either a binary or a flat list
%%         of characters.
%%   Fun - predicate applied to every character that is not consumed as
%%         part of a "%XY" triplet.
%%   Acc - octets decoded so far (callers start with <<>>).
%%
%% Returns a binary for binary input and a list of code points for list
%% input. Throws uri_parse_error when "%" is followed by two characters
%% that are not both hex digits, when Fun rejects a character, or (list
%% input only) when the decoded octets cannot be converted to a list of
%% code points. A "%" with fewer than two characters after it does not
%% match the triplet clauses and is handed to Fun like any other character.
-spec decode(list()|binary(), fun((char()) -> boolean()), binary()) ->
                    list() | binary().
decode(<<$%,C0,C1,Cs/binary>>, Fun, Acc) ->
    case is_hex_digit(C0) andalso is_hex_digit(C1) of
        true ->
            B = hex2dec(C0)*16+hex2dec(C1),
            decode(Cs, Fun, <<Acc/binary, B>>);
        false -> throw(uri_parse_error)
    end;
decode(<<C,Cs/binary>>, Fun, Acc) ->
    case Fun(C) of
        true -> decode(Cs, Fun, <<Acc/binary, C>>);
        false -> throw(uri_parse_error)
    end;
decode(<<>>, _Fun, Acc) ->
    Acc;
decode([$%,C0,C1|Cs], Fun, Acc) ->
    case is_hex_digit(C0) andalso is_hex_digit(C1) of
        true ->
            B = hex2dec(C0)*16+hex2dec(C1),
            decode(Cs, Fun, <<Acc/binary, B>>);
        false -> throw(uri_parse_error)
    end;
decode([C|Cs], Fun, Acc) ->
    case Fun(C) of
        true -> decode(Cs, Fun, <<Acc/binary, C>>);
        false -> throw(uri_parse_error)
    end;
decode([], _Fun, Acc) ->
    %% unicode:characters_to_list/1 returns an {error, _, _} or
    %% {incomplete, _, _} tuple when Acc is not valid UTF-8. Report that
    %% as a parse error instead of leaking the tuple into the URI map.
    case unicode:characters_to_list(Acc) of
        Decoded when is_list(Decoded) -> Decoded;
        _ -> throw(uri_parse_error)
    end.
+ + +hex2dec(X) when (X >= $0) andalso (X =< $9) -> X - $0; +hex2dec(X) when (X >= $A) andalso (X =< $F) -> X - $A + 10; +hex2dec(X) when (X >= $a) andalso (X =< $f) -> X - $a + 10. diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl index 9b8e52f0b2..c379eeb15b 100644 --- a/lib/stdlib/test/uri_string_SUITE.erl +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -24,10 +24,12 @@ -export([all/0, suite/0,groups/0, parse_binary_fragment/1, parse_binary_host/1, parse_binary_host_ipv4/1, parse_binary_host_ipv6/1, - parse_binary_path/1, parse_binary_port/1, + parse_binary_path/1, parse_binary_pct_encoded_fragment/1, parse_binary_pct_encoded_query/1, + parse_binary_pct_encoded_userinfo/1, parse_binary_port/1, parse_binary_query/1, parse_binary_scheme/1, parse_binary_userinfo/1, parse_fragment/1, parse_host/1, parse_host_ipv4/1, parse_host_ipv6/1, - parse_path/1, parse_port/1, + parse_path/1, parse_pct_encoded_fragment/1, parse_pct_encoded_query/1, + parse_pct_encoded_userinfo/1, parse_port/1, parse_query/1, parse_scheme/1, parse_userinfo/1, parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1 ]). 
@@ -39,22 +41,28 @@ all() -> [ parse_binary_scheme, parse_binary_userinfo, + parse_binary_pct_encoded_userinfo, parse_binary_host, parse_binary_host_ipv4, parse_binary_host_ipv6, parse_binary_port, parse_binary_path, parse_binary_query, + parse_binary_pct_encoded_query, parse_binary_fragment, + parse_binary_pct_encoded_fragment, parse_scheme, parse_userinfo, + parse_pct_encoded_userinfo, parse_host, parse_host_ipv4, parse_host_ipv6, parse_port, parse_path, parse_query, + parse_pct_encoded_query, parse_fragment, + parse_pct_encoded_fragment, parse_list, parse_binary, parse_mixed, @@ -92,8 +100,27 @@ parse_binary_userinfo(_Config) -> uri_string:parse(<<"foo://user@localhost">>), #{scheme := <<"foo">>, userinfo := <<"user:password">>, host := <<"localhost">>} = uri_string:parse(<<"foo://user:password@localhost">>), - uri_parse_error =(catch uri_string:parse("//user@")), - uri_parse_error = (catch uri_string:parse("foo://user@")). + uri_parse_error =(catch uri_string:parse(<<"//user@">>)), + uri_parse_error = (catch uri_string:parse(<<"foo://user@">>)). 
+ +parse_binary_pct_encoded_userinfo(_Config) -> + #{scheme := <<"user">>, path := <<"合@気道"/utf8>>} = + uri_string:parse(<<"user:%E5%90%88@%E6%B0%97%E9%81%93">>), + #{path := <<"合気道@"/utf8>>} = uri_string:parse(<<"%E5%90%88%E6%B0%97%E9%81%93@">>), + #{path := <<"/合気道@"/utf8>>} = uri_string:parse(<<"/%E5%90%88%E6%B0%97%E9%81%93@">>), + #{path := <<"合@気道"/utf8>>} = uri_string:parse(<<"%E5%90%88@%E6%B0%97%E9%81%93">>), + #{userinfo := <<"合"/utf8>>, host := <<"気道"/utf8>>} = + uri_string:parse(<<"//%E5%90%88@%E6%B0%97%E9%81%93">>), + #{userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} = + uri_string:parse(<<"//%E5%90%88:%E6%B0%97@%E9%81%93">>), + #{scheme := <<"foo">>, path := <<"/合気道@"/utf8>>} = + uri_string:parse(<<"foo:/%E5%90%88%E6%B0%97%E9%81%93@">>), + #{scheme := <<"foo">>, userinfo := <<"合"/utf8>>, host := <<"気道"/utf8>>} = + uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93">>), + #{scheme := <<"foo">>, userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} = + uri_string:parse(<<"foo://%E5%90%88:%E6%B0%97@%E9%81%93">>), + uri_parse_error =(catch uri_string:parse(<<"//%E5%90%88@%E6%B0%97%E9%81%93@">>)), + uri_parse_error = (catch uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93@">>)). parse_binary_host(_Config) -> #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>), @@ -176,6 +203,12 @@ parse_binary_query(_Config) -> #{host := <<"example.com">>, path := <<"/">>, query := <<"?name=ferret">>} = uri_string:parse(<<"//example.com/?name=ferret">>). +parse_binary_pct_encoded_query(_Config) -> + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>, + query := <<"?name=合気道"/utf8>>} = + uri_string:parse(<<"foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>), + #{host := <<"example.com">>, path := <<"/">>, query := <<"?name=合気道"/utf8>>} = + uri_string:parse(<<"//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>). 
parse_binary_fragment(_Config) -> #{scheme := <<"foo">>, fragment := <<"nose">>} = @@ -204,6 +237,12 @@ parse_binary_fragment(_Config) -> #{host := <<"example.com">>, path := <<"/">>, fragment := <<"nose">>} = uri_string:parse(<<"//example.com/#nose">>). +parse_binary_pct_encoded_fragment(_Config) -> + #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"合気道"/utf8>>} = + uri_string:parse(<<"foo://example.com#%E5%90%88%E6%B0%97%E9%81%93">>), + #{host := <<"example.com">>, path := <<"/">>, fragment := <<"合気道"/utf8>>} = + uri_string:parse(<<"//example.com/#%E5%90%88%E6%B0%97%E9%81%93">>). + parse_scheme(_Config) -> #{} = uri_string:parse(""), #{path := "foo"} = uri_string:parse("foo"), @@ -232,6 +271,26 @@ parse_userinfo(_Config) -> #{scheme := "foo", userinfo := "user:password", host := "localhost"} = uri_string:parse("foo://user:password@localhost"). +parse_pct_encoded_userinfo(_Config) -> + #{scheme := "user", path := "合@気道"} = + uri_string:parse("user:%E5%90%88@%E6%B0%97%E9%81%93"), + #{path := "合気道@"} = uri_string:parse("%E5%90%88%E6%B0%97%E9%81%93@"), + #{path := "/合気道@"} = uri_string:parse("/%E5%90%88%E6%B0%97%E9%81%93@"), + #{path := "合@気道"} = uri_string:parse("%E5%90%88@%E6%B0%97%E9%81%93"), + #{userinfo := "合", host := "気道"} = + uri_string:parse("//%E5%90%88@%E6%B0%97%E9%81%93"), + #{userinfo := "合:気", host := "道"} = + uri_string:parse("//%E5%90%88:%E6%B0%97@%E9%81%93"), + #{scheme := "foo", path := "/合気道@"} = + uri_string:parse("foo:/%E5%90%88%E6%B0%97%E9%81%93@"), + #{scheme := "foo", userinfo := "合", host := "気道"} = + uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93"), + #{scheme := "foo", userinfo := "合:気", host := "道"} = + uri_string:parse("foo://%E5%90%88:%E6%B0%97@%E9%81%93"), + uri_parse_error =(catch uri_string:parse("//%E5%90%88@%E6%B0%97%E9%81%93@")), + uri_parse_error = (catch uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93@")). 
+ + parse_host(_Config) -> #{host := "hostname"} = uri_string:parse("//hostname"), #{host := "hostname",scheme := "foo"} = uri_string:parse("foo://hostname"), @@ -307,6 +366,12 @@ parse_query(_Config) -> #{host := "example.com", path := "/", query := "?name=ferret"} = uri_string:parse("//example.com/?name=ferret"). +parse_pct_encoded_query(_Config) -> + #{scheme := "foo", host := "example.com", path := "/", + query := "?name=合気道"} = + uri_string:parse("foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93"), + #{host := "example.com", path := "/", query := "?name=合気道"} = + uri_string:parse("//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93"). parse_fragment(_Config) -> #{scheme := "foo", fragment := "nose"} = @@ -335,6 +400,11 @@ parse_fragment(_Config) -> #{host := "example.com", path := "/", fragment := "nose"} = uri_string:parse("//example.com/#nose"). +parse_pct_encoded_fragment(_Config) -> + #{scheme := "foo", host := "example.com", fragment := "合気道"} = + uri_string:parse("foo://example.com#%E5%90%88%E6%B0%97%E9%81%93"), + #{host := "example.com", path := "/", fragment := "合気道"} = + uri_string:parse("//example.com/#%E5%90%88%E6%B0%97%E9%81%93"). parse_list(_Config) -> #{scheme := "foo", path := "bar:nisse"} = uri_string:parse("foo:bar:nisse"), |