From 80feeb36f92a923f57f740c7c28c12bb8b69ec16 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?= This module contains functions for parsing and handling RFC 3986 compliant URIs. A URI is an identifier consisting of a sequence of characters matching the syntax
+ rule named URI in RFC 3986. The generic URI syntax consists of a hierarchical sequence of components referred
+ to as the scheme, authority, path, query, and fragment:
+ URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ hier-part = "//" authority path-abempty
+ / path-absolute
+ / path-rootless
+ / path-empty
+ scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ authority = [ userinfo "@" ] host [ ":" port ]
+ userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+
+ reserved = gen-delims / sub-delims
+ gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+ / "*" / "+" / "," / ";" / "="
+
+ unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+
+
The interpretation of a URI depends only on the characters used and not on how those + characters are represented in a network protocol.
+The functions implemented by this module cover the following use cases:
+
+
+
+
+
+
+
+
+
+
There are four different encodings present during the handling of URIs:
+
+
+
Unless otherwise specified the return value type and encoding are the same as the input
+ type and encoding. That is, binary input returns binary output, list input returns a list
+ output but mixed input returns list output. Input and output encodings are the same except
+ for
All of the functions but
Maybe improper list of bytes (0..255).
+URI map holding the main components of a URI.
+List of unicode codepoints, UTF-8 encoded binary, or a mix of the two, + representing an RFC 3986 compliant URI (percent-encoded form). + A URI is a sequence of characters from a very limited set: the letters of + the basic Latin alphabet, digits, and a few special characters.
+Composes an urlencoded
If an argument is invalid, a
Example:
++1> uri_string:compose_query(...). ++
Creates an RFC 3986 compliant
If an argument is invalid, a
Example:
++1> uri_string:create_uri_reference(...,...). ++
Dissects an urlencoded
If an argument is invalid, a
Example:
++1> uri_string:dissect_query(...). ++
Normalizes an RFC 3986 compliant
If an argument is invalid, a
Example:
++1> uri_string:normalize("http://example.org/one/two/../../one"). +"http://example.org/one" ++
Returns a
If parsing fails, a
Example:
++1> uri_string:parse("foo://user@example.com:8042/over/there?name=ferret#nose"). +#{fragment => "nose",host => "example.com", + path => "/over/there",port => 8042,query => "name=ferret", + scheme => foo,userinfo => "user"} +2>+
Returns an RFC 3986 compliant
If the
Example:
++1> URIMap = #{fragment => "nose", host => "example.com", path => "/over/there", +port => 8042, query => "name=ferret", scheme => foo, userinfo => "user"}. +#{fragment => "top",host => "example.com", + path => "/over/there",port => 8042,query => "?name=ferret", + scheme => foo,userinfo => "user"} + +2> uri_string:recompose(URIMap, []). +"foo://example.com:8042/over/there?name=ferret#nose"+
Resolves an RFC 3986 compliant
If an argument is invalid, a
Example:
++1> uri_string:resolve_uri_reference(...,...). ++
Transcodes an RFC 3986 compliant
If an argument is invalid, a
Example:
++1> uri_string:transcode(<<"foo://f%20oo">>, [{in_encoding, utf8}, +{out_encoding, utf16}]). +<<0,102,0,111,0,111,0,58,0,47,0,47,0,102,0,37,0,48,0,48,0,37,0,50,0,48,0, + 111,0,111>> ++
Maybe improper list of bytes (0..255).
->, <<>>).
+
+
+percent_encode_binary(<>, Acc) ->
+ percent_encode_binary(Rest, <>);
+percent_encode_binary(<<>>, Acc) ->
+ Acc.
-hex2dec(X) when (X >= $0) andalso (X =< $9) -> X - $0;
-hex2dec(X) when (X >= $A) andalso (X =< $F) -> X - $A + 10;
-hex2dec(X) when (X >= $a) andalso (X =< $f) -> X - $a + 10.
+
+%%-------------------------------------------------------------------------
+%% Returns true when every character of the scheme (list or binary)
+%% satisfies is_scheme/1 and false at the first one that does not.
+%% An empty scheme is accepted.
+%%-------------------------------------------------------------------------
+validate_scheme([]) -> true;
+validate_scheme([H|T]) ->
+    case is_scheme(H) of
+        true -> validate_scheme(T);
+        false -> false
+    end;
+validate_scheme(<<>>) -> true;
+validate_scheme(<<H, Rest/binary>>) ->
+    %% Binary input is inspected one byte at a time.
+    case is_scheme(H) of
+        true -> validate_scheme(Rest);
+        false -> false
+    end.
+
+%%-------------------------------------------------------------------------
+%% Classifies hostname into the following categories:
+%% regname, ipv4 - address does not contain reserved characters to be
+%% percent-encoded
+%% ipv6 - address does not contain reserved characters but it shall be
+%% enclosed in brackets
+%% other - address shall be percent-encoded
+%%-------------------------------------------------------------------------
+%% Entry point of the classification chain. An empty list yields false;
+%% binaries are converted with unicode:characters_to_list/1 so the
+%% remaining checks always operate on a list.
+classify_host([]) ->
+    false;
+classify_host(Host) when is_binary(Host) ->
+    classify_host_ipv6(unicode:characters_to_list(Host));
+classify_host(Host) ->
+    classify_host_ipv6(Host).
+
+%% First stage: an IPv6 literal is reported as ipv6, anything else is
+%% handed to the IPv4 stage.
+classify_host_ipv6(Host) ->
+    case is_ipv6_address(Host) of
+        false -> classify_host_ipv4(Host);
+        true -> ipv6
+    end.
+
+%% Second stage: an IPv4 literal is reported as ipv4, anything else is
+%% handed to the reg-name stage.
+classify_host_ipv4(Host) ->
+    case is_ipv4_address(Host) of
+        false -> classify_host_regname(Host);
+        true -> ipv4
+    end.
+
+%% Last stage: regname when every character satisfies is_reg_name/1,
+%% other as soon as one character does not. Accepts a list or a binary
+%% (the binary is inspected one byte at a time).
+classify_host_regname([]) -> regname;
+classify_host_regname([H|T]) ->
+    case is_reg_name(H) of
+        true -> classify_host_regname(T);
+        false -> other
+    end;
+classify_host_regname(<<>>) -> regname;
+classify_host_regname(<<H, Rest/binary>>) ->
+    case is_reg_name(H) of
+        true -> classify_host_regname(Rest);
+        false -> other
+    end.
+
+%% True when inet:parse_ipv4strict_address/1 accepts Addr.
+is_ipv4_address(Addr) ->
+    Parsed = inet:parse_ipv4strict_address(Addr),
+    case Parsed of
+        {error, _} -> false;
+        {ok, _} -> true
+    end.
+
+%% True when inet:parse_ipv6strict_address/1 accepts Addr.
+is_ipv6_address(Addr) ->
+    Parsed = inet:parse_ipv6strict_address(Addr),
+    case Parsed of
+        {error, _} -> false;
+        {ok, _} -> true
+    end.
+
+%% Wraps an IPv6 literal in "[" and "]", keeping the input type
+%% (binary in, binary out; list in, list out).
+bracket_ipv6(Addr) when is_binary(Addr) ->
+    <<$[, Addr/binary, $]>>;
+bracket_ipv6(Addr) when is_list(Addr) ->
+    "[" ++ Addr ++ "]".
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for recompose
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% Checks if input Map has valid combination of fields that can be
+%% recomposed into a URI.
+%% It filters out the following combinations from the set of all possible
+%% values:
+%% - port
+%% E.g. ":8080" - invalid URI
+%% - userinfo
+%% E.g. "//user@" - invalid URI
+%% - userinfo port
+%% E.g. "//user@:8080" => #{host => [],port => 8080,userinfo => "user"}
+%% There is always at least an empty host when both userinfo and port
+%% are present.
+%%-------------------------------------------------------------------------
+is_valid_map(Map) ->
+    %% The three rejected combinations all lack a host while carrying
+    %% userinfo and/or port, so the map is valid exactly when it has a
+    %% host or has neither of the other two authority components.
+    maps:is_key(host, Map) orelse
+        not (maps:is_key(userinfo, Map) orelse maps:is_key(port, Map)).
+
+
+%% Starts the URI: the encoded scheme followed by ":" when the map has
+%% a scheme, otherwise the 'empty' marker. The second argument is unused.
+update_scheme(#{scheme := Scheme}, _) ->
+    Encoded = encode_scheme(Scheme),
+    add_colon_postfix(Encoded);
+update_scheme(#{}, _) ->
+    empty.
+
+
+%% Adds "//" plus the encoded userinfo. Without a userinfo key the
+%% accumulated URI (possibly the 'empty' marker) is returned untouched.
+update_userinfo(#{userinfo := Userinfo}, empty) ->
+    add_auth_prefix(encode_userinfo(Userinfo));
+update_userinfo(#{userinfo := Userinfo}, URI) ->
+    Authority = add_auth_prefix(encode_userinfo(Userinfo)),
+    concat(URI, Authority);
+update_userinfo(#{}, URI) ->
+    URI.
+
+
+%% Adds the encoded host. On a non-empty URI the prefix is chosen by
+%% add_host_prefix/2 ("@" after userinfo, "//" otherwise). Without a
+%% host key the accumulated URI is returned untouched.
+update_host(#{host := Host}, empty) ->
+    add_auth_prefix(encode_host(Host));
+update_host(#{host := Host} = Map, URI) ->
+    Prefixed = add_host_prefix(Map, encode_host(Host)),
+    concat(URI, Prefixed);
+update_host(#{}, URI) ->
+    URI.
+
+
+%% URI cannot be empty for ports. E.g. ":8080" is not a valid URI
+%% Appends ":" plus the encoded port when the map has a port.
+update_port(#{port := Port}, URI) ->
+    PortPart = add_colon(encode_port(Port)),
+    concat(URI, PortPart);
+update_port(#{}, URI) ->
+    URI.
+
+
+%% Adds the encoded path. Without a path key the accumulated URI
+%% (possibly the 'empty' marker) is returned untouched.
+update_path(#{path := Path}, empty) ->
+    encode_path(Path);
+update_path(#{path := Path}, URI) ->
+    Encoded = encode_path(Path),
+    concat(URI, Encoded);
+update_path(#{}, URI) ->
+    URI.
+
+
+%% Adds the encoded query as-is (no "?" is inserted here). Without a
+%% query key the accumulated URI is returned untouched.
+update_query(#{query := Query}, empty) ->
+    encode_query(Query);
+update_query(#{query := Query}, URI) ->
+    Encoded = encode_query(Query),
+    concat(URI, Encoded);
+update_query(#{}, URI) ->
+    URI.
+
+
+%% Adds "#" plus the encoded fragment. Both fragment clauses use
+%% encode_fragment/1 so a fragment is encoded the same way whether or
+%% not other components precede it.
+%% NOTE(review): an absent fragment on a still-empty URI yields "" and
+%% not 'empty' - presumably because this is the final recompose step;
+%% confirm against recompose.
+update_fragment(#{fragment := Fragment}, empty) ->
+    add_hashmark(encode_fragment(Fragment));
+update_fragment(#{fragment := Fragment}, URI) ->
+    concat(URI, add_hashmark(encode_fragment(Fragment)));
+update_fragment(#{}, empty) ->
+    "";
+update_fragment(#{}, URI) ->
+    URI.
+
+%%-------------------------------------------------------------------------
+%% Concatenates its arguments that can be lists and binaries.
+%% The result is a list if at least one of its arguments is a list and
+%% binary otherwise.
+%%-------------------------------------------------------------------------
+%% Binary ++ binary stays a binary; any list operand makes the result a
+%% list, with the binary operand converted via unicode:characters_to_list/1.
+concat(A, B) when is_binary(A), is_binary(B) ->
+    <<A/binary, B/binary>>;
+concat(A, B) when is_binary(A), is_list(B) ->
+    unicode:characters_to_list(A) ++ B;
+concat(A, B) when is_list(A) ->
+    A ++ maybe_to_list(B).
+
+%% Prefixes a component with "#"; the 'empty' marker passes through.
+add_hashmark(empty) ->
+    empty;
+add_hashmark(Comp) when is_binary(Comp) ->
+    <<"#", Comp/binary>>;
+add_hashmark(Comp) when is_list(Comp) ->
+    "#" ++ Comp.
+
+%% Prefixes a component with ":"; the 'empty' marker passes through.
+add_colon(empty) ->
+    empty;
+add_colon(Comp) when is_binary(Comp) ->
+    <<":", Comp/binary>>;
+add_colon(Comp) when is_list(Comp) ->
+    ":" ++ Comp.
+
+%% Appends ":" to a component; the 'empty' marker passes through.
+%% The result has the same type (binary or list) as the input.
+add_colon_postfix(empty) -> empty;
+add_colon_postfix(Comp) when is_binary(Comp) ->
+    <<Comp/binary, $:>>;
+add_colon_postfix(Comp) when is_list(Comp) ->
+    Comp ++ ":".
+
+%% Prefixes a component with "//"; the 'empty' marker passes through.
+add_auth_prefix(empty) ->
+    empty;
+add_auth_prefix(Comp) when is_binary(Comp) ->
+    <<$/, $/, Comp/binary>>;
+add_auth_prefix(Comp) when is_list(Comp) ->
+    "//" ++ Comp.
+
+%% Chooses the host prefix from the map: "@" when userinfo is present,
+%% "//" otherwise. The 'empty' marker passes through unchanged.
+add_host_prefix(_, empty) ->
+    empty;
+add_host_prefix(#{userinfo := _}, Host) when is_binary(Host) ->
+    <<"@", Host/binary>>;
+add_host_prefix(#{}, Host) when is_binary(Host) ->
+    <<$/, $/, Host/binary>>;
+add_host_prefix(#{userinfo := _}, Host) when is_list(Host) ->
+    "@" ++ Host;
+add_host_prefix(#{}, Host) when is_list(Host) ->
+    "//" ++ Host.
+
+%% Converts binaries with unicode:characters_to_list/1; every other
+%% term is returned as it is.
+maybe_to_list(Comp) when not is_binary(Comp) ->
+    Comp;
+maybe_to_list(Comp) ->
+    unicode:characters_to_list(Comp).
+
+%% Renders the integer port as a binary via integer_to_binary/1.
+encode_port(Port) ->
+ integer_to_binary(Port).
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index c379eeb15b..1859a25a18 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -31,9 +31,31 @@
parse_path/1, parse_pct_encoded_fragment/1, parse_pct_encoded_query/1,
parse_pct_encoded_userinfo/1, parse_port/1,
parse_query/1, parse_scheme/1, parse_userinfo/1,
- parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1
+ parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1,
+ recompose_fragment/1, recompose_parse_fragment/1,
+ recompose_query/1, recompose_parse_query/1,
+ recompose_path/1, recompose_parse_path/1,
+ recompose_autogen/1, parse_recompose_autogen/1
]).
+
+-define(SCHEME, "foo").
+-define(USERINFO, "åsa").
+-define(USERINFO_ENC, "%C3%A5sa").
+-define(HOST, "älvsjö").
+-define(HOST_ENC, "%C3%A4lvsj%C3%B6").
+-define(IPV6, "::127.0.0.1").
+-define(IPV6_ENC, "[::127.0.0.1]").
+-define(PORT, 8042).
+-define(PORT_ENC, ":8042").
+-define(PATH, "/där").
+-define(PATH_ENC, "/d%C3%A4r").
+-define(QUERY, "?name=örn").
+-define(QUERY_ENC, "?name=%C3%B6rn").
+-define(FRAGMENT, "näsa").
+-define(FRAGMENT_ENC, "#n%C3%A4sa").
+
+
suite() ->
[{timetrap,{minutes,1}}].
@@ -66,12 +88,202 @@ all() ->
parse_list,
parse_binary,
parse_mixed,
- parse_relative
+ parse_relative,
+ recompose_fragment,
+ recompose_parse_fragment,
+ recompose_query,
+ recompose_parse_query,
+ recompose_path,
+ recompose_parse_path,
+ recompose_autogen,
+ parse_recompose_autogen
].
groups() ->
[].
+
+%%-------------------------------------------------------------------------
+%% Helper functions
+%%-------------------------------------------------------------------------
+%% Builds every combination of component generators (list variant,
+%% binary variant, or none) in scheme..fragment order, dropping those
+%% that have userinfo and/or port but no host.
+uri_combinations() ->
+    Schemes   = [fun update_scheme/1, fun update_scheme_binary/1, none],
+    Userinfos = [fun update_userinfo/1, fun update_userinfo_binary/1, none],
+    Hosts     = [fun update_host/1, fun update_host_binary/1,
+                 fun update_ipv6/1, fun update_ipv6_binary/1, none],
+    Ports     = [fun update_port/1, none],
+    Paths     = [fun update_path/1, fun update_path_binary/1, none],
+    Queries   = [fun update_query/1, fun update_query_binary/1, none],
+    Fragments = [fun update_fragment/1, fun update_fragment_binary/1, none],
+    [[Sch,Usr,Hst,Prt,Pat,Qry,Frg] ||
+        Sch <- Schemes,
+        Usr <- Userinfos,
+        Hst <- Hosts,
+        Prt <- Ports,
+        Pat <- Paths,
+        Qry <- Queries,
+        Frg <- Fragments,
+        Hst =/= none orelse (Usr =:= none andalso Prt =:= none)].
+
+
+%% Folds one combination into a {Map, ExpectedURI} pair, starting from
+%% {#{}, empty}. Entries that are not funs (the atom none) leave the
+%% accumulator unchanged.
+generate_test_vector(Comb) ->
+    Step = fun (Update, {_, _} = Acc) when is_function(Update) ->
+                   Update(Acc);
+               (_, Acc) ->
+                   Acc
+           end,
+    lists:foldl(Step, {#{}, empty}, Comb).
+
+%% Maps generate_test_vector/1 over a list of combinations.
+generate_test_vectors(L) ->
+ lists:map(fun generate_test_vector/1, L).
+
+%% List variant: stores the fragment as a list, so the expected URI is
+%% always a list (a binary accumulator is converted first).
+update_fragment({In, empty}) ->
+    Map = In#{fragment => ?FRAGMENT},
+    {Map, ?FRAGMENT_ENC};
+update_fragment({In, Out}) when is_list(Out) ->
+    Map = In#{fragment => ?FRAGMENT},
+    {Map, Out ++ ?FRAGMENT_ENC};
+update_fragment({In, Out}) when is_binary(Out) ->
+    Map = In#{fragment => ?FRAGMENT},
+    {Map, binary_to_list(Out) ++ ?FRAGMENT_ENC}.
+
+%% Binary variant: stores the fragment as a UTF-8 binary. The expected
+%% URI stays a binary only while the accumulator is a binary (or empty);
+%% a list accumulator keeps the result a list.
+update_fragment_binary({In, empty}) ->
+    {In#{fragment => <<?FRAGMENT/utf8>>}, <<?FRAGMENT_ENC>>};
+update_fragment_binary({In, Out}) when is_list(Out) ->
+    {In#{fragment => <<?FRAGMENT/utf8>>}, Out ++ ?FRAGMENT_ENC};
+update_fragment_binary({In, Out}) when is_binary(Out) ->
+    {In#{fragment => <<?FRAGMENT/utf8>>}, <<Out/binary,?FRAGMENT_ENC>>}.
+
+
+%% List variant: stores the query as a list, so the expected URI is
+%% always a list (a binary accumulator is converted first).
+update_query({In, empty}) ->
+    Map = In#{query => ?QUERY},
+    {Map, ?QUERY_ENC};
+update_query({In, Out}) when is_list(Out) ->
+    Map = In#{query => ?QUERY},
+    {Map, Out ++ ?QUERY_ENC};
+update_query({In, Out}) when is_binary(Out) ->
+    Map = In#{query => ?QUERY},
+    {Map, binary_to_list(Out) ++ ?QUERY_ENC}.
+
+%% Binary variant: stores the query as a UTF-8 binary. The expected URI
+%% stays a binary only while the accumulator is a binary (or empty).
+update_query_binary({In, empty}) ->
+    {In#{query => <<?QUERY/utf8>>}, <<?QUERY_ENC>>};
+update_query_binary({In, Out}) when is_list(Out) ->
+    {In#{query => <<?QUERY/utf8>>}, Out ++ ?QUERY_ENC};
+update_query_binary({In, Out}) when is_binary(Out) ->
+    {In#{query => <<?QUERY/utf8>>}, <<Out/binary,?QUERY_ENC>>}.
+
+%% List variant: stores the path as a list, so the expected URI is
+%% always a list (a binary accumulator is converted first).
+update_path({In, empty}) ->
+    Map = In#{path => ?PATH},
+    {Map, ?PATH_ENC};
+update_path({In, Out}) when is_list(Out) ->
+    Map = In#{path => ?PATH},
+    {Map, Out ++ ?PATH_ENC};
+update_path({In, Out}) when is_binary(Out) ->
+    Map = In#{path => ?PATH},
+    {Map, binary_to_list(Out) ++ ?PATH_ENC}.
+
+%% Binary variant: stores the path as a UTF-8 binary. The expected URI
+%% stays a binary only while the accumulator is a binary (or empty).
+update_path_binary({In, empty}) ->
+    {In#{path => <<?PATH/utf8>>}, <<?PATH_ENC>>};
+update_path_binary({In, Out}) when is_list(Out) ->
+    {In#{path => <<?PATH/utf8>>}, Out ++ ?PATH_ENC};
+update_path_binary({In, Out}) when is_binary(Out) ->
+    {In#{path => <<?PATH/utf8>>}, <<Out/binary,?PATH_ENC>>}.
+
+%% Adds the port and appends ":8042" to the expected URI, keeping the
+%% accumulator's type. There is no clause for an 'empty' accumulator:
+%% uri_combinations/0 never yields a port without a preceding host.
+update_port({In, Out}) when is_list(Out) ->
+    {In#{port => ?PORT}, Out ++ ?PORT_ENC};
+update_port({In, Out}) when is_binary(Out) ->
+    {In#{port => ?PORT}, <<Out/binary,?PORT_ENC>>}.
+
+%% List variant: stores the host as a list. The separator in the
+%% expected URI is "@" after userinfo and "//" otherwise; the result
+%% is always a list.
+update_host({In, empty}) ->
+    {In#{host => ?HOST}, "//" ++ ?HOST_ENC};
+update_host({In, Out}) when is_list(Out) ->
+    Sep = case maps:is_key(userinfo, In) of
+              true -> "@";
+              false -> "//"
+          end,
+    {In#{host => ?HOST}, Out ++ Sep ++ ?HOST_ENC};
+update_host({In, Out}) when is_binary(Out) ->
+    Sep = case maps:is_key(userinfo, In) of
+              true -> "@";
+              false -> "//"
+          end,
+    {In#{host => ?HOST}, binary_to_list(Out) ++ Sep ++ ?HOST_ENC}.
+
+%% Binary variant: stores the host as a UTF-8 binary. The separator in
+%% the expected URI is "@" after userinfo and "//" otherwise; the result
+%% stays a binary only while the accumulator is a binary (or empty).
+update_host_binary({In, empty}) ->
+    {In#{host => <<?HOST/utf8>>}, <<"//",?HOST_ENC>>};
+update_host_binary({In, Out}) when is_list(Out) ->
+    case maps:is_key(userinfo, In) of
+        true -> {In#{host => <<?HOST/utf8>>}, Out ++ [$@|?HOST_ENC]};
+        false -> {In#{host => <<?HOST/utf8>>}, Out ++ [$/,$/|?HOST_ENC]}
+    end;
+update_host_binary({In, Out}) when is_binary(Out) ->
+    case maps:is_key(userinfo, In) of
+        true -> {In#{host => <<?HOST/utf8>>}, <<Out/binary,$@,?HOST_ENC>>};
+        false -> {In#{host => <<?HOST/utf8>>}, <<Out/binary,"//",?HOST_ENC>>}
+    end.
+
+%% List variant with an IPv6 host: the expected URI carries the
+%% bracketed form, preceded by "@" after userinfo and "//" otherwise.
+update_ipv6({In, empty}) ->
+    {In#{host => ?IPV6}, "//" ++ ?IPV6_ENC};
+update_ipv6({In, Out}) when is_list(Out) ->
+    Sep = case maps:is_key(userinfo, In) of
+              true -> "@";
+              false -> "//"
+          end,
+    {In#{host => ?IPV6}, Out ++ Sep ++ ?IPV6_ENC};
+update_ipv6({In, Out}) when is_binary(Out) ->
+    Sep = case maps:is_key(userinfo, In) of
+              true -> "@";
+              false -> "//"
+          end,
+    {In#{host => ?IPV6}, binary_to_list(Out) ++ Sep ++ ?IPV6_ENC}.
+
+%% Binary variant with an IPv6 host: the expected URI carries the
+%% bracketed form, preceded by "@" after userinfo and "//" otherwise.
+%% The result stays a binary only while the accumulator is a binary
+%% (or empty).
+update_ipv6_binary({In, empty}) ->
+    {In#{host => <<?IPV6/utf8>>}, <<"//",?IPV6_ENC>>};
+update_ipv6_binary({In, Out}) when is_list(Out) ->
+    case maps:is_key(userinfo, In) of
+        true -> {In#{host => <<?IPV6/utf8>>}, Out ++ [$@|?IPV6_ENC]};
+        false -> {In#{host => <<?IPV6/utf8>>}, Out ++ [$/,$/|?IPV6_ENC]}
+    end;
+update_ipv6_binary({In, Out}) when is_binary(Out) ->
+    case maps:is_key(userinfo, In) of
+        true -> {In#{host => <<?IPV6/utf8>>}, <<Out/binary,$@,?IPV6_ENC>>};
+        false -> {In#{host => <<?IPV6/utf8>>}, <<Out/binary,"//",?IPV6_ENC>>}
+    end.
+
+%% List variant: stores the userinfo as a list and appends "//" plus
+%% its encoded form; the expected URI is always a list.
+update_userinfo({In, empty}) ->
+    Map = In#{userinfo => ?USERINFO},
+    {Map, "//" ++ ?USERINFO_ENC};
+update_userinfo({In, Out}) when is_list(Out) ->
+    Map = In#{userinfo => ?USERINFO},
+    {Map, Out ++ "//" ++ ?USERINFO_ENC};
+update_userinfo({In, Out}) when is_binary(Out) ->
+    Map = In#{userinfo => ?USERINFO},
+    {Map, binary_to_list(Out) ++ "//" ++ ?USERINFO_ENC}.
+
+%% Binary variant: stores the userinfo as a UTF-8 binary and appends
+%% "//" plus its encoded form. The result stays a binary only while the
+%% accumulator is a binary (or empty).
+update_userinfo_binary({In, empty}) ->
+    {In#{userinfo => <<?USERINFO/utf8>>}, <<"//",?USERINFO_ENC>>};
+update_userinfo_binary({In, Out}) when is_list(Out) ->
+    {In#{userinfo => <<?USERINFO/utf8>>}, Out ++ "//" ++ ?USERINFO_ENC};
+update_userinfo_binary({In, Out}) when is_binary(Out) ->
+    {In#{userinfo => <<?USERINFO/utf8>>}, <<Out/binary,"//",?USERINFO_ENC>>}.
+
+%% List variant: the scheme always comes first, so only an 'empty'
+%% accumulator is accepted.
+update_scheme({In, empty}) ->
+    Map = In#{scheme => ?SCHEME},
+    {Map, ?SCHEME ++ ":"}.
+
+%% Binary variant: the scheme always comes first, so only an 'empty'
+%% accumulator is accepted.
+update_scheme_binary({In, empty}) ->
+    {In#{scheme => <<?SCHEME/utf8>>}, <<?SCHEME,$:>>}.
+
+
+%% Test recompose on a generated test vector.
+%% Returns ok when uri_string:recompose/1 yields the expected URI.
+%% Otherwise raises error({test_failed, Map, Expected, {Class, Reason}});
+%% the caught class and reason are kept so the failure shows what
+%% recompose actually did.
+run_test_recompose({#{}, empty}) ->
+    %% A vector with no components is expected to recompose to "".
+    try
+        "" = uri_string:recompose(#{}),
+        ok
+    catch
+        Class:Reason -> error({test_failed, #{}, "", {Class, Reason}})
+    end;
+run_test_recompose({Map, URI}) ->
+    try
+        URI = uri_string:recompose(Map),
+        ok
+    catch
+        Class:Reason -> error({test_failed, Map, URI, {Class, Reason}})
+    end.
+
+%% Test parse - recompose on a generated test vector.
+%% Returns ok when parsing the expected URI and recomposing the result
+%% gives the same URI back. Otherwise raises
+%% error({test_failed, Map, URI, {Class, Reason}}); the caught class and
+%% reason are kept so the failure shows what went wrong.
+run_test_parse_recompose({#{}, empty}) ->
+    %% A vector with no components round-trips the empty string.
+    try
+        "" = uri_string:recompose(uri_string:parse("")),
+        ok
+    catch
+        Class:Reason -> error({test_failed, #{}, "", {Class, Reason}})
+    end;
+run_test_parse_recompose({Map, URI}) ->
+    try
+        URI = uri_string:recompose(uri_string:parse(URI)),
+        ok
+    catch
+        Class:Reason -> error({test_failed, Map, URI, {Class, Reason}})
+    end.
+
+
+%%-------------------------------------------------------------------------
+%% Parse tests
+%%-------------------------------------------------------------------------
+
parse_binary_scheme(_Config) ->
#{} = uri_string:parse(<<>>),
#{path := <<"foo">>} = uri_string:parse(<<"foo">>),
@@ -438,3 +650,87 @@ parse_relative(_Config) ->
uri_string:parse(lists:append("/pa",<<"th">>)),
#{path := "foo"} =
uri_string:parse(lists:append("fo",<<"o">>)).
+
+
+%%-------------------------------------------------------------------------
+%% Recompose tests
+%%-------------------------------------------------------------------------
+%% A fragment-only map recomposes to "#" plus the percent-encoded
+%% fragment, as a binary for binary input and a list for list input.
+recompose_fragment(_Config) ->
+    <<?FRAGMENT_ENC>> = uri_string:recompose(#{fragment => <<?FRAGMENT/utf8>>}),
+    ?FRAGMENT_ENC = uri_string:recompose(#{fragment => ?FRAGMENT}).
+
+%% Parsing and then recomposing an encoded fragment-only URI returns
+%% the input unchanged, for both binary and list input.
+recompose_parse_fragment(_Config) ->
+    <<?FRAGMENT_ENC>> = uri_string:recompose(uri_string:parse(<<?FRAGMENT_ENC>>)),
+    ?FRAGMENT_ENC = uri_string:recompose(uri_string:parse(?FRAGMENT_ENC)).
+
+%% Recomposes maps holding a query, with and without a fragment, for
+%% binary and list input. The query value includes its leading "?".
+recompose_query(_Config) ->
+    <<?QUERY_ENC>> =
+        uri_string:recompose(#{query => <<?QUERY/utf8>>}),
+    <<?QUERY_ENC, ?FRAGMENT_ENC>> =
+        uri_string:recompose(#{query => <<?QUERY/utf8>>,
+                               fragment => <<?FRAGMENT/utf8>>}),
+    "?name=%C3%B6rn" =
+        uri_string:recompose(#{query => "?name=örn"}),
+    "?name=%C3%B6rn#n%C3%A4sa" =
+        uri_string:recompose(#{query => "?name=örn",
+                               fragment => "näsa"}).
+
+%% Parse followed by recompose must return the encoded input unchanged
+%% for query-only and query+fragment URIs, as binaries and as lists.
+recompose_parse_query(_Config) ->
+ <<"?name=%C3%B6rn">> = uri_string:recompose(uri_string:parse(<<"?name=%C3%B6rn">>)),
+ <<"?name=%C3%B6rn#n%C3%A4sa">> =
+ uri_string:recompose(uri_string:parse(<<"?name=%C3%B6rn#n%C3%A4sa">>)),
+ "?name=%C3%B6rn" = uri_string:recompose(uri_string:parse("?name=%C3%B6rn")),
+ "?name=%C3%B6rn#n%C3%A4sa" = uri_string:recompose(uri_string:parse("?name=%C3%B6rn#n%C3%A4sa")).
+
+%% Recomposes maps holding a path, optionally with query and/or
+%% fragment. The first group uses UTF-8 binaries, the second lists;
+%% the output type follows the input type.
+recompose_path(_Config) ->
+ <<"/d%C3%A4r">> =
+ uri_string:recompose(#{path => <<"/där"/utf8>>}),
+ <<"/d%C3%A4r#n%C3%A4sa">> =
+ uri_string:recompose(#{path => <<"/där"/utf8>>,
+ fragment => <<"näsa"/utf8>>}),
+ <<"/d%C3%A4r?name=%C3%B6rn">> =
+ uri_string:recompose(#{path => <<"/där"/utf8>>,
+ query => <<"?name=örn"/utf8>>}),
+ <<"/d%C3%A4r?name=%C3%B6rn#n%C3%A4sa">> =
+ uri_string:recompose(#{path => <<"/där"/utf8>>,
+ query => <<"?name=örn"/utf8>>,
+ fragment => <<"näsa"/utf8>>}),
+
+
+ "/d%C3%A4r" =
+ uri_string:recompose(#{path => "/där"}),
+ "/d%C3%A4r#n%C3%A4sa" =
+ uri_string:recompose(#{path => "/där",
+ fragment => "näsa"}),
+ "/d%C3%A4r?name=%C3%B6rn" =
+ uri_string:recompose(#{path => "/där",
+ query => "?name=örn"}),
+ "/d%C3%A4r?name=%C3%B6rn#n%C3%A4sa" =
+ uri_string:recompose(#{path => "/där",
+ query => "?name=örn",
+ fragment => "näsa"}).
+
+
+%% Parse followed by recompose must return the encoded input unchanged
+%% for path, path+fragment and path+query URIs, as binaries and lists.
+recompose_parse_path(_Config) ->
+ <<"/d%C3%A4r">> =
+ uri_string:recompose(uri_string:parse(<<"/d%C3%A4r">>)),
+ <<"/d%C3%A4r#n%C3%A4sa">> =
+ uri_string:recompose(uri_string:parse(<<"/d%C3%A4r#n%C3%A4sa">>)),
+ <<"/d%C3%A4r?name=%C3%B6rn">> =
+ uri_string:recompose(uri_string:parse(<<"/d%C3%A4r?name=%C3%B6rn">>)),
+
+ "/d%C3%A4r" =
+ uri_string:recompose(uri_string:parse("/d%C3%A4r")),
+ "/d%C3%A4r#n%C3%A4sa" =
+ uri_string:recompose(uri_string:parse("/d%C3%A4r#n%C3%A4sa")),
+ "/d%C3%A4r?name=%C3%B6rn" =
+ uri_string:recompose(uri_string:parse("/d%C3%A4r?name=%C3%B6rn")).
+
+
+%% Runs run_test_recompose/1 over every generated test vector.
+recompose_autogen(_Config) ->
+    Combinations = uri_combinations(),
+    Vectors = generate_test_vectors(Combinations),
+    lists:map(fun run_test_recompose/1, Vectors).
+
+%% Runs run_test_parse_recompose/1 over every generated test vector.
+parse_recompose_autogen(_Config) ->
+    Combinations = uri_combinations(),
+    Vectors = generate_test_vectors(Combinations),
+    lists:map(fun run_test_parse_recompose/1, Vectors).
--
cgit v1.2.3
From 505579acda74b9281c965488f86cbd6c83254a57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?=
Date: Fri, 29 Sep 2017 16:54:50 +0200
Subject: stdlib: Improve calculation of parsed binary
- Improved calculation of parsed binary.
- Added tests for special corner cases.
- Fixed dialyzer warnings.
---
lib/stdlib/src/uri_string.erl | 246 +++++++++++++++++++++--------------
lib/stdlib/test/uri_string_SUITE.erl | 19 ++-
2 files changed, 164 insertions(+), 101 deletions(-)
(limited to 'lib/stdlib')
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index 89a2c21518..bb7079c193 100755
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -434,51 +434,36 @@ parse_relative_part(?STRING_REST("//", Rest), URI) ->
%% Parse userinfo - "//" is NOT part of authority
try parse_userinfo(Rest, URI) of
{T, URI1} ->
- {Userinfo, _} = split_binary(Rest, byte_size(Rest) - byte_size(T) - 1),
+ Userinfo = calculate_parsed_part(Rest, T),
URI1#{userinfo => decode_userinfo(Userinfo)}
catch
throw:uri_parse_error ->
{T, URI1} = parse_host(Rest, URI),
- {Host, _} = split_binary(Rest, byte_size_exl_single_slash(Rest) - byte_size_exl_head(T)),
+ Host = calculate_parsed_part_sl(Rest, T),
URI1#{host => decode_host(remove_brackets(Host))}
end;
parse_relative_part(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-absolute
- {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Path = calculate_parsed_part(Rest, T),
URI1#{path => decode_path(?STRING_REST($/, Path))};
parse_relative_part(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Query = calculate_parsed_part(Rest, T),
URI1#{query => decode_query(?STRING_REST($?, Query))};
parse_relative_part(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
URI1#{fragment => decode_fragment(Fragment)};
parse_relative_part(?STRING_REST(Char, Rest), URI) ->
case is_segment_nz_nc(Char) of
true ->
{T, URI1} = parse_segment_nz_nc(Rest, URI), % path-noscheme
- {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Path = calculate_parsed_part(Rest, T),
URI1#{path => decode_path(?STRING_REST(Char, Path))};
false -> throw(uri_parse_error)
end.
-%% Returns size of 'Rest' for proper calculation of splitting position.
-%% Solves the following special case:
-%%
-%% #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>).
-%%
-%% While keeping the following true:
-%%
-%% #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>).
-%% #{host := <<>>, path := <<"/hostname">>} = uri_string:parse(<<"///hostname">>).
-%%
--spec byte_size_exl_single_slash(uri_string()) -> number().
-byte_size_exl_single_slash(<<$/>>) -> 0;
-byte_size_exl_single_slash(Rest) -> byte_size(Rest).
-
-
%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 3.3. Path]
%%
@@ -516,11 +501,11 @@ parse_segment(?STRING_REST($/, Rest), URI) ->
parse_segment(Rest, URI); % segment
parse_segment(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % ?query
- {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Query = calculate_parsed_part(Rest, T),
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_segment(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI),
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_segment(?STRING_REST(Char, Rest), URI) ->
case is_pchar(Char) of
@@ -539,11 +524,11 @@ parse_segment_nz_nc(?STRING_REST($/, Rest), URI) ->
parse_segment(Rest, URI); % segment
parse_segment_nz_nc(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % ?query
- {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Query = calculate_parsed_part(Rest, T),
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_segment_nz_nc(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI),
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) ->
case is_segment_nz_nc(Char) of
@@ -580,7 +565,7 @@ is_segment_nz_nc(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
parse_scheme_start(?STRING_REST(Char, Rest), URI) ->
case is_alpha(Char) of
true -> {T, URI1} = parse_scheme(Rest, URI),
- {Scheme, _} = split_binary(Rest, byte_size(Rest) - byte_size(T) - 1),
+ Scheme = calculate_parsed_scheme(Rest, T),
URI1#{scheme => ?STRING_REST(Char, Scheme)};
false -> throw(uri_parse_error)
end.
@@ -618,31 +603,31 @@ parse_hier(?STRING_REST("//", Rest), URI) ->
% Parse userinfo - "//" is NOT part of authority
try parse_userinfo(Rest, URI) of
{T, URI1} ->
- {Userinfo, _} = split_binary(Rest, byte_size(Rest) - byte_size(T) - 1),
+ Userinfo = calculate_parsed_part(Rest, T),
{Rest, URI1#{userinfo => decode_userinfo(Userinfo)}}
catch
throw:uri_parse_error ->
{T, URI1} = parse_host(Rest, URI),
- {Host, _} = split_binary(Rest, byte_size_exl_single_slash(Rest) - byte_size_exl_head(T)),
+ Host = calculate_parsed_part_sl(Rest, T),
{Rest, URI1#{host => decode_host(remove_brackets(Host))}}
end;
parse_hier(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-absolute
- {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Path = calculate_parsed_part(Rest, T),
{Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_hier(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Query = calculate_parsed_part(Rest, T),
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_hier(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless
case is_pchar(Char) of
true -> % segment_nz
{T, URI1} = parse_segment(Rest, URI),
- {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Path = calculate_parsed_part(Rest, T),
{Rest, URI1#{path => decode_path(?STRING_REST(Char, Path))}};
false -> throw(uri_parse_error)
end;
@@ -680,7 +665,7 @@ parse_userinfo(?CHAR($@), _URI) ->
throw(uri_parse_error);
parse_userinfo(?STRING_REST($@, Rest), URI) ->
{T, URI1} = parse_host(Rest, URI),
- {Host, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Host = calculate_parsed_part(Rest, T),
{Rest, URI1#{host => decode_host(remove_brackets(Host))}};
parse_userinfo(?STRING_REST(Char, Rest), URI) ->
case is_userinfo(Char) of
@@ -741,22 +726,22 @@ is_userinfo(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
-spec parse_host(binary(), uri_map()) -> {binary(), uri_map()}.
parse_host(?STRING_REST($:, Rest), URI) ->
{T, URI1} = parse_port(Rest, URI),
- {H, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ H = calculate_parsed_part(Rest, T),
Port = binary_to_integer(H),
{Rest, URI1#{port => Port}};
parse_host(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-abempty
- {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Path = calculate_parsed_part(Rest, T),
{Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_host(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Query = calculate_parsed_part(Rest, T),
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_host(?STRING_REST($[, Rest), URI) ->
parse_ipv6_bin(Rest, [], URI);
parse_host(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_host(?STRING_REST(Char, Rest), URI) ->
case is_digit(Char) of
@@ -770,20 +755,20 @@ parse_host(?STRING_EMPTY, URI) ->
-spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}.
parse_reg_name(?STRING_REST($:, Rest), URI) ->
{T, URI1} = parse_port(Rest, URI),
- {H, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ H = calculate_parsed_part(Rest, T),
Port = binary_to_integer(H),
{Rest, URI1#{port => Port}};
parse_reg_name(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-abempty
- {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Path = calculate_parsed_part(Rest, T),
{Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_reg_name(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Query = calculate_parsed_part(Rest, T),
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_reg_name(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_reg_name(?STRING_REST(Char, Rest), URI) ->
case is_reg_name(Char) of
@@ -803,23 +788,23 @@ is_reg_name(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
parse_ipv4_bin(?STRING_REST($:, Rest), Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
{T, URI1} = parse_port(Rest, URI),
- {H, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ H = calculate_parsed_part(Rest, T),
Port = binary_to_integer(H),
{Rest, URI1#{port => Port}};
parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
{T, URI1} = parse_segment(Rest, URI), % path-abempty
- {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Path = calculate_parsed_part(Rest, T),
{Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Query = calculate_parsed_part(Rest, T),
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) ->
case is_ipv4(Char) of
@@ -866,20 +851,20 @@ is_ipv6(Char) -> is_hex_digit(Char).
-spec parse_ipv6_bin_end(binary(), uri_map()) -> {binary(), uri_map()}.
parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) ->
{T, URI1} = parse_port(Rest, URI),
- {H, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ H = calculate_parsed_part(Rest, T),
Port = binary_to_integer(H),
{Rest, URI1#{port => Port}};
parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-abempty
- {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Path = calculate_parsed_part(Rest, T),
{Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Query = calculate_parsed_part(Rest, T),
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) ->
case is_ipv6(Char) of
@@ -909,15 +894,15 @@ validate_ipv6_address(Addr) ->
-spec parse_port(binary(), uri_map()) -> {binary(), uri_map()}.
parse_port(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-abempty
- {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Path = calculate_parsed_part(Rest, T),
{Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_port(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)),
+ Query = calculate_parsed_part(Rest, T),
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_port(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_port(?STRING_REST(Char, Rest), URI) ->
case is_digit(Char) of
@@ -943,7 +928,7 @@ parse_port(?STRING_EMPTY, URI) ->
-spec parse_query(binary(), uri_map()) -> {binary(), uri_map()}.
parse_query(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI),
- {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)),
+ Fragment = calculate_parsed_part(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_query(?STRING_REST(Char, Rest), URI) ->
case is_query(Char) of
@@ -1046,13 +1031,6 @@ is_hex_digit(C)
is_hex_digit(_) -> false.
-%% Returns the size of a binary exluding the first element.
-%% Used in calls to split_binary().
--spec byte_size_exl_head(binary()) -> number().
-byte_size_exl_head(<<>>) -> 0;
-byte_size_exl_head(Binary) -> byte_size(Binary) + 1.
-
-
%% Remove enclosing brackets from binary
-spec remove_brackets(binary()) -> binary().
remove_brackets(<<$[/utf8, Rest/binary>>) ->
@@ -1064,6 +1042,95 @@ remove_brackets(<<$[/utf8, Rest/binary>>) ->
remove_brackets(Addr) -> Addr.
+%%-------------------------------------------------------------------------
+%% Helper functions for calculating the parsed binary.
+%%-------------------------------------------------------------------------
+
+%% Returns the parsed binary based on Input and the Unparsed part.
+%% Handles the following special cases:
+%%
+%% #{host => [],path => "/",query => "?"} = uri_string:parse("///?")
+%% #{fragment => [],host => [],path => "/"} = uri_string:parse("///#")
+%%
+-spec calculate_parsed_part(binary(), binary()) -> binary().
+calculate_parsed_part(<<$?>>, _) -> <<>>;
+calculate_parsed_part(<<$#>>, _) -> <<>>;
+calculate_parsed_part(Input, Unparsed) ->
+ {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)),
+ First.
+
+
+%% Returns the parsed binary based on Input and the Unparsed part.
+%% Used when parsing authority.
+%%
+%% Handles the following special cases:
+%%
+%% #{host => "foo",query => "?"} = uri_string:parse("//foo?")
+%% #{fragment => [],host => "foo"} = uri_string:parse("//foo#")
+%% #{host => "foo",path => "/"} = uri_string:parse("//foo/")
+%% #{host => "foo",query => "?",scheme => "http"} = uri_string:parse("http://foo?")
+%% #{fragment => [],host => "foo",scheme => "http"} = uri_string:parse("http://foo#")
+%% #{host => "foo",path => "/",scheme => "http"} = uri_string:parse("http://foo/")
+%%
+-spec calculate_parsed_part_sl(binary(), binary()) -> binary().
+calculate_parsed_part_sl(<<$?>>, _) -> <<>>;
+calculate_parsed_part_sl(<<$#>>, _) -> <<>>;
+calculate_parsed_part_sl(<<>>, _) -> <<>>;
+calculate_parsed_part_sl(Input, <<>>) ->
+ case binary:last(Input) of
+ $? ->
+ {First, _} =
+ split_binary(Input, byte_size(Input) - 1),
+ First;
+
+ $# ->
+ {First, _} =
+ split_binary(Input, byte_size(Input) - 1),
+ First;
+ $/ ->
+ {First, _} =
+ split_binary(Input, byte_size(Input) - 1),
+ First;
+ _Else ->
+ {First, _} =
+ split_binary(Input, byte_size_exl_single_slash(Input)),
+ First
+ end;
+calculate_parsed_part_sl(Input, Unparsed) ->
+ {First, _} =
+ split_binary(Input, byte_size_exl_single_slash(Input) - byte_size_exl_head(Unparsed)),
+ First.
+
+
+%% Returns the parsed binary based on Input and the Unparsed part.
+%% Used when parsing scheme.
+-spec calculate_parsed_scheme(binary(), binary()) -> binary().
+calculate_parsed_scheme(Input, Unparsed) ->
+ {First, _} = split_binary(Input, byte_size(Input) - byte_size(Unparsed) - 1),
+ First.
+
+%% Returns the size of a binary exluding the first element.
+%% Used in calls to split_binary().
+-spec byte_size_exl_head(binary()) -> number().
+byte_size_exl_head(<<>>) -> 0;
+byte_size_exl_head(Binary) -> byte_size(Binary) + 1.
+
+
+%% Returns size of 'Rest' for proper calculation of splitting position.
+%% Solves the following special case:
+%%
+%% #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>).
+%%
+%% While keeping the following true:
+%%
+%% #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>).
+%% #{host := <<>>, path := <<"/hostname">>} = uri_string:parse(<<"///hostname">>).
+%%
+-spec byte_size_exl_single_slash(uri_string()) -> number().
+byte_size_exl_single_slash(<<$/>>) -> 0;
+byte_size_exl_single_slash(Rest) -> byte_size(Rest).
+
+
%%-------------------------------------------------------------------------
%% [RFC 3986, Chapter 2.1. Percent-Encoding]
%%
@@ -1080,23 +1147,23 @@ remove_brackets(Addr) -> Addr.
%%
%% pct-encoded = "%" HEXDIG HEXDIG
%%-------------------------------------------------------------------------
--spec decode_userinfo(list()|binary()) -> list() | binary().
+-spec decode_userinfo(binary()) -> binary().
decode_userinfo(Cs) ->
decode(Cs, fun is_userinfo/1, <<>>).
--spec decode_host(list()|binary()) -> list() | binary().
+-spec decode_host(binary()) -> binary().
decode_host(Cs) ->
decode(Cs, fun is_host/1, <<>>).
--spec decode_path(list()|binary()) -> list() | binary().
+-spec decode_path(binary()) -> binary().
decode_path(Cs) ->
decode(Cs, fun is_path/1, <<>>).
--spec decode_query(list()|binary()) -> list() | binary().
+-spec decode_query(binary()) -> binary().
decode_query(Cs) ->
decode(Cs, fun is_query/1, <<>>).
--spec decode_fragment(list()|binary()) -> list() | binary().
+-spec decode_fragment(binary()) -> binary().
decode_fragment(Cs) ->
decode(Cs, fun is_fragment/1, <<>>).
@@ -1136,7 +1203,10 @@ encode_path(Cs) ->
-spec encode_query(list()|binary()) -> list() | binary().
encode_query(Cs) ->
- encode(Cs, fun is_query/1).
+ case validate_query(Cs) of
+ true -> encode(Cs, fun is_query/1);
+ false -> throw(uri_parse_error)
+ end.
-spec encode_fragment(list()|binary()) -> list() | binary().
encode_fragment(Cs) ->
@@ -1145,7 +1215,6 @@ encode_fragment(Cs) ->
%%-------------------------------------------------------------------------
%% Helper funtions for percent-decode
%%-------------------------------------------------------------------------
--spec decode(list()|binary(), fun(), binary()) -> list() | binary().
decode(<<$%,C0,C1,Cs/binary>>, Fun, Acc) ->
case is_hex_digit(C0) andalso is_hex_digit(C1) of
true ->
@@ -1159,21 +1228,7 @@ decode(<>, Fun, Acc) ->
false -> throw(uri_parse_error)
end;
decode(<<>>, _Fun, Acc) ->
- Acc;
-decode([$%,C0,C1|Cs], Fun, Acc) ->
- case is_hex_digit(C0) andalso is_hex_digit(C1) of
- true ->
- B = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
- decode(Cs, Fun, <>);
- false -> throw(uri_parse_error)
- end;
-decode([C|Cs], Fun, Acc) ->
- case Fun(C) of
- true -> decode(Cs, Fun, <>);
- false -> throw(uri_parse_error)
- end;
-decode([], _Fun, Acc) ->
- unicode:characters_to_list(Acc).
+ Acc.
%% Check if char is allowed in host
-spec is_host(char()) -> boolean().
@@ -1186,7 +1241,6 @@ is_path($/) -> true;
is_path(Char) -> is_pchar(Char).
-
%%-------------------------------------------------------------------------
%% Helper functions for percent-encode
%%-------------------------------------------------------------------------
@@ -1206,7 +1260,7 @@ encode(<<>>, _Fun, Acc) ->
Acc.
--spec encode_codepoint_binary(integer(), fun()) -> list().
+-spec encode_codepoint_binary(integer(), fun()) -> binary().
encode_codepoint_binary(C, Fun) ->
case Fun(C) of
false -> percent_encode_binary(C);
@@ -1240,6 +1294,11 @@ validate_scheme(<>) ->
false -> false
end.
+validate_query([$?|_]) -> true;
+validate_query(<<$?/utf8, _/binary>>) -> true;
+validate_query(_) -> false.
+
+
%%-------------------------------------------------------------------------
%% Classifies hostname into the following categories:
%% regname, ipv4 - address does not contain reserved characters to be
@@ -1248,7 +1307,7 @@ validate_scheme(<>) ->
%% encolsed in brackets
%% other - address shall be percent-encoded
%%-------------------------------------------------------------------------
-classify_host([]) -> false;
+classify_host([]) -> other;
classify_host(Addr) when is_binary(Addr) ->
A = unicode:characters_to_list(Addr),
classify_host_ipv6(A);
@@ -1272,12 +1331,6 @@ classify_host_regname([H|T]) ->
case is_reg_name(H) of
true -> classify_host_regname(T);
false -> other
- end;
-classify_host_regname(<<>>) -> regname;
-classify_host_regname(<>) ->
- case is_reg_name(H) of
- true -> classify_host_regname(Rest);
- false -> other
end.
is_ipv4_address(Addr) ->
@@ -1391,7 +1444,7 @@ update_query(#{}, URI) ->
update_fragment(#{fragment := Fragment}, empty) ->
- add_hashmark(encode_query(Fragment));
+ add_hashmark(encode_fragment(Fragment));
update_fragment(#{fragment := Fragment}, URI) ->
concat(URI,add_hashmark(encode_fragment(Fragment)));
update_fragment(#{}, empty) ->
@@ -1411,31 +1464,24 @@ concat(A, B) when is_binary(A), is_list(B) ->
concat(A, B) when is_list(A) ->
A ++ maybe_to_list(B).
-add_hashmark(empty) -> empty;
add_hashmark(Comp) when is_binary(Comp) ->
<<$#, Comp/binary>>;
add_hashmark(Comp) when is_list(Comp) ->
[$#|Comp].
-add_colon(empty) -> empty;
add_colon(Comp) when is_binary(Comp) ->
- <<$:, Comp/binary>>;
-add_colon(Comp) when is_list(Comp) ->
- [$:|Comp].
+ <<$:, Comp/binary>>.
-add_colon_postfix(empty) -> empty;
add_colon_postfix(Comp) when is_binary(Comp) ->
<>;
add_colon_postfix(Comp) when is_list(Comp) ->
Comp ++ ":".
-add_auth_prefix(empty) -> empty;
add_auth_prefix(Comp) when is_binary(Comp) ->
<<"//", Comp/binary>>;
add_auth_prefix(Comp) when is_list(Comp) ->
[$/,$/|Comp].
-add_host_prefix(_, empty) -> empty;
add_host_prefix(#{userinfo := _}, Host) when is_binary(Host) ->
<<$@,Host/binary>>;
add_host_prefix(#{}, Host) when is_binary(Host) ->
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index 1859a25a18..0eb5105c35 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -31,7 +31,7 @@
parse_path/1, parse_pct_encoded_fragment/1, parse_pct_encoded_query/1,
parse_pct_encoded_userinfo/1, parse_port/1,
parse_query/1, parse_scheme/1, parse_userinfo/1,
- parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1,
+ parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1, parse_special/1,
recompose_fragment/1, recompose_parse_fragment/1,
recompose_query/1, recompose_parse_query/1,
recompose_path/1, recompose_parse_path/1,
@@ -89,6 +89,7 @@ all() ->
parse_binary,
parse_mixed,
parse_relative,
+ parse_special,
recompose_fragment,
recompose_parse_fragment,
recompose_query,
@@ -651,6 +652,22 @@ parse_relative(_Config) ->
#{path := "foo"} =
uri_string:parse(lists:append("fo",<<"o">>)).
+parse_special(_Config) ->
+ #{host := [],query := "?"} = uri_string:parse("//?"),
+ #{fragment := [],host := []} = uri_string:parse("//#"),
+ #{host := [],query := "?",scheme := "foo"} = uri_string:parse("foo://?"),
+ #{fragment := [],host := [],scheme := "foo"} = uri_string:parse("foo://#"),
+ #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>),
+ #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>),
+ #{host := <<>>, path := <<"/hostname">>} = uri_string:parse(<<"///hostname">>),
+ #{host := [],path := "/",query := "?"} = uri_string:parse("///?"),
+ #{fragment := [],host := [],path := "/"} = uri_string:parse("///#"),
+ #{host := "foo",query := "?"} = uri_string:parse("//foo?"),
+ #{fragment := [],host := "foo"} = uri_string:parse("//foo#"),
+ #{host := "foo",path := "/"} = uri_string:parse("//foo/"),
+ #{host := "foo",query := "?",scheme := "http"} = uri_string:parse("http://foo?"),
+ #{fragment := [],host := "foo",scheme := "http"} = uri_string:parse("http://foo#"),
+ #{host := "foo",path := "/",scheme := "http"} = uri_string:parse("http://foo/").
%%-------------------------------------------------------------------------
%% Recompose tests
--
cgit v1.2.3
From 1335e59a60d5e195baf519d2c52b0ca0aa96831f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?=
Date: Wed, 4 Oct 2017 16:45:51 +0200
Subject: stdlib: Add property tests, bugfixes
- Add property tests using PropEr.
- Add new testcases to uri_string_SUITE.
- Improve calculation of parsed binary.
- Verify if input to parse() is UTF8 encoded.
- Update is_valid_map(): added check for path
and host.
---
lib/stdlib/src/uri_string.erl | 224 ++++++++++---
.../test/property_test/uri_string_decode.erl | 55 ----
.../test/property_test/uri_string_recompose.erl | 360 +++++++++++++++++++++
lib/stdlib/test/uri_string_SUITE.erl | 36 ++-
lib/stdlib/test/uri_string_property_test_SUITE.erl | 15 +-
5 files changed, 566 insertions(+), 124 deletions(-)
delete mode 100644 lib/stdlib/test/property_test/uri_string_decode.erl
create mode 100644 lib/stdlib/test/property_test/uri_string_recompose.erl
(limited to 'lib/stdlib')
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index bb7079c193..893ba4c6bf 100755
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -304,8 +304,6 @@ parse(URIString) when is_list(URIString) ->
-spec recompose(URIMap) -> URIString when
URIMap :: uri_map(),
URIString :: uri_string().
-recompose(Map) when map_size(Map) =:= 0 ->
- "";
recompose(Map) ->
case is_valid_map(Map) of
false ->
@@ -405,7 +403,7 @@ convert_mapfields_to_list(Map) ->
%% URI-reference = URI / relative-ref
%%-------------------------------------------------------------------------
-spec parse_uri_reference(binary(), uri_map()) -> uri_map().
-parse_uri_reference(<<>>, _) -> #{};
+parse_uri_reference(<<>>, _) -> #{path => <<>>};
parse_uri_reference(URIString, URI) ->
try parse_scheme_start(URIString, URI) of
Res -> Res
@@ -434,13 +432,15 @@ parse_relative_part(?STRING_REST("//", Rest), URI) ->
%% Parse userinfo - "//" is NOT part of authority
try parse_userinfo(Rest, URI) of
{T, URI1} ->
- Userinfo = calculate_parsed_part(Rest, T),
- URI1#{userinfo => decode_userinfo(Userinfo)}
+ Userinfo = calculate_parsed_userinfo(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{userinfo => decode_userinfo(Userinfo)}
catch
throw:uri_parse_error ->
{T, URI1} = parse_host(Rest, URI),
Host = calculate_parsed_part_sl(Rest, T),
- URI1#{host => decode_host(remove_brackets(Host))}
+ URI2 = maybe_add_path(URI1),
+ URI2#{host => decode_host(remove_brackets(Host))}
end;
parse_relative_part(?STRING_REST($/, Rest), URI) ->
{T, URI1} = parse_segment(Rest, URI), % path-absolute
@@ -449,11 +449,13 @@ parse_relative_part(?STRING_REST($/, Rest), URI) ->
parse_relative_part(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
Query = calculate_parsed_part(Rest, T),
- URI1#{query => decode_query(?STRING_REST($?, Query))};
+ URI2 = maybe_add_path(URI1),
+ URI2#{query => decode_query(?STRING_REST($?, Query))};
parse_relative_part(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- Fragment = calculate_parsed_part(Rest, T),
- URI1#{fragment => decode_fragment(Fragment)};
+ Fragment = calculate_parsed_fragment(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{fragment => decode_fragment(Fragment)};
parse_relative_part(?STRING_REST(Char, Rest), URI) ->
case is_segment_nz_nc(Char) of
true ->
@@ -505,7 +507,7 @@ parse_segment(?STRING_REST($?, Rest), URI) ->
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_segment(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI),
- Fragment = calculate_parsed_part(Rest, T),
+ Fragment = calculate_parsed_fragment(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_segment(?STRING_REST(Char, Rest), URI) ->
case is_pchar(Char) of
@@ -528,7 +530,7 @@ parse_segment_nz_nc(?STRING_REST($?, Rest), URI) ->
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_segment_nz_nc(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI),
- Fragment = calculate_parsed_part(Rest, T),
+ Fragment = calculate_parsed_fragment(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) ->
case is_segment_nz_nc(Char) of
@@ -566,10 +568,32 @@ parse_scheme_start(?STRING_REST(Char, Rest), URI) ->
case is_alpha(Char) of
true -> {T, URI1} = parse_scheme(Rest, URI),
Scheme = calculate_parsed_scheme(Rest, T),
- URI1#{scheme => ?STRING_REST(Char, Scheme)};
+ URI2 = maybe_add_path(URI1),
+ URI2#{scheme => ?STRING_REST(Char, Scheme)};
false -> throw(uri_parse_error)
end.
+%% Add an empty path component if it is missing after parsing the URI.
+%% According to the URI specification there is always a
+%% path component in every URI-reference and it can be
+%% empty.
+
+%% maybe_add_path(Map) ->
+%% case length(maps:keys(Map)) of
+%% 0 ->
+%% Map#{path => <<>>};
+%% _Else ->
+%% Map
+%% end.
+maybe_add_path(Map) ->
+ case maps:is_key(path, Map) of
+ false ->
+ Map#{path => <<>>};
+ _Else ->
+ Map
+ end.
+
+
-spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}.
parse_scheme(?STRING_REST($:, Rest), URI) ->
@@ -603,7 +627,7 @@ parse_hier(?STRING_REST("//", Rest), URI) ->
% Parse userinfo - "//" is NOT part of authority
try parse_userinfo(Rest, URI) of
{T, URI1} ->
- Userinfo = calculate_parsed_part(Rest, T),
+ Userinfo = calculate_parsed_userinfo(Rest, T),
{Rest, URI1#{userinfo => decode_userinfo(Userinfo)}}
catch
throw:uri_parse_error ->
@@ -621,7 +645,7 @@ parse_hier(?STRING_REST($?, Rest), URI) ->
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_hier(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- Fragment = calculate_parsed_part(Rest, T),
+ Fragment = calculate_parsed_fragment(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless
case is_pchar(Char) of
@@ -660,12 +684,11 @@ parse_hier(?STRING_EMPTY, URI) ->
%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
%%-------------------------------------------------------------------------
-spec parse_userinfo(binary(), uri_map()) -> {binary(), uri_map()}.
-parse_userinfo(?CHAR($@), _URI) ->
- %% URI cannot end in userinfo state
- throw(uri_parse_error);
+parse_userinfo(?CHAR($@), URI) ->
+ {?STRING_EMPTY, URI#{host => <<>>}};
parse_userinfo(?STRING_REST($@, Rest), URI) ->
{T, URI1} = parse_host(Rest, URI),
- Host = calculate_parsed_part(Rest, T),
+ Host = calculate_parsed_host(Rest, T),
{Rest, URI1#{host => decode_host(remove_brackets(Host))}};
parse_userinfo(?STRING_REST(Char, Rest), URI) ->
case is_userinfo(Char) of
@@ -726,7 +749,7 @@ is_userinfo(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
-spec parse_host(binary(), uri_map()) -> {binary(), uri_map()}.
parse_host(?STRING_REST($:, Rest), URI) ->
{T, URI1} = parse_port(Rest, URI),
- H = calculate_parsed_part(Rest, T),
+ H = calculate_parsed_port(Rest, T),
Port = binary_to_integer(H),
{Rest, URI1#{port => Port}};
parse_host(?STRING_REST($/, Rest), URI) ->
@@ -741,7 +764,7 @@ parse_host(?STRING_REST($[, Rest), URI) ->
parse_ipv6_bin(Rest, [], URI);
parse_host(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- Fragment = calculate_parsed_part(Rest, T),
+ Fragment = calculate_parsed_fragment(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_host(?STRING_REST(Char, Rest), URI) ->
case is_digit(Char) of
@@ -755,7 +778,7 @@ parse_host(?STRING_EMPTY, URI) ->
-spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}.
parse_reg_name(?STRING_REST($:, Rest), URI) ->
{T, URI1} = parse_port(Rest, URI),
- H = calculate_parsed_part(Rest, T),
+ H = calculate_parsed_port(Rest, T),
Port = binary_to_integer(H),
{Rest, URI1#{port => Port}};
parse_reg_name(?STRING_REST($/, Rest), URI) ->
@@ -768,7 +791,7 @@ parse_reg_name(?STRING_REST($?, Rest), URI) ->
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_reg_name(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- Fragment = calculate_parsed_part(Rest, T),
+ Fragment = calculate_parsed_fragment(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_reg_name(?STRING_REST(Char, Rest), URI) ->
case is_reg_name(Char) of
@@ -788,7 +811,7 @@ is_reg_name(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
parse_ipv4_bin(?STRING_REST($:, Rest), Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
{T, URI1} = parse_port(Rest, URI),
- H = calculate_parsed_part(Rest, T),
+ H = calculate_parsed_port(Rest, T),
Port = binary_to_integer(H),
{Rest, URI1#{port => Port}};
parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) ->
@@ -804,7 +827,7 @@ parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) ->
parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- Fragment = calculate_parsed_part(Rest, T),
+ Fragment = calculate_parsed_fragment(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) ->
case is_ipv4(Char) of
@@ -851,7 +874,7 @@ is_ipv6(Char) -> is_hex_digit(Char).
-spec parse_ipv6_bin_end(binary(), uri_map()) -> {binary(), uri_map()}.
parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) ->
{T, URI1} = parse_port(Rest, URI),
- H = calculate_parsed_part(Rest, T),
+ H = calculate_parsed_port(Rest, T),
Port = binary_to_integer(H),
{Rest, URI1#{port => Port}};
parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) ->
@@ -864,7 +887,7 @@ parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) ->
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- Fragment = calculate_parsed_part(Rest, T),
+ Fragment = calculate_parsed_fragment(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) ->
case is_ipv6(Char) of
@@ -902,7 +925,7 @@ parse_port(?STRING_REST($?, Rest), URI) ->
{Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
parse_port(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
- Fragment = calculate_parsed_part(Rest, T),
+ Fragment = calculate_parsed_fragment(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_port(?STRING_REST(Char, Rest), URI) ->
case is_digit(Char) of
@@ -928,7 +951,7 @@ parse_port(?STRING_EMPTY, URI) ->
-spec parse_query(binary(), uri_map()) -> {binary(), uri_map()}.
parse_query(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI),
- Fragment = calculate_parsed_part(Rest, T),
+ Fragment = calculate_parsed_fragment(Rest, T),
{Rest, URI1#{fragment => decode_fragment(Fragment)}};
parse_query(?STRING_REST(Char, Rest), URI) ->
case is_query(Char) of
@@ -1055,11 +1078,88 @@ remove_brackets(Addr) -> Addr.
-spec calculate_parsed_part(binary(), binary()) -> binary().
calculate_parsed_part(<<$?>>, _) -> <<>>;
calculate_parsed_part(<<$#>>, _) -> <<>>;
+calculate_parsed_part(<<>>, _) -> <<>>;
+calculate_parsed_part(Input, <<>>) ->
+ case binary:last(Input) of
+ $? ->
+ init_binary(Input);
+ $# ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
calculate_parsed_part(Input, Unparsed) ->
{First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)),
First.
+-spec calculate_parsed_userinfo(binary(), binary()) -> binary().
+calculate_parsed_userinfo(<<$?>>, _) -> <<>>;
+calculate_parsed_userinfo(<<$#>>, _) -> <<>>;
+calculate_parsed_userinfo(<<>>, _) -> <<>>;
+calculate_parsed_userinfo(Input, <<>>) ->
+ case binary:last(Input) of
+ $? ->
+ init_binary(Input);
+ $# ->
+ init_binary(Input);
+ $@ ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
+calculate_parsed_userinfo(Input, Unparsed) ->
+ {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)),
+ First.
+
+
+-spec calculate_parsed_host(binary(), binary()) -> binary().
+calculate_parsed_host(<<$?>>, _) -> <<>>;
+calculate_parsed_host(<<$#>>, _) -> <<>>;
+calculate_parsed_host(<<>>, _) -> <<>>;
+calculate_parsed_host(Input, <<>>) ->
+ case binary:last(Input) of
+ $? ->
+ init_binary(Input);
+ $# ->
+ init_binary(Input);
+ $/ ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
+calculate_parsed_host(Input, Unparsed) ->
+ {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)),
+ First.
+
+
+-spec calculate_parsed_port(binary(), binary()) -> binary().
+calculate_parsed_port(<<$?>>, _) -> <<>>;
+calculate_parsed_port(<<$#>>, _) -> <<>>;
+calculate_parsed_port(<<>>, _) -> <<>>;
+calculate_parsed_port(Input, <<>>) ->
+ case binary:last(Input) of
+ $? ->
+ init_binary(Input);
+ $# ->
+ init_binary(Input);
+ $/ ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
+calculate_parsed_port(Input, Unparsed) ->
+ {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)),
+ First.
+
+
+-spec calculate_parsed_fragment(binary(), binary()) -> binary().
+calculate_parsed_fragment(<<$#>>, _) -> <<>>;
+calculate_parsed_fragment(Input, Unparsed) ->
+ {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)),
+ First.
+
+
%% Returns the parsed binary based on Input and the Unparsed part.
%% Used when parsing authority.
%%
@@ -1079,28 +1179,25 @@ calculate_parsed_part_sl(<<>>, _) -> <<>>;
calculate_parsed_part_sl(Input, <<>>) ->
case binary:last(Input) of
$? ->
- {First, _} =
- split_binary(Input, byte_size(Input) - 1),
- First;
-
+ init_binary(Input);
$# ->
- {First, _} =
- split_binary(Input, byte_size(Input) - 1),
- First;
+ init_binary(Input);
$/ ->
- {First, _} =
- split_binary(Input, byte_size(Input) - 1),
- First;
+ init_binary(Input);
_Else ->
- {First, _} =
- split_binary(Input, byte_size_exl_single_slash(Input)),
- First
+ Input
end;
calculate_parsed_part_sl(Input, Unparsed) ->
{First, _} =
split_binary(Input, byte_size_exl_single_slash(Input) - byte_size_exl_head(Unparsed)),
First.
+%% Return all bytes of the binary except the last one. The binary must be non-empty.
+init_binary(B) ->
+ {Init, _} =
+ split_binary(B, byte_size(B) - 1),
+ Init.
+
%% Returns the parsed binary based on Input and the Unparsed part.
%% Used when parsing scheme.
@@ -1109,6 +1206,7 @@ calculate_parsed_scheme(Input, Unparsed) ->
{First, _} = split_binary(Input, byte_size(Input) - byte_size(Unparsed) - 1),
First.
+
%% Returns the size of a binary exluding the first element.
%% Used in calls to split_binary().
-spec byte_size_exl_head(binary()) -> number().
@@ -1149,25 +1247,35 @@ byte_size_exl_single_slash(Rest) -> byte_size(Rest).
%%-------------------------------------------------------------------------
-spec decode_userinfo(binary()) -> binary().
decode_userinfo(Cs) ->
- decode(Cs, fun is_userinfo/1, <<>>).
+ check_utf8(decode(Cs, fun is_userinfo/1, <<>>)).
-spec decode_host(binary()) -> binary().
decode_host(Cs) ->
- decode(Cs, fun is_host/1, <<>>).
+ check_utf8(decode(Cs, fun is_host/1, <<>>)).
-spec decode_path(binary()) -> binary().
decode_path(Cs) ->
- decode(Cs, fun is_path/1, <<>>).
+ check_utf8(decode(Cs, fun is_path/1, <<>>)).
-spec decode_query(binary()) -> binary().
decode_query(Cs) ->
- decode(Cs, fun is_query/1, <<>>).
+ check_utf8(decode(Cs, fun is_query/1, <<>>)).
-spec decode_fragment(binary()) -> binary().
decode_fragment(Cs) ->
- decode(Cs, fun is_fragment/1, <<>>).
+ check_utf8(decode(Cs, fun is_fragment/1, <<>>)).
+%% Returns Cs if it is valid UTF-8, otherwise throws uri_parse_error.
+check_utf8(Cs) ->
+ case unicode:characters_to_list(Cs) of
+ {incomplete,_,_} ->
+ throw(uri_parse_error);
+ {error,_,_} ->
+ throw(uri_parse_error);
+ _ -> Cs
+ end.
+
%%-------------------------------------------------------------------------
%% Percent-encode
%%-------------------------------------------------------------------------
@@ -1368,10 +1476,15 @@ bracket_ipv6(Addr) when is_list(Addr) ->
%% E.g. "//user@:8080" => #{host => [],port => 8080,userinfo => "user"}
%% There is always at least an empty host when both userinfo and port
%% are present.
+%% - a path starting with "//" without a host, e.g. #{path => "///"};
+%% otherwise both of the following would be true:
+%% "/////" = uri_string:recompose(#{host => "", path => "///"})
+%% "/////" = uri_string:recompose(#{path => "/////"})
+%% AND the grammar requires path-absolute = "/" [ segment-nz *( "/" segment ) ]
%%-------------------------------------------------------------------------
is_valid_map(Map) ->
case
- (not maps:is_key(userinfo, Map) andalso
+ ((not maps:is_key(userinfo, Map) andalso
not maps:is_key(host, Map) andalso
maps:is_key(port, Map))
orelse
@@ -1381,7 +1494,9 @@ is_valid_map(Map) ->
orelse
(maps:is_key(userinfo, Map) andalso
not maps:is_key(host, Map) andalso
- maps:is_key(port, Map))
+ maps:is_key(port, Map))) orelse
+ not maps:is_key(path, Map) orelse
+ not is_host_and_path_valid(Map)
of
true ->
false;
@@ -1390,6 +1505,19 @@ is_valid_map(Map) ->
end.
+%% A map without a host must not have a path that starts with "//".
+%% Any map that has a host is accepted by this check.
+is_host_and_path_valid(Map) ->
+    case maps:get(host, Map, undefined) of
+        undefined ->
+            not starts_with_two_slash(maps:get(path, Map, undefined));
+        _Host ->
+            true
+    end.
+
+
+%% True when the argument is a binary or a list that begins with "//";
+%% false for anything else.
+starts_with_two_slash(?STRING_REST("//", _)) -> true;
+starts_with_two_slash([$/, $/ | _]) -> true;
+starts_with_two_slash(_) -> false.
+
+
update_scheme(#{scheme := Scheme}, _) ->
add_colon_postfix(encode_scheme(Scheme));
update_scheme(#{}, _) ->
diff --git a/lib/stdlib/test/property_test/uri_string_decode.erl b/lib/stdlib/test/property_test/uri_string_decode.erl
deleted file mode 100644
index 137a649cf1..0000000000
--- a/lib/stdlib/test/property_test/uri_string_decode.erl
+++ /dev/null
@@ -1,55 +0,0 @@
-%%
-%% %CopyrightBegin%
-%%
-%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%% http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
-%%
-%% %CopyrightEnd%
-%%
--module(uri_string_decode).
-
--compile(export_all).
-
--proptest(eqc).
--proptest([triq,proper]).
-
--ifndef(EQC).
--ifndef(PROPER).
--ifndef(TRIQ).
--define(EQC,true).
--endif.
--endif.
--endif.
-
--ifdef(EQC).
--include_lib("eqc/include/eqc.hrl").
--define(MOD_eqc,eqc).
-
--else.
--ifdef(PROPER).
--include_lib("proper/include/proper.hrl").
--define(MOD_eqc,proper).
-
--else.
--ifdef(TRIQ).
--define(MOD_eqc,triq).
--include_lib("triq/include/triq.hrl").
-
--endif.
--endif.
--endif.
-
-
-prop_uri_string_decode() ->
- ok.
diff --git a/lib/stdlib/test/property_test/uri_string_recompose.erl b/lib/stdlib/test/property_test/uri_string_recompose.erl
new file mode 100644
index 0000000000..dad67cd4c1
--- /dev/null
+++ b/lib/stdlib/test/property_test/uri_string_recompose.erl
@@ -0,0 +1,360 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+-module(uri_string_recompose).
+
+-compile(export_all).
+
+-proptest(eqc).
+-proptest([triq,proper]).
+
+-ifndef(EQC).
+-ifndef(PROPER).
+-ifndef(TRIQ).
+-define(EQC,true).
+-endif.
+-endif.
+-endif.
+
+-ifdef(EQC).
+-include_lib("eqc/include/eqc.hrl").
+-define(MOD_eqc,eqc).
+
+-else.
+-ifdef(PROPER).
+-include_lib("proper/include/proper.hrl").
+-define(MOD_eqc,proper).
+
+-else.
+-ifdef(TRIQ).
+-define(MOD_eqc,triq).
+-include_lib("triq/include/triq.hrl").
+
+-endif.
+-endif.
+-endif.
+
+
+%% Binary pattern: MatchStr (UTF-8 encoded) followed by the remaining bytes.
+-define(STRING_REST(MatchStr, Rest), <<MatchStr/utf8, Rest/binary>>).
+
+-define(SCHEME, {scheme, scheme()}).
+-define(USER, {userinfo, unicode()}).
+-define(HOST, {host, host_map()}).
+-define(PORT, {port, port()}).
+-define(PATH_ABE, {path, path_abempty_map()}).
+-define(PATH_ABS, {path, path_absolute_map()}).
+-define(PATH_NOS, {path, path_noscheme_map()}).
+-define(PATH_ROO, {path, path_rootless_map()}).
+-define(PATH_EMP, {path, path_empty_map()}).
+-define(QUERY, {query, query_map()}).
+-define(FRAGMENT, {fragment, fragment_map()}).
+
+
+%%%========================================================================
+%%% Properties
+%%%========================================================================
+
+%% Round-trip property: recomposing a generated URI map and parsing the
+%% resulting string must give back exactly the same map.
+prop_recompose() ->
+    ?FORALL(Map, map(),
+            begin
+                URIString = uri_string:recompose(Map),
+                uri_string:parse(URIString) =:= Map
+            end).
+
+%% Stats
+%% Statistics only (always passes): distribution of the number of
+%% components in the generated URI maps.
+prop_map_key_length_collect() ->
+    ?FORALL(Map, map(),
+            collect(map_size(Map), true)).
+
+%% Statistics only (always passes): distribution of the component
+%% combinations (sorted key lists) of the generated URI maps.
+prop_map_collect() ->
+    ?FORALL(Map, map(),
+            begin
+                Components = lists:sort(maps:keys(Map)),
+                collect(Components, true)
+            end).
+
+%% Statistics only (always passes): distribution of generated scheme lengths.
+prop_scheme_collect() ->
+    ?FORALL(Scheme, scheme(),
+            collect(length(Scheme), true)).
+
+
+%%%========================================================================
+%%% Generators
+%%%========================================================================
+
+%% Generator for URI maps: draws a component proplist from comp_proplist/0
+%% and converts it with proplist_to_map/1.
+map() ->
+    ?LET(Components, comp_proplist(), proplist_to_map(Components)).
+
+%% Generator for the component lists from which URI maps are built.
+%% Each alternative is a list of {Key, Generator} pairs (see the ?SCHEME,
+%% ?USER, ... macros) and every alternative has the same weight (2).
+%% The table has five groups of 14 alternatives: no query/fragment, query
+%% only, fragment only, and both; each group lists the variants with a scheme
+%% first and the variants without a scheme second.
+comp_proplist() ->
+ frequency([
+ %% No query, no fragment - with scheme
+ {2, [?SCHEME,?PATH_ABS]},
+ {2, [?SCHEME,?PATH_ROO]},
+ {2, [?SCHEME,?PATH_EMP]},
+ {2, [?SCHEME,?HOST,?PATH_ABE]},
+ {2, [?SCHEME,?USER,?HOST,?PATH_ABE]},
+ {2, [?SCHEME,?HOST,?PORT,?PATH_ABE]},
+ {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE]},
+
+ %% No query, no fragment - without scheme
+ {2, [?PATH_ABS]},
+ {2, [?PATH_NOS]},
+ {2, [?PATH_EMP]},
+ {2, [?HOST,?PATH_ABE]},
+ {2, [?USER,?HOST,?PATH_ABE]},
+ {2, [?HOST,?PORT,?PATH_ABE]},
+ {2, [?USER,?HOST,?PORT,?PATH_ABE]},
+
+
+ %% Query only - with scheme
+ {2, [?SCHEME,?PATH_ABS,?QUERY]},
+ {2, [?SCHEME,?PATH_ROO,?QUERY]},
+ {2, [?SCHEME,?PATH_EMP,?QUERY]},
+ {2, [?SCHEME,?HOST,?PATH_ABE,?QUERY]},
+ {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?QUERY]},
+ {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?QUERY]},
+ {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?QUERY]},
+
+ %% Query only - without scheme
+ {2, [?PATH_ABS,?QUERY]},
+ {2, [?PATH_NOS,?QUERY]},
+ {2, [?PATH_EMP,?QUERY]},
+ {2, [?HOST,?PATH_ABE,?QUERY]},
+ {2, [?USER,?HOST,?PATH_ABE,?QUERY]},
+ {2, [?HOST,?PORT,?PATH_ABE,?QUERY]},
+ {2, [?USER,?HOST,?PORT,?PATH_ABE,?QUERY]},
+
+
+ %% Fragment only - with scheme
+ {2, [?SCHEME,?PATH_ABS,?FRAGMENT]},
+ {2, [?SCHEME,?PATH_ROO,?FRAGMENT]},
+ {2, [?SCHEME,?PATH_EMP,?FRAGMENT]},
+ {2, [?SCHEME,?HOST,?PATH_ABE,?FRAGMENT]},
+ {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?FRAGMENT]},
+ {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?FRAGMENT]},
+ {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?FRAGMENT]},
+
+ %% Fragment only - without scheme
+ {2, [?PATH_ABS,?FRAGMENT]},
+ {2, [?PATH_NOS,?FRAGMENT]},
+ {2, [?PATH_EMP,?FRAGMENT]},
+ {2, [?HOST,?PATH_ABE,?FRAGMENT]},
+ {2, [?USER,?HOST,?PATH_ABE,?FRAGMENT]},
+ {2, [?HOST,?PORT,?PATH_ABE,?FRAGMENT]},
+ {2, [?USER,?HOST,?PORT,?PATH_ABE,?FRAGMENT]},
+
+
+ %% Query and fragment - with scheme
+ {2, [?SCHEME,?PATH_ABS,?QUERY,?FRAGMENT]},
+ {2, [?SCHEME,?PATH_ROO,?QUERY,?FRAGMENT]},
+ {2, [?SCHEME,?PATH_EMP,?QUERY,?FRAGMENT]},
+ {2, [?SCHEME,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]},
+ {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]},
+ {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]},
+ {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]},
+
+ %% Query and fragment - without scheme
+ {2, [?PATH_ABS,?QUERY,?FRAGMENT]},
+ {2, [?PATH_NOS,?QUERY,?FRAGMENT]},
+ {2, [?PATH_EMP,?QUERY,?FRAGMENT]},
+ {2, [?HOST,?PATH_ABE,?QUERY,?FRAGMENT]},
+ {2, [?USER,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]},
+ {2, [?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]},
+ {2, [?USER,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]}
+ ]).
+
+
+%%-------------------------------------------------------------------------
+%% Path
+%%-------------------------------------------------------------------------
+path_abempty_map() ->
+ frequency([{90, path_abe_map()},
+ {10, path_empty_map()}]).
+
+path_abe_map() ->
+ ?SIZED(Length, path_abe_map(Length, [])).
+%%
+path_abe_map(0, Segments) ->
+ ?LET(Gen, Segments, lists:append(Gen));
+path_abe_map(N, Segments) ->
+ path_abe_map(N-1, [slash(),segment()|Segments]).
+
+
+path_absolute_map() ->
+ ?SIZED(Length, path_absolute_map(Length, [])).
+%%
+path_absolute_map(0, Segments) ->
+ ?LET(Gen, [slash(),segment_nz()|Segments], lists:append(Gen));
+path_absolute_map(N, Segments) ->
+ path_absolute_map(N-1, [slash(),segment()|Segments]).
+
+
+path_noscheme_map() ->
+ ?SIZED(Length, path_noscheme_map(Length, [])).
+%%
+path_noscheme_map(0, Segments) ->
+ ?LET(Gen, [segment_nz_nc()|Segments], lists:append(Gen));
+path_noscheme_map(N, Segments) ->
+ path_noscheme_map(N-1, [slash(),segment()|Segments]).
+
+path_rootless_map() ->
+ ?SIZED(Length, path_rootless_map(Length, [])).
+%%
+path_rootless_map(0, Segments) ->
+ ?LET(Gen, [segment_nz()|Segments], lists:append(Gen));
+path_rootless_map(N, Segments) ->
+ path_rootless_map(N-1, [slash(),segment()|Segments]).
+
+
+segment_nz() ->
+ non_empty(segment()).
+
+segment_nz_nc() ->
+ non_empty(list(frequency([{30, unreserved()},
+ {10, sub_delims()},
+ {10, unicode_char()},
+ {5, oneof([$@])}
+ ]))).
+
+
+segment() ->
+ list(frequency([{30, unreserved()},
+ {10, sub_delims()},
+ {10, unicode_char()},
+ {5, oneof([$:, $@])}
+ ])).
+
+slash() ->
+ "/".
+
+path_empty_map() ->
+ "".
+
+
+%%-------------------------------------------------------------------------
+%% Host
+%%-------------------------------------------------------------------------
+host_map() ->
+ frequency([{30, reg_name()},
+ {30, ip_address()}
+ ]).
+
+
+reg_name() ->
+ list(frequency([{30, alpha()},
+ {10, sub_delims()},
+ {10, unicode_char()}
+ ])).
+
+ip_address() ->
+ oneof(["127.0.0.1", "::127.0.0.1",
+ "2001:0db8:0000:0000:0000:0000:1428:07ab",
+ "2001:0db8:0000:0000:0000::1428:07ab",
+ "2001:0db8:0:0:0:0:1428:07ab",
+ "2001:0db8:0::0:1428:07ab"]).
+
+%% Generating only reg-names
+host_uri() ->
+ non_empty(list(frequency([{30, unreserved()},
+ {10, sub_delims()},
+ {10, pct_encoded()}
+ ]))).
+
+%%-------------------------------------------------------------------------
+%% Port, Query, Fragment
+%%-------------------------------------------------------------------------
+port() ->
+ range(1,65535).
+
+
+query_map() ->
+ [$?| unicode()].
+
+
+query_uri() ->
+ [$?| non_empty(list(frequency([{20, pchar()},
+ {5, oneof([$/, $?])} % punctuation
+ ])))].
+
+fragment_map() ->
+ unicode().
+
+%% Generator for a non-empty fragment in URI (percent-encoded) form,
+%% prefixed with its "#" delimiter. The body follows RFC 3986:
+%%   fragment = *( pchar / "/" / "?" )
+%% The prefix used to be "?", copied from query_uri/0.
+fragment_uri() ->
+    [$#| non_empty(list(frequency([{20, pchar()},
+                                   {5, oneof([$/, $?])} % punctuation
+                                  ])))].
+
+
+%%-------------------------------------------------------------------------
+%% Scheme
+%%-------------------------------------------------------------------------
+%% Generator for URI schemes; the length is taken from the current size
+%% parameter of the property testing tool (?SIZED).
+%% RFC 3986: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+%% Note: for size 0 the generated scheme is the empty list.
+scheme() ->
+ ?SIZED(Length, scheme_start(Length, [])).
+%%
+%% First character: always drawn from alpha/0, the rest is left to scheme/2.
+scheme_start(0, L) ->
+ ?LET(Gen, L, lists:reverse(Gen));
+scheme_start(N, L) ->
+ scheme(N-1,[alpha()|L]).
+
+%% Remaining characters: N generators from scheme_char/0 are prepended to L;
+%% the accumulated list is reversed once the generators have been evaluated.
+scheme(0, L) ->
+ ?LET(Gen, L, lists:reverse(Gen));
+scheme(N, L) ->
+ scheme(N-1, [scheme_char()|L]).
+
+
+%%-------------------------------------------------------------------------
+%% Misc
+%%-------------------------------------------------------------------------
+unicode() ->
+ list(frequency([{20, alpha()}, % alpha
+ {10, digit()}, % digit
+ {10, unicode_char()} % unicode
+ ])).
+
+scheme_char() ->
+ frequency([{20, alpha()}, % alpha
+ {20, digit()}, % digit
+ {5, oneof([$+, $-, $.])} % punctuation
+ ]).
+
+sub_delims() ->
+ oneof([$!, $$, $&, $', $(, $),
+ $*, $+, $,,$;, $=]).
+
+pchar() ->
+ frequency([{20, unreserved()},
+ {5, pct_encoded()},
+ {5, sub_delims()},
+ {1, oneof([$:, $@])} % punctuation
+ ]).
+
+unreserved() ->
+ frequency([{20, alpha()},
+ {5, digit()},
+ {1, oneof([$-, $., $_, $~])} % punctuation
+ ]).
+
+unicode_char() ->
+ range(913, 1023).
+
+alpha() ->
+ frequency([{20, range($a, $z)}, % letters
+ {20, range($A, $Z)}]). % letters
+
+digit() ->
+ range($0, $9). % numbers
+
+pct_encoded() ->
+ oneof(["%C3%A4", "%C3%A5", "%C3%B6"]).
+
+
+%%%========================================================================
+%%% Helpers
+%%%========================================================================
+%% Builds a map from a list of {Key, Value} pairs. Elements that are not
+%% 2-tuples are skipped; when a key occurs more than once the last pair wins.
+proplist_to_map(L) ->
+    Pairs = [Pair || {_Key, _Value} = Pair <- L],
+    maps:from_list(Pairs).
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index 0eb5105c35..cd2e003d02 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -31,7 +31,8 @@
parse_path/1, parse_pct_encoded_fragment/1, parse_pct_encoded_query/1,
parse_pct_encoded_userinfo/1, parse_port/1,
parse_query/1, parse_scheme/1, parse_userinfo/1,
- parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1, parse_special/1,
+ parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1,
+ parse_special/1, parse_special2/1,
recompose_fragment/1, recompose_parse_fragment/1,
recompose_query/1, recompose_parse_query/1,
recompose_path/1, recompose_parse_path/1,
@@ -90,6 +91,7 @@ all() ->
parse_mixed,
parse_relative,
parse_special,
+ parse_special2,
recompose_fragment,
recompose_parse_fragment,
recompose_query,
@@ -114,7 +116,7 @@ uri_combinations() ->
Hst <- [fun update_host/1, fun update_host_binary/1,
fun update_ipv6/1, fun update_ipv6_binary/1, none],
Prt <- [fun update_port/1, none],
- Pat <- [fun update_path/1, fun update_path_binary/1, none],
+ Pat <- [fun update_path/1, fun update_path_binary/1],
Qry <- [fun update_query/1,fun update_query_binary/1, none],
Frg <- [fun update_fragment/1, fun update_fragment_binary/1, none],
not (Usr =:= none andalso Hst =:= none andalso Prt =/= none),
@@ -312,9 +314,7 @@ parse_binary_userinfo(_Config) ->
#{scheme := <<"foo">>, userinfo := <<"user">>, host := <<"localhost">>} =
uri_string:parse(<<"foo://user@localhost">>),
#{scheme := <<"foo">>, userinfo := <<"user:password">>, host := <<"localhost">>} =
- uri_string:parse(<<"foo://user:password@localhost">>),
- uri_parse_error =(catch uri_string:parse(<<"//user@">>)),
- uri_parse_error = (catch uri_string:parse(<<"foo://user@">>)).
+ uri_string:parse(<<"foo://user:password@localhost">>).
parse_binary_pct_encoded_userinfo(_Config) ->
#{scheme := <<"user">>, path := <<"合@気道"/utf8>>} =
@@ -667,14 +667,24 @@ parse_special(_Config) ->
#{host := "foo",path := "/"} = uri_string:parse("//foo/"),
#{host := "foo",query := "?",scheme := "http"} = uri_string:parse("http://foo?"),
#{fragment := [],host := "foo",scheme := "http"} = uri_string:parse("http://foo#"),
- #{host := "foo",path := "/",scheme := "http"} = uri_string:parse("http://foo/").
+ #{host := "foo",path := "/",scheme := "http"} = uri_string:parse("http://foo/"),
+ #{fragment := [],host := "host",port := 80,scheme := "http"} = uri_string:parse("http://host:80#"),
+ #{host := "host",port := 80,query := "?",scheme := "http"} = uri_string:parse("http://host:80?").
+
+parse_special2(_Config) ->
+ #{host := [],path := "/",port := 1,scheme := "a"} = uri_string:parse("a://:1/"),
+ #{path := "/a/",scheme := "a"} = uri_string:parse("a:/a/"),
+ #{host := [],path := [],userinfo := []} = uri_string:parse("//@"),
+ #{host := [],path := [],scheme := "foo",userinfo := []} = uri_string:parse("foo://@"),
+ #{host := [],path := "/",userinfo := []} = uri_string:parse("//@/"),
+ #{host := [],path := "/",scheme := "foo",userinfo := []} = uri_string:parse("foo://@/").
%%-------------------------------------------------------------------------
%% Recompose tests
%%-------------------------------------------------------------------------
recompose_fragment(_Config) ->
- <> = uri_string:recompose(#{fragment => <>}),
- ?FRAGMENT_ENC = uri_string:recompose(#{fragment => ?FRAGMENT}).
+ <> = uri_string:recompose(#{fragment => <>, path => <<>>}),
+ ?FRAGMENT_ENC = uri_string:recompose(#{fragment => ?FRAGMENT, path => ""}).
recompose_parse_fragment(_Config) ->
<> = uri_string:recompose(uri_string:parse(<>)),
@@ -682,15 +692,17 @@ recompose_parse_fragment(_Config) ->
recompose_query(_Config) ->
<> =
- uri_string:recompose(#{query => <>}),
+ uri_string:recompose(#{query => <>, path => <<>>}),
<> =
uri_string:recompose(#{query => <>,
- fragment => <>}),
+ fragment => <>,
+ path => <<>>}),
"?name=%C3%B6rn" =
- uri_string:recompose(#{query => "?name=örn"}),
+ uri_string:recompose(#{query => "?name=örn", path => ""}),
"?name=%C3%B6rn#n%C3%A4sa" =
uri_string:recompose(#{query => "?name=örn",
- fragment => "näsa"}).
+ fragment => "näsa",
+ path => ""}).
recompose_parse_query(_Config) ->
<<"?name=%C3%B6rn">> = uri_string:recompose(uri_string:parse(<<"?name=%C3%B6rn">>)),
diff --git a/lib/stdlib/test/uri_string_property_test_SUITE.erl b/lib/stdlib/test/uri_string_property_test_SUITE.erl
index de5edf54aa..ae2c61c7aa 100644
--- a/lib/stdlib/test/uri_string_property_test_SUITE.erl
+++ b/lib/stdlib/test/uri_string_property_test_SUITE.erl
@@ -20,10 +20,9 @@
-module(uri_string_property_test_SUITE).
-include_lib("common_test/include/ct.hrl").
-
-compile(export_all).
-all() -> [decode].
+all() -> [recompose].
init_per_suite(Config) ->
ct_property_test:init_per_suite(Config).
@@ -31,12 +30,10 @@ init_per_suite(Config) ->
end_per_suite(Config) ->
Config.
-%%%================================================================
+%%%========================================================================
%%% Test suites
-%%%
-
-decode(Config) ->
+%%%========================================================================
+recompose(Config) ->
ct_property_test:quickcheck(
- uri_string_decode:prop_uri_string_decode(),
- Config
- ).
+ uri_string_recompose:prop_recompose(),
+ Config).
--
cgit v1.2.3
From 4a2358bbf4a4049a765aab435a31daeeffbbd677 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?=
Date: Wed, 11 Oct 2017 16:36:14 +0200
Subject: stdlib: Implement transcode/2.
---
lib/stdlib/src/uri_string.erl | 112 ++++++++++++++++++++++++++++++++++-
lib/stdlib/test/uri_string_SUITE.erl | 39 +++++++++++-
2 files changed, 147 insertions(+), 4 deletions(-)
mode change 100755 => 100644 lib/stdlib/src/uri_string.erl
(limited to 'lib/stdlib')
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
old mode 100755
new mode 100644
index 893ba4c6bf..439ffa80da
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -353,8 +353,26 @@ normalize(_) ->
-spec transcode(URIString, Options) -> URIString when
URIString :: uri_string(),
Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}].
-transcode(_, _) ->
- "".
+%% Transcodes the percent-encoded parts of URIString from the encoding given
+%% by the in_encoding option to the one given by out_encoding (both default
+%% to utf8). Binary input gives binary output in the out-encoding, list input
+%% gives list output. Conversion failures thrown by the helpers as
+%% {error, Converted, RestData} are returned as
+%% {invalid_input, Converted, RestData}.
+%% NOTE(review): the error tuple is not covered by the return type of the
+%% -spec - confirm whether the spec should be widened.
+transcode(URIString, Options) when is_binary(URIString) ->
+    try
+        InEnc = proplists:get_value(in_encoding, Options, utf8),
+        OutEnc = proplists:get_value(out_encoding, Options, utf8),
+        Chars = convert_list(URIString, InEnc),
+        Transcoded = transcode(Chars, [], InEnc, OutEnc),
+        convert_binary(Transcoded, utf8, OutEnc)
+    catch
+        throw:{error, Converted, RestData} ->
+            {invalid_input, Converted, RestData}
+    end;
+transcode(URIString, Options) when is_list(URIString) ->
+    InEnc = proplists:get_value(in_encoding, Options, utf8),
+    OutEnc = proplists:get_value(out_encoding, Options, utf8),
+    try
+        transcode(URIString, [], InEnc, OutEnc)
+    catch
+        throw:{error, Converted, RestData} ->
+            {invalid_input, Converted, RestData}
+    end.
%%-------------------------------------------------------------------------
%% Working with query strings
@@ -1624,3 +1642,93 @@ maybe_to_list(Comp) -> Comp.
encode_port(Port) ->
integer_to_binary(Port).
+
+%%-------------------------------------------------------------------------
+%% Helper functions for transcode
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% uri_string:transcode(<<"x%00%00%00%F6"/utf32>>, [{in_encoding, utf32}]).
+%% 1. Convert (transcode/2) input to list form (list of unicode codepoints)
+%% "x%00%00%00%F6"
+%% 2. Accumulate characters until percent-encoded segment (transcode/4).
+%% Acc = "x"
+%% 3. Convert percent-encoded triplets to binary form (transcode_pct/4)
+%% <<0,0,0,246>>
+%% 4. Transcode in-encoded binary to out-encoding (utf32 -> utf8):
+%% <<195,182>>
+%% 5. Percent-encode out-encoded binary:
+%% <<"%C3%B6"/utf8>> = <<37,67,51,37,66,54>>
+%% 6. Convert binary to list form, reverse it and append the accumulator
+%% "6B%3C%" + "x"
+%% 7. Reverse Acc and return it
+%%-------------------------------------------------------------------------
+%% transcode/4: entry point of the transcoding loop.
+%%   Input   - list form of the URI (may still contain nested lists/binaries)
+%%   Acc     - already transcoded characters, in reverse order
+%% Input starting with a percent-encoded triplet is handed to
+%% transcode_pct/5, anything else to transcode/5.
+transcode([$%,_C0,_C1|_Rest] = L, Acc, InEnc, OutEnc) ->
+    transcode_pct(L, Acc, <<>>, InEnc, OutEnc);
+transcode([_C|_Rest] = L, Acc, InEnc, OutEnc) ->
+    transcode(L, Acc, [], InEnc, OutEnc);
+%% Empty input: nothing to transcode. Without this clause an empty URI
+%% (<<>> or "") crashes with function_clause.
+transcode([], Acc, _InEnc, _OutEnc) ->
+    lists:reverse(Acc).
+%%
+%% transcode/5: consumes characters that are not percent-encoded.
+%%   Acc  - output produced so far, in reverse order
+%%   List - characters of the current plain run, in reverse order
+%% Binary elements are converted with convert_list/2 and list elements are
+%% spliced into the input before processing continues.
+transcode([H|T], Acc, List, InEnc, OutEnc) when is_binary(H) ->
+ L = convert_list(H, InEnc),
+ transcode(L ++ T, Acc, List, InEnc, OutEnc);
+transcode([H|T], Acc, List, InEnc, OutEnc) when is_list(H) ->
+ transcode(H ++ T, Acc, List, InEnc, OutEnc);
+%% A percent-encoded triplet ends the plain run: push the run onto Acc
+%% (both are reversed, so List goes in front) and switch to transcode_pct/5.
+transcode([$%,_C0,_C1|_Rest] = L, Acc, List, InEncoding, OutEncoding) ->
+ transcode_pct(L, List ++ Acc, <<>>, InEncoding, OutEncoding);
+transcode([C|Rest], Acc, List, InEncoding, OutEncoding) ->
+ transcode(Rest, Acc, [C|List], InEncoding, OutEncoding);
+%% End of input: restore the original order of everything accumulated.
+transcode([], Acc, List, _InEncoding, _OutEncoding) ->
+ lists:reverse(List ++ Acc).
+
+
+%% Transcode percent-encoded segment.
+%%
+%% Collects a run of consecutive percent-encoded triplets as raw bytes in B.
+%% When the run ends (a different character or end of input) the bytes are
+%% converted from InEncoding to OutEncoding, percent-encoded again and added
+%% to the output.
+%%   Acc - output produced so far, in reverse order
+%%   B   - bytes decoded from the current run of triplets
+%% Throws {error, Parsed, [C0,C1]} when "%" is not followed by two hex
+%% digits; Parsed is the output produced so far in the original order.
+%% Conversion failures are thrown by convert_binary/3 and convert_list/2.
+transcode_pct([H|T], Acc, B, InEnc, OutEnc) when is_binary(H) ->
+    L = convert_list(H, InEnc),
+    transcode_pct(L ++ T, Acc, B, InEnc, OutEnc);
+transcode_pct([H|T], Acc, B, InEnc, OutEnc) when is_list(H) ->
+    transcode_pct(H ++ T, Acc, B, InEnc, OutEnc);
+transcode_pct([$%,C0,C1|Rest], Acc, B, InEncoding, OutEncoding) ->
+    case is_hex_digit(C0) andalso is_hex_digit(C1) of
+        true ->
+            Int = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
+            %% Append the decoded byte to the bytes collected so far.
+            transcode_pct(Rest, Acc, <<B/binary, Int>>, InEncoding, OutEncoding);
+        false -> throw({error, lists:reverse(Acc),[C0,C1]})
+    end;
+%% The run of triplets is over but more input follows: emit the transcoded
+%% run (reversed, because Acc is reversed) and continue with plain characters.
+transcode_pct([_C|_Rest] = L, Acc, B, InEncoding, OutEncoding) ->
+    OutBinary = convert_binary(B, InEncoding, OutEncoding),
+    PctEncUtf8 = percent_encode_segment(OutBinary),
+    Out = lists:reverse(convert_list(PctEncUtf8, utf8)),
+    transcode(L, Out ++ Acc, [], InEncoding, OutEncoding);
+%% End of input: emit the transcoded run and return the complete result in
+%% the original order.
+transcode_pct([], Acc, B, InEncoding, OutEncoding) ->
+    OutBinary = convert_binary(B, InEncoding, OutEncoding),
+    PctEncUtf8 = percent_encode_segment(OutBinary),
+    Out = convert_list(PctEncUtf8, utf8),
+    lists:reverse(Acc) ++ Out.
+
+
+%% Converts character data from InEncoding to a binary in OutEncoding.
+%% A failed or incomplete conversion is thrown as
+%% {error, Converted, RestData}.
+convert_binary(Binary, InEncoding, OutEncoding) ->
+    case unicode:characters_to_binary(Binary, InEncoding, OutEncoding) of
+        {Tag, Converted, RestData} when Tag =:= error; Tag =:= incomplete ->
+            throw({error, Converted, RestData});
+        Result ->
+            Result
+    end.
+
+
+%% Converts character data in InEncoding to a list of unicode codepoints.
+%% A failed or incomplete conversion is thrown as
+%% {error, Converted, RestData}.
+convert_list(Binary, InEncoding) ->
+    case unicode:characters_to_list(Binary, InEncoding) of
+        {Tag, Converted, RestData} when Tag =:= error; Tag =:= incomplete ->
+            throw({error, Converted, RestData});
+        Result ->
+            Result
+    end.
+
+
+%% Percent-encodes Segment by calling percent_encode_binary/2 with <<>> as
+%% the second argument (presumably the initial accumulator - confirm against
+%% percent_encode_binary/2).
+percent_encode_segment(Segment) ->
+ percent_encode_binary(Segment, <<>>).
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index cd2e003d02..83f702dd13 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -36,7 +36,8 @@
recompose_fragment/1, recompose_parse_fragment/1,
recompose_query/1, recompose_parse_query/1,
recompose_path/1, recompose_parse_path/1,
- recompose_autogen/1, parse_recompose_autogen/1
+ recompose_autogen/1, parse_recompose_autogen/1,
+ transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1
]).
@@ -99,7 +100,11 @@ all() ->
recompose_path,
recompose_parse_path,
recompose_autogen,
- parse_recompose_autogen
+ parse_recompose_autogen,
+ transcode_basic,
+ transcode_options,
+ transcode_mixed,
+ transcode_negative
].
groups() ->
@@ -763,3 +768,33 @@ recompose_autogen(_Config) ->
parse_recompose_autogen(_Config) ->
Tests = generate_test_vectors(uri_combinations()),
lists:map(fun run_test_parse_recompose/1, Tests).
+
+transcode_basic(_Config) ->
+ <<"foo%C3%B6bar"/utf8>> =
+ uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>, [{in_encoding, utf32},{out_encoding, utf8}]),
+ "foo%C3%B6bar" =
+ uri_string:transcode("foo%00%00%00%F6bar", [{in_encoding, utf32},{out_encoding, utf8}]),
+ <<"foo%00%00%00%F6bar"/utf32>> =
+ uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]),
+ "foo%00%00%00%F6bar" =
+ uri_string:transcode("foo%C3%B6bar", [{in_encoding, utf8},{out_encoding, utf32}]),
+ "foo%C3%B6bar" =
+ uri_string:transcode("foo%F6bar", [{in_encoding, latin1},{out_encoding, utf8}]).
+
+transcode_options(_Config) ->
+ <<"foo%C3%B6bar"/utf8>> =
+ uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, []),
+ <<"foo%C3%B6bar"/utf8>> =
+ uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>, [{in_encoding, utf32}]),
+ <<"foo%00%00%00%F6bar"/utf32>> =
+ uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, [{out_encoding, utf32}]).
+
+transcode_mixed(_Config) ->
+ "foo%00%00%00%F6bar" =
+ uri_string:transcode(["foo",<<"%C3%B6"/utf8>>,<<"ba"/utf8>>,"r"], [{out_encoding, utf32}]).
+
+transcode_negative(_Config) ->
+ {invalid_input,"foo","BX"} =
+ uri_string:transcode(<<"foo%C3%BXbar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]),
+ {invalid_input,<<>>,<<"ö">>} =
+ uri_string:transcode("foo%F6bar", [{in_encoding, utf8},{out_encoding, utf8}]).
--
cgit v1.2.3
From 57f8021105f1c213be674681f48d0c8e92935ff6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?=
Date: Mon, 16 Oct 2017 13:30:36 +0200
Subject: stdlib: Change handling of queries ["?" query]
Previously when parsing queries the first "?" was part of the
parsed query in the result Map. This behavior has been changed
to follow the patterns used with other URI components and to
not include the special character(s) that mark the start of a
specific component.
---
lib/stdlib/src/uri_string.erl | 80 +++++++++---------
.../test/property_test/uri_string_recompose.erl | 2 +-
lib/stdlib/test/uri_string_SUITE.erl | 97 +++++++++++-----------
3 files changed, 93 insertions(+), 86 deletions(-)
(limited to 'lib/stdlib')
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index 439ffa80da..f9e1e273bc 100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -466,9 +466,9 @@ parse_relative_part(?STRING_REST($/, Rest), URI) ->
URI1#{path => decode_path(?STRING_REST($/, Path))};
parse_relative_part(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- Query = calculate_parsed_part(Rest, T),
+ Query = calculate_parsed_query(Rest, T),
URI2 = maybe_add_path(URI1),
- URI2#{query => decode_query(?STRING_REST($?, Query))};
+ URI2#{query => decode_query(Query)};
parse_relative_part(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
Fragment = calculate_parsed_fragment(Rest, T),
@@ -521,8 +521,8 @@ parse_segment(?STRING_REST($/, Rest), URI) ->
parse_segment(Rest, URI); % segment
parse_segment(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % ?query
- Query = calculate_parsed_part(Rest, T),
- {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
+ Query = calculate_parsed_query(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
parse_segment(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI),
Fragment = calculate_parsed_fragment(Rest, T),
@@ -544,8 +544,8 @@ parse_segment_nz_nc(?STRING_REST($/, Rest), URI) ->
parse_segment(Rest, URI); % segment
parse_segment_nz_nc(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % ?query
- Query = calculate_parsed_part(Rest, T),
- {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
+ Query = calculate_parsed_query(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
parse_segment_nz_nc(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI),
Fragment = calculate_parsed_fragment(Rest, T),
@@ -595,14 +595,6 @@ parse_scheme_start(?STRING_REST(Char, Rest), URI) ->
%% According to the URI specification there is always a
%% path component in every URI-reference and it can be
%% empty.
-
-%% maybe_add_path(Map) ->
-%% case length(maps:keys(Map)) of
-%% 0 ->
-%% Map#{path => <<>>};
-%% _Else ->
-%% Map
-%% end.
maybe_add_path(Map) ->
case maps:is_key(path, Map) of
false ->
@@ -659,8 +651,8 @@ parse_hier(?STRING_REST($/, Rest), URI) ->
{Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_hier(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- Query = calculate_parsed_part(Rest, T),
- {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
+ Query = calculate_parsed_query(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
parse_hier(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
Fragment = calculate_parsed_fragment(Rest, T),
@@ -776,8 +768,8 @@ parse_host(?STRING_REST($/, Rest), URI) ->
{Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_host(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- Query = calculate_parsed_part(Rest, T),
- {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
+ Query = calculate_parsed_query(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
parse_host(?STRING_REST($[, Rest), URI) ->
parse_ipv6_bin(Rest, [], URI);
parse_host(?STRING_REST($#, Rest), URI) ->
@@ -805,8 +797,8 @@ parse_reg_name(?STRING_REST($/, Rest), URI) ->
{Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_reg_name(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- Query = calculate_parsed_part(Rest, T),
- {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
+ Query = calculate_parsed_query(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
parse_reg_name(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
Fragment = calculate_parsed_fragment(Rest, T),
@@ -840,8 +832,8 @@ parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) ->
parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- Query = calculate_parsed_part(Rest, T),
- {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
+ Query = calculate_parsed_query(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
{T, URI1} = parse_fragment(Rest, URI), % path-empty
@@ -901,8 +893,8 @@ parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) ->
{Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- Query = calculate_parsed_part(Rest, T),
- {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
+ Query = calculate_parsed_query(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
Fragment = calculate_parsed_fragment(Rest, T),
@@ -939,8 +931,8 @@ parse_port(?STRING_REST($/, Rest), URI) ->
{Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
parse_port(?STRING_REST($?, Rest), URI) ->
{T, URI1} = parse_query(Rest, URI), % path-empty ?query
- Query = calculate_parsed_part(Rest, T),
- {Rest, URI1#{query => decode_query(?STRING_REST($?, Query))}};
+ Query = calculate_parsed_query(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
parse_port(?STRING_REST($#, Rest), URI) ->
{T, URI1} = parse_fragment(Rest, URI), % path-empty
Fragment = calculate_parsed_fragment(Rest, T),
@@ -1090,7 +1082,7 @@ remove_brackets(Addr) -> Addr.
%% Returns the parsed binary based on Input and the Unparsed part.
%% Handles the following special cases:
%%
-%% #{host => [],path => "/",query => "?"} = uri_string:parse("///?")
+%% #{host => [],path => "/",query => []} = uri_string:parse("///?")
%% #{fragment => [],host => [],path => "/"} = uri_string:parse("///#")
%%
-spec calculate_parsed_part(binary(), binary()) -> binary().
@@ -1171,6 +1163,20 @@ calculate_parsed_port(Input, Unparsed) ->
First.
+%% Returns the query component given Input (the data following the "?"
+%% delimiter) and Unparsed (what parse_query left unconsumed).
+%%   - Input is just "#" or is empty: the query is empty.
+%%   - Nothing is left unparsed: the query is Input, passed through
+%%     init_binary/1 when its last byte is "#" (presumably to drop that
+%%     delimiter - confirm against init_binary/1).
+%%   - Otherwise: the leading part of Input, whose length is
+%%     byte_size(Input) - byte_size_exl_head(Unparsed).
+-spec calculate_parsed_query(binary(), binary()) -> binary().
+calculate_parsed_query(<<$#>>, _) -> <<>>;
+calculate_parsed_query(<<>>, _) -> <<>>;
+calculate_parsed_query(Input, <<>>) ->
+    case binary:last(Input) of
+        $# ->
+            init_binary(Input);
+        _Else ->
+            Input
+    end;
+calculate_parsed_query(Input, Unparsed) ->
+    {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)),
+    First.
+
+
-spec calculate_parsed_fragment(binary(), binary()) -> binary().
calculate_parsed_fragment(<<$#>>, _) -> <<>>;
calculate_parsed_fragment(Input, Unparsed) ->
@@ -1183,10 +1189,10 @@ calculate_parsed_fragment(Input, Unparsed) ->
%%
%% Handles the following special cases:
%%
-%% #{host => "foo",query => "?"} = uri_string:parse("//foo?")
+%% #{host => "foo",query => []} = uri_string:parse("//foo?")
%% #{fragment => [],host => "foo"} = uri_string:parse("//foo#")
%% #{host => "foo",path => "/"} = uri_string:parse("//foo/")
-%% #{host => "foo",query => "?",scheme => "http"} = uri_string:parse("http://foo?")
+%% #{host => "foo",query => [],scheme => "http"} = uri_string:parse("http://foo?")
%% #{fragment => [],host => "foo",scheme => "http"} = uri_string:parse("http://foo#")
%% #{host => "foo",path => "/",scheme => "http"} = uri_string:parse("http://foo/")
%%
@@ -1329,10 +1335,7 @@ encode_path(Cs) ->
-spec encode_query(list()|binary()) -> list() | binary().
encode_query(Cs) ->
- case validate_query(Cs) of
- true -> encode(Cs, fun is_query/1);
- false -> throw(uri_parse_error)
- end.
+ encode(Cs, fun is_query/1).
-spec encode_fragment(list()|binary()) -> list() | binary().
encode_fragment(Cs) ->
@@ -1420,10 +1423,6 @@ validate_scheme(<>) ->
false -> false
end.
-validate_query([$?|_]) -> true;
-validate_query(<<$?/utf8, _/binary>>) -> true;
-validate_query(_) -> false.
-
%%-------------------------------------------------------------------------
%% Classifies hostname into the following categories:
@@ -1582,7 +1581,7 @@ update_path(#{}, URI) ->
update_query(#{query := Query}, empty) ->
encode_query(Query);
update_query(#{query := Query}, URI) ->
- concat(URI,encode_query(Query));
+ concat(URI,add_question_mark(encode_query(Query)));
update_query(#{}, empty) ->
empty;
update_query(#{}, URI) ->
@@ -1615,6 +1614,11 @@ add_hashmark(Comp) when is_binary(Comp) ->
add_hashmark(Comp) when is_list(Comp) ->
[$#|Comp].
+%% Prefixes a query component with the "?" delimiter, keeping the
+%% representation (list or binary) of the input.
+add_question_mark(Query) when is_list(Query) ->
+    "?" ++ Query;
+add_question_mark(Query) when is_binary(Query) ->
+    <<"?", Query/binary>>.
+
add_colon(Comp) when is_binary(Comp) ->
<<$:, Comp/binary>>.
diff --git a/lib/stdlib/test/property_test/uri_string_recompose.erl b/lib/stdlib/test/property_test/uri_string_recompose.erl
index dad67cd4c1..97f9d727a0 100644
--- a/lib/stdlib/test/property_test/uri_string_recompose.erl
+++ b/lib/stdlib/test/property_test/uri_string_recompose.erl
@@ -271,7 +271,7 @@ port() ->
query_map() ->
- [$?| unicode()].
+ unicode().
query_uri() ->
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index 83f702dd13..8a10948f32 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -52,7 +52,7 @@
-define(PORT_ENC, ":8042").
-define(PATH, "/där").
-define(PATH_ENC, "/d%C3%A4r").
--define(QUERY, "?name=örn").
+-define(QUERY, "name=örn").
-define(QUERY_ENC, "?name=%C3%B6rn").
-define(FRAGMENT, "näsa").
-define(FRAGMENT_ENC, "#n%C3%A4sa").
@@ -350,7 +350,7 @@ parse_binary_host_ipv4(_Config) ->
#{host := <<"127.0.0.1">>} = uri_string:parse(<<"//127.0.0.1">>),
#{host := <<"127.0.0.1">>, path := <<"/over/there">>} =
uri_string:parse(<<"//127.0.0.1/over/there">>),
- #{host := <<"127.0.0.1">>, query := <<"?name=ferret">>} =
+ #{host := <<"127.0.0.1">>, query := <<"name=ferret">>} =
uri_string:parse(<<"//127.0.0.1?name=ferret">>),
#{host := <<"127.0.0.1">>, fragment := <<"nose">>} = uri_string:parse(<<"//127.0.0.1#nose">>),
uri_parse_error = (catch uri_string:parse(<<"//127.0.0.x">>)),
@@ -362,7 +362,7 @@ parse_binary_host_ipv6(_Config) ->
uri_string:parse(<<"//[2001:0db8:0000:0000:0000:0000:1428:07ab]">>),
#{host := <<"::127.0.0.1">>, path := <<"/over/there">>} =
uri_string:parse(<<"//[::127.0.0.1]/over/there">>),
- #{host := <<"::127.0.0.1">>, query := <<"?name=ferret">>} =
+ #{host := <<"::127.0.0.1">>, query := <<"name=ferret">>} =
uri_string:parse(<<"//[::127.0.0.1]?name=ferret">>),
#{host := <<"::127.0.0.1">>, fragment := <<"nose">>} =
uri_string:parse(<<"//[::127.0.0.1]#nose">>),
@@ -397,35 +397,35 @@ parse_binary_path(_Config) ->
uri_string:parse(<<"foo://example.com:8042/over/there">>).
parse_binary_query(_Config) ->
- #{scheme := <<"foo">>, query := <<"?name=ferret">>} =
+ #{scheme := <<"foo">>, query := <<"name=ferret">>} =
uri_string:parse(<<"foo:?name=ferret">>),
- #{scheme := <<"foo">>, path:= <<"over/there">>, query := <<"?name=ferret">>} =
+ #{scheme := <<"foo">>, path:= <<"over/there">>, query := <<"name=ferret">>} =
uri_string:parse(<<"foo:over/there?name=ferret">>),
- #{scheme := <<"foo">>, path:= <<"/over/there">>, query := <<"?name=ferret">>} =
+ #{scheme := <<"foo">>, path:= <<"/over/there">>, query := <<"name=ferret">>} =
uri_string:parse(<<"foo:/over/there?name=ferret">>),
- #{scheme := <<"foo">>, host := <<"example.com">>, query := <<"?name=ferret">>} =
+ #{scheme := <<"foo">>, host := <<"example.com">>, query := <<"name=ferret">>} =
uri_string:parse(<<"foo://example.com?name=ferret">>),
- #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>, query := <<"?name=ferret">>} =
+ #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>, query := <<"name=ferret">>} =
uri_string:parse(<<"foo://example.com/?name=ferret">>),
- #{query := <<"?name=ferret">>} =
+ #{path := <<>>, query := <<"name=ferret">>} =
uri_string:parse(<<"?name=ferret">>),
- #{path := <<"over/there">>, query := <<"?name=ferret">>} =
+ #{path := <<"over/there">>, query := <<"name=ferret">>} =
uri_string:parse(<<"over/there?name=ferret">>),
- #{path := <<"/">>, query := <<"?name=ferret">>} =
+ #{path := <<"/">>, query := <<"name=ferret">>} =
uri_string:parse(<<"/?name=ferret">>),
- #{path := <<"/over/there">>, query := <<"?name=ferret">>} =
+ #{path := <<"/over/there">>, query := <<"name=ferret">>} =
uri_string:parse(<<"/over/there?name=ferret">>),
- #{host := <<"example.com">>, query := <<"?name=ferret">>} =
+ #{host := <<"example.com">>, query := <<"name=ferret">>} =
uri_string:parse(<<"//example.com?name=ferret">>),
- #{host := <<"example.com">>, path := <<"/">>, query := <<"?name=ferret">>} =
+ #{host := <<"example.com">>, path := <<"/">>, query := <<"name=ferret">>} =
uri_string:parse(<<"//example.com/?name=ferret">>).
parse_binary_pct_encoded_query(_Config) ->
#{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>,
- query := <<"?name=合気道"/utf8>>} =
+ query := <<"name=合気道"/utf8>>} =
uri_string:parse(<<"foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>),
- #{host := <<"example.com">>, path := <<"/">>, query := <<"?name=合気道"/utf8>>} =
+ #{host := <<"example.com">>, path := <<"/">>, query := <<"name=合気道"/utf8>>} =
uri_string:parse(<<"//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>).
parse_binary_fragment(_Config) ->
@@ -520,7 +520,7 @@ parse_host_ipv4(_Config) ->
#{host := "2001:0db8:0000:0000:0000:0000:1428:07ab"} =
uri_string:parse("//[2001:0db8:0000:0000:0000:0000:1428:07ab]"),
#{host := "127.0.0.1", path := "/over/there"} = uri_string:parse("//127.0.0.1/over/there"),
- #{host := "127.0.0.1", query := "?name=ferret"} = uri_string:parse("//127.0.0.1?name=ferret"),
+ #{host := "127.0.0.1", query := "name=ferret"} = uri_string:parse("//127.0.0.1?name=ferret"),
#{host := "127.0.0.1", fragment := "nose"} = uri_string:parse("//127.0.0.1#nose"),
uri_parse_error = (catch uri_string:parse("//127.0.0.x")),
uri_parse_error = (catch uri_string:parse("//1227.0.0.1")).
@@ -528,7 +528,7 @@ parse_host_ipv4(_Config) ->
parse_host_ipv6(_Config) ->
#{host := "::127.0.0.1"} = uri_string:parse("//[::127.0.0.1]"),
#{host := "::127.0.0.1", path := "/over/there"} = uri_string:parse("//[::127.0.0.1]/over/there"),
- #{host := "::127.0.0.1", query := "?name=ferret"} =
+ #{host := "::127.0.0.1", query := "name=ferret"} =
uri_string:parse("//[::127.0.0.1]?name=ferret"),
#{host := "::127.0.0.1", fragment := "nose"} = uri_string:parse("//[::127.0.0.1]#nose"),
uri_parse_error = (catch uri_string:parse("//[::127.0.0.x]")),
@@ -560,35 +560,35 @@ parse_path(_Config) ->
uri_string:parse("foo://example.com:8042/over/there").
parse_query(_Config) ->
- #{scheme := "foo", query := "?name=ferret"} =
+ #{scheme := "foo", query := "name=ferret"} =
uri_string:parse("foo:?name=ferret"),
- #{scheme := "foo", path:= "over/there", query := "?name=ferret"} =
+ #{scheme := "foo", path:= "over/there", query := "name=ferret"} =
uri_string:parse("foo:over/there?name=ferret"),
- #{scheme := "foo", path:= "/over/there", query := "?name=ferret"} =
+ #{scheme := "foo", path:= "/over/there", query := "name=ferret"} =
uri_string:parse("foo:/over/there?name=ferret"),
- #{scheme := "foo", host := "example.com", query := "?name=ferret"} =
+ #{scheme := "foo", host := "example.com", query := "name=ferret"} =
uri_string:parse("foo://example.com?name=ferret"),
- #{scheme := "foo", host := "example.com", path := "/", query := "?name=ferret"} =
+ #{scheme := "foo", host := "example.com", path := "/", query := "name=ferret"} =
uri_string:parse("foo://example.com/?name=ferret"),
- #{query := "?name=ferret"} =
+ #{path := "", query := "name=ferret"} =
uri_string:parse("?name=ferret"),
- #{path := "over/there", query := "?name=ferret"} =
+ #{path := "over/there", query := "name=ferret"} =
uri_string:parse("over/there?name=ferret"),
- #{path := "/", query := "?name=ferret"} =
+ #{path := "/", query := "name=ferret"} =
uri_string:parse("/?name=ferret"),
- #{path := "/over/there", query := "?name=ferret"} =
+ #{path := "/over/there", query := "name=ferret"} =
uri_string:parse("/over/there?name=ferret"),
- #{host := "example.com", query := "?name=ferret"} =
+ #{host := "example.com", query := "name=ferret"} =
uri_string:parse("//example.com?name=ferret"),
- #{host := "example.com", path := "/", query := "?name=ferret"} =
+ #{host := "example.com", path := "/", query := "name=ferret"} =
uri_string:parse("//example.com/?name=ferret").
parse_pct_encoded_query(_Config) ->
#{scheme := "foo", host := "example.com", path := "/",
- query := "?name=合気道"} =
+ query := "name=合気道"} =
uri_string:parse("foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93"),
- #{host := "example.com", path := "/", query := "?name=合気道"} =
+ #{host := "example.com", path := "/", query := "name=合気道"} =
uri_string:parse("//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93").
parse_fragment(_Config) ->
@@ -627,19 +627,19 @@ parse_pct_encoded_fragment(_Config) ->
parse_list(_Config) ->
#{scheme := "foo", path := "bar:nisse"} = uri_string:parse("foo:bar:nisse"),
#{scheme := "foo", host := "example.com", port := 8042,
- path := "/over/there", query := "?name=ferret", fragment := "nose"} =
+ path := "/over/there", query := "name=ferret", fragment := "nose"} =
uri_string:parse("foo://example.com:8042/over/there?name=ferret#nose"),
#{scheme := "foo", userinfo := "admin:admin", host := "example.com", port := 8042,
- path := "/over/there", query := "?name=ferret", fragment := "nose"} =
+ path := "/over/there", query := "name=ferret", fragment := "nose"} =
uri_string:parse("foo://admin:admin@example.com:8042/over/there?name=ferret#nose").
parse_binary(_Config) ->
#{scheme := <<"foo">>, path := <<"bar:nisse">>} = uri_string:parse(<<"foo:bar:nisse">>),
#{scheme := <<"foo">>, host := <<"example.com">>, port := 8042,
- path := <<"/over/there">>, query := <<"?name=ferret">>, fragment := <<"nose">>} =
+ path := <<"/over/there">>, query := <<"name=ferret">>, fragment := <<"nose">>} =
uri_string:parse(<<"foo://example.com:8042/over/there?name=ferret#nose">>),
#{scheme := <<"foo">>, userinfo := <<"admin:admin">>, host := <<"example.com">>, port := 8042,
- path := <<"/over/there">>, query := <<"?name=ferret">>, fragment := <<"nose">>} =
+ path := <<"/over/there">>, query := <<"name=ferret">>, fragment := <<"nose">>} =
uri_string:parse(<<"foo://admin:admin@example.com:8042/over/there?name=ferret#nose">>).
@@ -658,23 +658,26 @@ parse_relative(_Config) ->
uri_string:parse(lists:append("fo",<<"o">>)).
parse_special(_Config) ->
- #{host := [],query := "?"} = uri_string:parse("//?"),
+ #{host := [],query := []} = uri_string:parse("//?"),
#{fragment := [],host := []} = uri_string:parse("//#"),
- #{host := [],query := "?",scheme := "foo"} = uri_string:parse("foo://?"),
+ #{host := [],query := [],scheme := "foo"} = uri_string:parse("foo://?"),
#{fragment := [],host := [],scheme := "foo"} = uri_string:parse("foo://#"),
#{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>),
#{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>),
#{host := <<>>, path := <<"/hostname">>} = uri_string:parse(<<"///hostname">>),
- #{host := [],path := "/",query := "?"} = uri_string:parse("///?"),
+ #{host := [],path := "/",query := []} = uri_string:parse("///?"),
#{fragment := [],host := [],path := "/"} = uri_string:parse("///#"),
- #{host := "foo",query := "?"} = uri_string:parse("//foo?"),
+ #{host := "foo",query := []} = uri_string:parse("//foo?"),
#{fragment := [],host := "foo"} = uri_string:parse("//foo#"),
#{host := "foo",path := "/"} = uri_string:parse("//foo/"),
- #{host := "foo",query := "?",scheme := "http"} = uri_string:parse("http://foo?"),
+ #{host := "foo",query := [],scheme := "http"} = uri_string:parse("http://foo?"),
#{fragment := [],host := "foo",scheme := "http"} = uri_string:parse("http://foo#"),
#{host := "foo",path := "/",scheme := "http"} = uri_string:parse("http://foo/"),
#{fragment := [],host := "host",port := 80,scheme := "http"} = uri_string:parse("http://host:80#"),
- #{host := "host",port := 80,query := "?",scheme := "http"} = uri_string:parse("http://host:80?").
+ #{host := "host",port := 80,query := [],scheme := "http"} = uri_string:parse("http://host:80?"),
+ #{path := [],query := []} = uri_string:parse("?"),
+ #{path := [],query := "?"} = uri_string:parse("??"),
+ #{path := [],query := "??"} = uri_string:parse("???").
parse_special2(_Config) ->
#{host := [],path := "/",port := 1,scheme := "a"} = uri_string:parse("a://:1/"),
@@ -703,9 +706,9 @@ recompose_query(_Config) ->
fragment => <>,
path => <<>>}),
"?name=%C3%B6rn" =
- uri_string:recompose(#{query => "?name=örn", path => ""}),
+ uri_string:recompose(#{query => "name=örn", path => ""}),
"?name=%C3%B6rn#n%C3%A4sa" =
- uri_string:recompose(#{query => "?name=örn",
+ uri_string:recompose(#{query => "name=örn",
fragment => "näsa",
path => ""}).
@@ -724,10 +727,10 @@ recompose_path(_Config) ->
fragment => <<"näsa"/utf8>>}),
<<"/d%C3%A4r?name=%C3%B6rn">> =
uri_string:recompose(#{path => <<"/där"/utf8>>,
- query => <<"?name=örn"/utf8>>}),
+ query => <<"name=örn"/utf8>>}),
<<"/d%C3%A4r?name=%C3%B6rn#n%C3%A4sa">> =
uri_string:recompose(#{path => <<"/där"/utf8>>,
- query => <<"?name=örn"/utf8>>,
+ query => <<"name=örn"/utf8>>,
fragment => <<"näsa"/utf8>>}),
@@ -738,10 +741,10 @@ recompose_path(_Config) ->
fragment => "näsa"}),
"/d%C3%A4r?name=%C3%B6rn" =
uri_string:recompose(#{path => "/där",
- query => "?name=örn"}),
+ query => "name=örn"}),
"/d%C3%A4r?name=%C3%B6rn#n%C3%A4sa" =
uri_string:recompose(#{path => "/där",
- query => "?name=örn",
+ query => "name=örn",
fragment => "näsa"}).
--
cgit v1.2.3
From fd276f4a2a109d19d25cffee54a2c21ee4568085 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?=
Date: Mon, 16 Oct 2017 16:12:18 +0200
Subject: stdlib: Improve support of mixed lists (transcode)
- transcode/2 flattens input lists in order to be able to handle
lists with percent-encoded parts that are split into multiple
list and binary segments.
- Add additional tests for transcoding mixed lists.
---
lib/stdlib/src/uri_string.erl | 35 ++++++++++++++++++++++-------------
lib/stdlib/test/uri_string_SUITE.erl | 6 +++++-
2 files changed, 27 insertions(+), 14 deletions(-)
(limited to 'lib/stdlib')
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index f9e1e273bc..7d180f73b8 100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -368,12 +368,14 @@ transcode(URIString, Options) when is_binary(URIString) ->
transcode(URIString, Options) when is_list(URIString) ->
InEnc = proplists:get_value(in_encoding, Options, utf8),
OutEnc = proplists:get_value(out_encoding, Options, utf8),
- try transcode(URIString, [], InEnc, OutEnc) of
+ Flattened = flatten_list(URIString, InEnc),
+ try transcode(Flattened, [], InEnc, OutEnc) of
Result -> Result
catch
throw:{error, List, RestData} -> {invalid_input, List, RestData}
end.
+
%%-------------------------------------------------------------------------
%% Working with query strings
%% HTML 2.0 - application/x-www-form-urlencoded
@@ -1672,11 +1674,6 @@ transcode([$%,_C0,_C1|_Rest] = L, Acc, InEnc, OutEnc) ->
transcode([_C|_Rest] = L, Acc, InEnc, OutEnc) ->
transcode(L, Acc, [], InEnc, OutEnc).
%%
-transcode([H|T], Acc, List, InEnc, OutEnc) when is_binary(H) ->
- L = convert_list(H, InEnc),
- transcode(L ++ T, Acc, List, InEnc, OutEnc);
-transcode([H|T], Acc, List, InEnc, OutEnc) when is_list(H) ->
- transcode(H ++ T, Acc, List, InEnc, OutEnc);
transcode([$%,_C0,_C1|_Rest] = L, Acc, List, InEncoding, OutEncoding) ->
transcode_pct(L, List ++ Acc, <<>>, InEncoding, OutEncoding);
transcode([C|Rest], Acc, List, InEncoding, OutEncoding) ->
@@ -1686,11 +1683,6 @@ transcode([], Acc, List, _InEncoding, _OutEncoding) ->
%% Transcode percent-encoded segment
-transcode_pct([H|T], Acc, B, InEnc, OutEnc) when is_binary(H) ->
- L = convert_list(H, InEnc),
- transcode_pct(L ++ T, Acc, B, InEnc, OutEnc);
-transcode_pct([H|T], Acc, B, InEnc, OutEnc) when is_list(H) ->
- transcode_pct(H ++ T, Acc, B, InEnc, OutEnc);
transcode_pct([$%,C0,C1|Rest], Acc, B, InEncoding, OutEncoding) ->
case is_hex_digit(C0) andalso is_hex_digit(C1) of
true ->
@@ -1710,7 +1702,7 @@ transcode_pct([], Acc, B, InEncoding, OutEncoding) ->
lists:reverse(Acc) ++ Out.
-% Convert binary
+%% Convert to binary
convert_binary(Binary, InEncoding, OutEncoding) ->
case unicode:characters_to_binary(Binary, InEncoding, OutEncoding) of
{error, List, RestData} ->
@@ -1722,7 +1714,7 @@ convert_binary(Binary, InEncoding, OutEncoding) ->
end.
-% Convert binary
+%% Convert to list
convert_list(Binary, InEncoding) ->
case unicode:characters_to_list(Binary, InEncoding) of
{error, List, RestData} ->
@@ -1734,5 +1726,22 @@ convert_list(Binary, InEncoding) ->
end.
+%% Flatten input list
+flatten_list([], _) ->
+ [];
+flatten_list(L, InEnc) ->
+ flatten_list(L, InEnc, []).
+%%
+flatten_list([H|T], InEnc, Acc) when is_binary(H) ->
+ L = convert_list(H, InEnc),
+ flatten_list(T, InEnc, lists:reverse(L) ++ Acc);
+flatten_list([H|T], InEnc, Acc) when is_list(H) ->
+ flatten_list(H ++ T, InEnc, Acc);
+flatten_list([H|T], InEnc, Acc) ->
+ flatten_list(T, InEnc, [H|Acc]);
+flatten_list([], _InEnc, Acc) ->
+ lists:reverse(Acc).
+
+
percent_encode_segment(Segment) ->
percent_encode_binary(Segment, <<>>).
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index 8a10948f32..901d38a4da 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -794,7 +794,11 @@ transcode_options(_Config) ->
transcode_mixed(_Config) ->
"foo%00%00%00%F6bar" =
- uri_string:transcode(["foo",<<"%C3%B6"/utf8>>,<<"ba"/utf8>>,"r"], [{out_encoding, utf32}]).
+ uri_string:transcode(["foo",<<"%C3%B6"/utf8>>,<<"ba"/utf8>>,"r"], [{out_encoding, utf32}]),
+ "foo%00%00%00%F6bar" =
+ uri_string:transcode(["foo",<<"%C3%"/utf8>>,<<"B6ba"/utf8>>,"r"], [{out_encoding, utf32}]),
+ "foo%C3%B6bar" =
+ uri_string:transcode(["foo%00", <<"%00%0"/utf32>>,<<"0%F"/utf32>>,"6bar"], [{in_encoding, utf32},{out_encoding, utf8}]).
transcode_negative(_Config) ->
{invalid_input,"foo","BX"} =
--
cgit v1.2.3
From 5fe4c673bb8ee10d0fccadb4da14d7a500c2b8ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?=
Date: Wed, 18 Oct 2017 15:48:04 +0200
Subject: stdlib: Implement compose_query and dissect_query
---
lib/stdlib/src/uri_string.erl | 226 +++++++++++++++++++++++++++++++----
lib/stdlib/test/uri_string_SUITE.erl | 38 +++++-
2 files changed, 240 insertions(+), 24 deletions(-)
(limited to 'lib/stdlib')
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index 7d180f73b8..1b8f8b828f 100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -226,8 +226,9 @@
%%-------------------------------------------------------------------------
%% External API
%%-------------------------------------------------------------------------
--export([compose_query/1, create_uri_reference/2, dissect_query/1, normalize/1,
- parse/1, recompose/1, resolve_uri_reference/2, transcode/2]).
+-export([compose_query/1, compose_query/2, create_uri_reference/2,
+ dissect_query/1, normalize/1, parse/1,
+ recompose/1, resolve_uri_reference/2, transcode/2]).
-export_type([uri_map/0, uri_string/0]).
@@ -377,28 +378,66 @@ transcode(URIString, Options) when is_list(URIString) ->
%%-------------------------------------------------------------------------
-%% Working with query strings
-%% HTML 2.0 - application/x-www-form-urlencoded
-%% RFC 1866 [8.2.1]
+%% Functions for working with the query part of a URI as a list
+%% of key/value pairs.
+%% HTML 2.0 (RFC 1866) defines a media type application/x-www-form-urlencoded
+%% in section [8.2.1] "The form-urlencoded Media Type".
%%-------------------------------------------------------------------------
%%-------------------------------------------------------------------------
%% Compose urlencoded query string from a list of unescaped key/value pairs.
%%-------------------------------------------------------------------------
-spec compose_query(QueryList) -> QueryString when
- QueryList :: [{unicode:chardata(), unicode:chardata()}],
- QueryString :: uri_string().
-compose_query(_) ->
- "".
+ QueryList :: [{uri_string(), uri_string()}],
+ QueryString :: string().
+compose_query(List) ->
+    compose_query(List, []).
+
+
+%% Composes a form-urlencoded query string from a list of key/value
+%% pairs. The separator placed between pairs is chosen from Options by
+%% get_separator/2. An {error, Atom, RestData} term thrown while encoding
+%% a key or a value is returned to the caller instead of the string.
+-spec compose_query(QueryList, Options) -> QueryString when
+      QueryList :: [{uri_string(), uri_string()}],
+      Options :: [{separator, atom()}],
+      QueryString :: string() | {error, atom(), term()}.
+compose_query([],_Options) ->
+    [];
+compose_query(List, Options) ->
+    try compose_query(List, Options, []) of
+        Result -> Result
+    catch
+        throw:{error, Atom, RestData} -> {error, Atom, RestData}
+    end.
+%%
+%% Acc holds the already encoded pieces (separator, key, "=", value) in
+%% reverse order. Pieces are prepended and joined once at the end, so the
+%% output built so far is not copied again for every pair.
+compose_query([{Key,Value}|Rest], Options, Acc) ->
+    Separator = get_separator(Options, Acc),
+    K = form_urlencode(Key),
+    V = form_urlencode(Value),
+    compose_query(Rest, Options, [V, "=", K, Separator | Acc]);
+compose_query([], _Options, Acc) ->
+    lists:append(lists:reverse(Acc)).
+
+
%%-------------------------------------------------------------------------
%% Dissect a query string into a list of unescaped key/value pairs.
%%-------------------------------------------------------------------------
-spec dissect_query(QueryString) -> QueryList when
QueryString :: uri_string(),
- QueryList :: [{unicode:chardata(), unicode:chardata()}].
-dissect_query(_) ->
- "".
+ QueryList :: [{string(), string()}].
+%% Empty input has no key/value pairs, whichever representation is used.
+dissect_query([]) ->
+    [];
+dissect_query(<<>>) ->
+    [];
+%% The conversion to a flat list of characters is done inside the try so
+%% that an {error, Atom, RestData} term thrown by convert_list/2 or
+%% flatten_list/2 is returned as a value, exactly like errors thrown while
+%% dissecting.
+dissect_query(QueryString) when is_binary(QueryString) ->
+    try
+        L = convert_list(QueryString, utf8),
+        dissect_query_key(L, [], [], [])
+    catch
+        throw:{error, Atom, RestData} -> {error, Atom, RestData}
+    end;
+dissect_query(QueryString) ->
+    try
+        L = flatten_list(QueryString, utf8),
+        dissect_query_key(L, [], [], [])
+    catch
+        throw:{error, Atom, RestData} -> {error, Atom, RestData}
+    end.
%%%========================================================================
@@ -1705,10 +1744,10 @@ transcode_pct([], Acc, B, InEncoding, OutEncoding) ->
%% Convert to binary
convert_binary(Binary, InEncoding, OutEncoding) ->
case unicode:characters_to_binary(Binary, InEncoding, OutEncoding) of
- {error, List, RestData} ->
- throw({error, List, RestData});
- {incomplete, List, RestData} ->
- throw({error, List, RestData});
+ {error, _List, RestData} ->
+ throw({error, unicode, RestData});
+ {incomplete, _List, RestData} ->
+ throw({error, unicode, RestData});
Result ->
Result
end.
@@ -1717,10 +1756,10 @@ convert_binary(Binary, InEncoding, OutEncoding) ->
%% Convert to list
convert_list(Binary, InEncoding) ->
case unicode:characters_to_list(Binary, InEncoding) of
- {error, List, RestData} ->
- throw({error, List, RestData});
- {incomplete, List, RestData} ->
- throw({error, List, RestData});
+ {error, _List, RestData} ->
+ throw({error, unicode, RestData});
+ {incomplete, _List, RestData} ->
+ throw({error, unicode, RestData});
Result ->
Result
end.
@@ -1740,8 +1779,153 @@ flatten_list([H|T], InEnc, Acc) when is_list(H) ->
flatten_list([H|T], InEnc, Acc) ->
flatten_list(T, InEnc, [H|Acc]);
flatten_list([], _InEnc, Acc) ->
- lists:reverse(Acc).
+ lists:reverse(Acc);
+flatten_list(Arg, _, _) ->
+ throw({error, badarg, Arg}).
+
percent_encode_segment(Segment) ->
percent_encode_binary(Segment, <<>>).
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for compose_query
+%%-------------------------------------------------------------------------
+
+%% Returns separator to be used between key-value pairs.
+%% An empty Acc means that nothing has been emitted yet, so no separator
+%% is needed in front of the first pair; otherwise the separator is
+%% selected by the options list. The empty accumulator is matched directly
+%% rather than measured with length/1, which walks the whole list.
+get_separator(_, []) ->
+    [];
+get_separator([], _Acc) ->
+    "&";
+get_separator([{separator, amp}], _Acc) ->
+    "&";
+get_separator([{separator, semicolon}], _Acc) ->
+    ";".
+
+
+%% Form-urlencode input based on RFC 1866 [8.2.1]
+%%
+%% A binary is converted with convert_list/2 and any other input is
+%% flattened with flatten_list/2 before it is encoded:
+%%   - a space becomes "+",
+%%   - a literal "+" becomes "%2B", otherwise it would be read back as a
+%%     space when the result is decoded,
+%%   - characters accepted by is_url_char/1 are copied unchanged,
+%%   - every other character is replaced by the result of
+%%     urlencode_char/1.
+form_urlencode(Cs) when is_binary(Cs) ->
+    L = convert_list(Cs, utf8),
+    form_urlencode(L, []);
+form_urlencode(Cs) ->
+    L = flatten_list(Cs, utf8),
+    form_urlencode(L, []).
+%%
+%% Acc is the encoded output in reverse order.
+form_urlencode([], Acc) ->
+    lists:reverse(Acc);
+form_urlencode([$ |T], Acc) ->
+    form_urlencode(T, [$+|Acc]);
+form_urlencode([$+|T], Acc) ->
+    %% "%2B" pushed in reverse order
+    form_urlencode(T, [$B,$2,$%|Acc]);
+form_urlencode([H|T], Acc) ->
+    case is_url_char(H) of
+        true ->
+            form_urlencode(T, [H|Acc]);
+        false ->
+            E = urlencode_char(H),
+            form_urlencode(T, lists:reverse(E) ++ Acc)
+    end.
+
+
+%% Percent-encodes C with percent_encode_binary/1 and converts the result
+%% with unicode:characters_to_list/1.
+urlencode_char(C) ->
+    unicode:characters_to_list(percent_encode_binary(C)).
+
+
+%% Return true if input char can appear in URL according to
+%% RFC 1738 "Uniform Resource Locators".
+%%
+%% Control characters (0..31 and 127) and everything outside US-ASCII are
+%% rejected. That includes code points above 255, which previously fell
+%% through to the last clause and were accepted. The remaining characters
+%% are accepted unless is_reserved/1 or is_unsafe/1 says otherwise.
+is_url_char(C)
+  when 0 =< C, C =< 31;
+       128 =< C, C =< 255 -> false;
+is_url_char(127) -> false;
+is_url_char(C) when is_integer(C), C > 255 -> false;
+is_url_char(C) ->
+    not (is_reserved(C) orelse is_unsafe(C)).
+
+
+%% Reserved characters (RFC 1738): ";", "/", "?", ":", "@", "=" and "&".
+is_reserved(C) ->
+    lists:member(C, ";/?:@=&").
+
+
+%% Unsafe characters (RFC 1738, section 2.2)
+%%
+%% Besides the characters that gateways are known to modify, the RFC
+%% lists space, "<", ">", the double quote, "#" and "%" as unsafe; they
+%% must always be encoded within a URL.
+is_unsafe($\s) -> true;
+is_unsafe($<) -> true;
+is_unsafe($>) -> true;
+is_unsafe($") -> true;
+is_unsafe($#) -> true;
+is_unsafe($%) -> true;
+is_unsafe(${) -> true;
+is_unsafe($}) -> true;
+is_unsafe($|) -> true;
+is_unsafe($\\) -> true;
+is_unsafe($^) -> true;
+is_unsafe($~) -> true;
+is_unsafe($[) -> true;
+is_unsafe($]) -> true;
+is_unsafe($`) -> true;
+is_unsafe(_) -> false.
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for dissect_query
+%%-------------------------------------------------------------------------
+%% Collects the characters of a key (in reverse order) up to the "=" that
+%% separates it from its value, then continues with the value. If the
+%% input ends (or is not a list cell) before "=" is found,
+%% {error, missing_value, Rest} is thrown.
+dissect_query_key([Char|Rest], Pairs, KeyAcc, ValueAcc) ->
+    case Char of
+        $= ->
+            dissect_query_value(Rest, Pairs, KeyAcc, ValueAcc);
+        _ ->
+            dissect_query_key(Rest, Pairs, [Char|KeyAcc], ValueAcc)
+    end;
+dissect_query_key(Other, _Pairs, _KeyAcc, _ValueAcc) ->
+    throw({error, missing_value, Other}).
+
+
+%% Collects the characters of a value (in reverse order) until a "&" or
+%% ";" separator or the end of the input. The finished key/value pair is
+%% form-urldecoded and pushed onto Pairs; the separator itself is left for
+%% the matching dissect_query_separator_* function. At the end of the
+%% input the pairs are returned in input order.
+dissect_query_value([], Pairs, KeyAcc, ValueAcc) ->
+    lists:reverse([dissect_query_pair(KeyAcc, ValueAcc)|Pairs]);
+dissect_query_value([Char|Rest] = Input, Pairs, KeyAcc, ValueAcc) ->
+    case Char of
+        $& ->
+            Pair = dissect_query_pair(KeyAcc, ValueAcc),
+            dissect_query_separator_amp(Input, [Pair|Pairs], [], []);
+        $; ->
+            Pair = dissect_query_pair(KeyAcc, ValueAcc),
+            dissect_query_separator_semicolon(Input, [Pair|Pairs], [], []);
+        _ ->
+            dissect_query_value(Rest, Pairs, KeyAcc, [Char|ValueAcc])
+    end.
+%%
+%% Decodes the reversed key and value accumulators, key first.
+dissect_query_pair(KeyAcc, ValueAcc) ->
+    K = form_urldecode(lists:reverse(KeyAcc)),
+    V = form_urldecode(lists:reverse(ValueAcc)),
+    {K,V}.
+
+
+%% Consumes the ampersand separator that ended the previous value and
+%% continues with the next key; any other input throws
+%% {error, invalid_separator, Input}.
+%% NOTE(review): the only visible caller passes input that already starts
+%% with "&", so with a one-character literal the error clause cannot be
+%% reached, although the test suite expects it to fire -- confirm that the
+%% separator literal below was not altered by HTML entity decoding.
+dissect_query_separator_amp("&" ++ Rest, Pairs, KeyAcc, ValueAcc) ->
+    dissect_query_key(Rest, Pairs, KeyAcc, ValueAcc);
+dissect_query_separator_amp(Input, _Pairs, _KeyAcc, _ValueAcc) ->
+    throw({error, invalid_separator, Input}).
+
+
+%% Consumes the ";" separator that ended the previous value and continues
+%% with the next key.
+dissect_query_separator_semicolon(";" ++ Rest, Pairs, KeyAcc, ValueAcc) ->
+    dissect_query_key(Rest, Pairs, KeyAcc, ValueAcc).
+
+
+%% Form-urldecode input based on RFC 1866 [8.2.1]
+%%
+%% The input is converted to a UTF-8 binary and decoded byte by byte into
+%% a binary accumulator:
+%%   - "+" is decoded to a space,
+%%   - "%" followed by two hex digits is decoded to the byte they denote,
+%%   - any other byte must be accepted by is_url_char/1.
+%% A "%" followed by two bytes that are not both hex digits, or a byte
+%% rejected by is_url_char/1, throws {error, urldecode, Rest}. The decoded
+%% bytes are finally converted from UTF-8 to a list by convert_list/2.
+form_urldecode(Cs) ->
+    B = convert_binary(Cs, utf8, utf8),
+    form_urldecode(B, <<>>).
+%%
+form_urldecode(<<>>, Acc) ->
+    convert_list(Acc, utf8);
+form_urldecode(<<$+,T/binary>>, Acc) ->
+    %% Stay in the decoder: append the space to the binary accumulator.
+    form_urldecode(T, <<Acc/binary,$\s>>);
+form_urldecode(<<$%,C0,C1,T/binary>>, Acc) ->
+    case is_hex_digit(C0) andalso is_hex_digit(C1) of
+        true ->
+            V = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
+            form_urldecode(T, <<Acc/binary,V>>);
+        false ->
+            L = convert_list(<<$%,C0,C1,T/binary>>, utf8),
+            throw({error, urldecode, L})
+    end;
+form_urldecode(<<H,T/binary>>, Acc) ->
+    case is_url_char(H) of
+        true ->
+            form_urldecode(T, <<Acc/binary,H>>);
+        false ->
+            L = convert_list(<<H,T/binary>>, utf8),
+            throw({error, urldecode, L})
+    end.
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index 901d38a4da..beb534e023 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -37,7 +37,9 @@
recompose_query/1, recompose_parse_query/1,
recompose_path/1, recompose_parse_path/1,
recompose_autogen/1, parse_recompose_autogen/1,
- transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1
+ transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1,
+ compose_query/1, compose_query_negative/1,
+ dissect_query/1, dissect_query_negative/1
]).
@@ -104,7 +106,11 @@ all() ->
transcode_basic,
transcode_options,
transcode_mixed,
- transcode_negative
+ transcode_negative,
+ compose_query,
+ compose_query_negative,
+ dissect_query,
+ dissect_query_negative
].
groups() ->
@@ -803,5 +809,31 @@ transcode_mixed(_Config) ->
transcode_negative(_Config) ->
{invalid_input,"foo","BX"} =
uri_string:transcode(<<"foo%C3%BXbar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]),
- {invalid_input,<<>>,<<"ö">>} =
+ {invalid_input,unicode,<<"ö">>} =
uri_string:transcode("foo%F6bar", [{in_encoding, utf8},{out_encoding, utf8}]).
+
+compose_query(_Config) ->
+ [] = uri_string:compose_query([]),
+ "foo=1&bar=2" = uri_string:compose_query([{<<"foo">>,"1"}, {"bar", "2"}]),
+ "foo=1&bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,amp}]),
+ "foo=1;bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,semicolon}]),
+ "foo+bar=1&%C3%B6=2" = uri_string:compose_query([{"foo bar","1"}, {"ö", "2"}]).
+
+compose_query_negative(_Config) ->
+ {error,badarg,4} = uri_string:compose_query([{"",4}]).
+
+dissect_query(_Config) ->
+ [] = uri_string:dissect_query(""),
+ [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1&bar=2"),
+ [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1;bar=2"),
+ [{"foo","1"}, {"bar", "222"}] = uri_string:dissect_query([<<"foo=1;bar=2">>,"22"]),
+ [{"foo","ö"}, {"bar", "2"}] = uri_string:dissect_query("foo=%C3%B6&bar=2").
+
+dissect_query_negative(_Config) ->
+ {error,invalid_separator,"≈bar=2"} =
+ uri_string:dissect_query("foo=1≈bar=2"),
+ {error,urldecode,"&bar"} =
+ uri_string:dissect_query("foo1&bar=2"),
+ {error,urldecode,"%XX%B6"} = uri_string:dissect_query("foo=%XX%B6&bar=2"),
+ {error,unicode,<<153,182>>} =
+ uri_string:dissect_query("foo=%99%B6&bar=2").
--
cgit v1.2.3
From 75989c8024283155f6f8075ee9e81b50a65e9ecb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?=
Date: Thu, 19 Oct 2017 17:19:46 +0200
Subject: stdlib: Improve error handling
---
lib/stdlib/src/uri_string.erl | 129 +++++++++++++++++++++--------------
lib/stdlib/test/uri_string_SUITE.erl | 52 ++++++++------
2 files changed, 111 insertions(+), 70 deletions(-)
(limited to 'lib/stdlib')
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index 1b8f8b828f..51f7564934 100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -293,11 +293,22 @@
URIString :: uri_string(),
URIMap :: uri_map().
parse(URIString) when is_binary(URIString) ->
- parse_uri_reference(URIString, #{});
+ try parse_uri_reference(URIString, #{}) of
+ Result -> Result
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end;
parse(URIString) when is_list(URIString) ->
- Binary = unicode:characters_to_binary(URIString),
- Map = parse_uri_reference(Binary, #{}),
- convert_mapfields_to_list(Map).
+ try
+ Binary = unicode:characters_to_binary(URIString),
+ Map = parse_uri_reference(Binary, #{}),
+ convert_mapfields_to_list(Map)
+ of
+ Result -> Result
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end.
+
%%-------------------------------------------------------------------------
%% Recompose URIs
@@ -308,17 +319,24 @@ parse(URIString) when is_list(URIString) ->
recompose(Map) ->
case is_valid_map(Map) of
false ->
- error({badarg, invalid_map});
+ {error, invalid_map, Map};
true ->
- T0 = update_scheme(Map, empty),
- T1 = update_userinfo(Map, T0),
- T2 = update_host(Map, T1),
- T3 = update_port(Map, T2),
- T4 = update_path(Map, T3),
- T5 = update_query(Map, T4),
- update_fragment(Map, T5)
+ try
+ T0 = update_scheme(Map, empty),
+ T1 = update_userinfo(Map, T0),
+ T2 = update_host(Map, T1),
+ T3 = update_port(Map, T2),
+ T4 = update_path(Map, T3),
+ T5 = update_query(Map, T4),
+ update_fragment(Map, T5)
+ of
+ Result -> Result
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end
end.
+
%%-------------------------------------------------------------------------
%% Resolve references
%%-------------------------------------------------------------------------
@@ -364,7 +382,7 @@ transcode(URIString, Options) when is_binary(URIString) ->
of
Result -> Result
catch
- throw:{error, L, RestData} -> {invalid_input, L, RestData}
+ throw:{error, _, RestData} -> {error, invalid_input, RestData}
end;
transcode(URIString, Options) when is_list(URIString) ->
InEnc = proplists:get_value(in_encoding, Options, utf8),
@@ -373,7 +391,7 @@ transcode(URIString, Options) when is_list(URIString) ->
try transcode(Flattened, [], InEnc, OutEnc) of
Result -> Result
catch
- throw:{error, List, RestData} -> {invalid_input, List, RestData}
+ throw:{error, _, RestData} -> {error, invalid_input, RestData}
end.
@@ -467,7 +485,7 @@ parse_uri_reference(URIString, URI) ->
try parse_scheme_start(URIString, URI) of
Res -> Res
catch
- throw:uri_parse_error ->
+ throw:{_,_,_} ->
parse_relative_part(URIString, URI)
end.
@@ -495,7 +513,7 @@ parse_relative_part(?STRING_REST("//", Rest), URI) ->
URI2 = maybe_add_path(URI1),
URI2#{userinfo => decode_userinfo(Userinfo)}
catch
- throw:uri_parse_error ->
+ throw:{_,_,_} ->
{T, URI1} = parse_host(Rest, URI),
Host = calculate_parsed_part_sl(Rest, T),
URI2 = maybe_add_path(URI1),
@@ -521,7 +539,7 @@ parse_relative_part(?STRING_REST(Char, Rest), URI) ->
{T, URI1} = parse_segment_nz_nc(Rest, URI), % path-noscheme
Path = calculate_parsed_part(Rest, T),
URI1#{path => decode_path(?STRING_REST(Char, Path))};
- false -> throw(uri_parse_error)
+ false -> throw({error,invalid_uri,[Char]})
end.
@@ -571,7 +589,7 @@ parse_segment(?STRING_REST($#, Rest), URI) ->
parse_segment(?STRING_REST(Char, Rest), URI) ->
case is_pchar(Char) of
true -> parse_segment(Rest, URI);
- false -> throw(uri_parse_error)
+ false -> throw({error,invalid_uri,[Char]})
end;
parse_segment(?STRING_EMPTY, URI) ->
{?STRING_EMPTY, URI}.
@@ -594,7 +612,7 @@ parse_segment_nz_nc(?STRING_REST($#, Rest), URI) ->
parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) ->
case is_segment_nz_nc(Char) of
true -> parse_segment_nz_nc(Rest, URI);
- false -> throw(uri_parse_error)
+ false -> throw({error,invalid_uri,[Char]})
end;
parse_segment_nz_nc(?STRING_EMPTY, URI) ->
{?STRING_EMPTY, URI}.
@@ -629,7 +647,7 @@ parse_scheme_start(?STRING_REST(Char, Rest), URI) ->
Scheme = calculate_parsed_scheme(Rest, T),
URI2 = maybe_add_path(URI1),
URI2#{scheme => ?STRING_REST(Char, Scheme)};
- false -> throw(uri_parse_error)
+ false -> throw({error,invalid_uri,[Char]})
end.
%% Add path component if it is missing after parsing the URI.
@@ -653,10 +671,10 @@ parse_scheme(?STRING_REST($:, Rest), URI) ->
parse_scheme(?STRING_REST(Char, Rest), URI) ->
case is_scheme(Char) of
true -> parse_scheme(Rest, URI);
- false -> throw(uri_parse_error)
+ false -> throw({error,invalid_uri,[Char]})
end;
parse_scheme(?STRING_EMPTY, _URI) ->
- throw(uri_parse_error).
+ throw({error,invalid_uri,<<>>}).
%% Check if char is allowed in scheme
@@ -681,7 +699,7 @@ parse_hier(?STRING_REST("//", Rest), URI) ->
Userinfo = calculate_parsed_userinfo(Rest, T),
{Rest, URI1#{userinfo => decode_userinfo(Userinfo)}}
catch
- throw:uri_parse_error ->
+ throw:{_,_,_} ->
{T, URI1} = parse_host(Rest, URI),
Host = calculate_parsed_part_sl(Rest, T),
{Rest, URI1#{host => decode_host(remove_brackets(Host))}}
@@ -704,7 +722,7 @@ parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless
{T, URI1} = parse_segment(Rest, URI),
Path = calculate_parsed_part(Rest, T),
{Rest, URI1#{path => decode_path(?STRING_REST(Char, Path))}};
- false -> throw(uri_parse_error)
+ false -> throw({error,invalid_uri,[Char]})
end;
parse_hier(?STRING_EMPTY, URI) ->
{<<>>, URI}.
@@ -744,11 +762,11 @@ parse_userinfo(?STRING_REST($@, Rest), URI) ->
parse_userinfo(?STRING_REST(Char, Rest), URI) ->
case is_userinfo(Char) of
true -> parse_userinfo(Rest, URI);
- false -> throw(uri_parse_error)
+ false -> throw({error,invalid_uri,[Char]})
end;
parse_userinfo(?STRING_EMPTY, _URI) ->
%% URI cannot end in userinfo state
- throw(uri_parse_error).
+ throw({error,invalid_uri,<<>>}).
%% Check if char is allowed in userinfo
@@ -847,7 +865,7 @@ parse_reg_name(?STRING_REST($#, Rest), URI) ->
parse_reg_name(?STRING_REST(Char, Rest), URI) ->
case is_reg_name(Char) of
true -> parse_reg_name(Rest, URI);
- false -> throw(uri_parse_error)
+ false -> throw({error,invalid_uri,[Char]})
end;
parse_reg_name(?STRING_EMPTY, URI) ->
{?STRING_EMPTY, URI}.
@@ -883,7 +901,7 @@ parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) ->
parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) ->
case is_ipv4(Char) of
true -> parse_ipv4_bin(Rest, [Char|Acc], URI);
- false -> throw(uri_parse_error)
+ false -> throw({error,invalid_uri,[Char]})
end;
parse_ipv4_bin(?STRING_EMPTY, Acc, URI) ->
_ = validate_ipv4_address(lists:reverse(Acc)),
@@ -899,7 +917,7 @@ is_ipv4(Char) -> is_digit(Char).
validate_ipv4_address(Addr) ->
case inet:parse_ipv4strict_address(Addr) of
{ok, _} -> Addr;
- {error, _} -> throw(uri_parse_error)
+ {error, _} -> throw({error,invalid_uri,Addr})
end.
@@ -910,10 +928,10 @@ parse_ipv6_bin(?STRING_REST($], Rest), Acc, URI) ->
parse_ipv6_bin(?STRING_REST(Char, Rest), Acc, URI) ->
case is_ipv6(Char) of
true -> parse_ipv6_bin(Rest, [Char|Acc], URI);
- false -> throw(uri_parse_error)
+ false -> throw({error,invalid_uri,[Char]})
end;
parse_ipv6_bin(?STRING_EMPTY, _Acc, _URI) ->
- throw(uri_parse_error).
+ throw({error,invalid_uri,<<>>}).
%% Check if char is allowed in IPv6 addresses
-spec is_ipv6(char()) -> boolean().
@@ -943,7 +961,7 @@ parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) ->
parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) ->
case is_ipv6(Char) of
true -> parse_ipv6_bin_end(Rest, URI);
- false -> throw(uri_parse_error)
+ false -> throw({error,invalid_uri,[Char]})
end;
parse_ipv6_bin_end(?STRING_EMPTY, URI) ->
{?STRING_EMPTY, URI}.
@@ -952,7 +970,7 @@ parse_ipv6_bin_end(?STRING_EMPTY, URI) ->
validate_ipv6_address(Addr) ->
case inet:parse_ipv6strict_address(Addr) of
{ok, _} -> Addr;
- {error, _} -> throw(uri_parse_error)
+ {error, _} -> throw({error,invalid_uri,Addr})
end.
@@ -981,7 +999,7 @@ parse_port(?STRING_REST($#, Rest), URI) ->
parse_port(?STRING_REST(Char, Rest), URI) ->
case is_digit(Char) of
true -> parse_port(Rest, URI);
- false -> throw(uri_parse_error)
+ false -> throw({error,invalid_uri,[Char]})
end;
parse_port(?STRING_EMPTY, URI) ->
{?STRING_EMPTY, URI}.
@@ -1007,7 +1025,7 @@ parse_query(?STRING_REST($#, Rest), URI) ->
parse_query(?STRING_REST(Char, Rest), URI) ->
case is_query(Char) of
true -> parse_query(Rest, URI);
- false -> throw(uri_parse_error)
+ false -> throw({error,invalid_uri,[Char]})
end;
parse_query(?STRING_EMPTY, URI) ->
{?STRING_EMPTY, URI}.
@@ -1033,7 +1051,7 @@ is_query(Char) -> is_pchar(Char).
parse_fragment(?STRING_REST(Char, Rest), URI) ->
case is_fragment(Char) of
true -> parse_fragment(Rest, URI);
- false -> throw(uri_parse_error)
+ false -> throw({error,invalid_uri,[Char]})
end;
parse_fragment(?STRING_EMPTY, URI) ->
{?STRING_EMPTY, URI}.
@@ -1335,9 +1353,9 @@ decode_fragment(Cs) ->
check_utf8(Cs) ->
case unicode:characters_to_list(Cs) of
{incomplete,_,_} ->
- throw(uri_parse_error);
+ throw({error,non_utf8,Cs});
{error,_,_} ->
- throw(uri_parse_error);
+ throw({error,non_utf8,Cs});
_ -> Cs
end.
@@ -1348,13 +1366,13 @@ check_utf8(Cs) ->
%% Only validates as scheme cannot have percent-encoded characters
-spec encode_scheme(list()|binary()) -> list() | binary().
encode_scheme([]) ->
- throw(uri_parse_error);
+ throw({error,invalid_scheme,""});
encode_scheme(<<>>) ->
- throw(uri_parse_error);
+ throw({error,invalid_scheme,<<>>});
encode_scheme(Scheme) ->
case validate_scheme(Scheme) of
true -> Scheme;
- false -> throw(uri_parse_error)
+ false -> throw({error,invalid_scheme,Scheme})
end.
-spec encode_userinfo(list()|binary()) -> list() | binary().
@@ -1390,12 +1408,12 @@ decode(<<$%,C0,C1,Cs/binary>>, Fun, Acc) ->
true ->
B = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
decode(Cs, Fun, <<Acc/binary, B>>);
- false -> throw(uri_parse_error)
+ false -> throw({error,percent_decode,<<$%,C0,C1>>})
end;
decode(<<C,Cs/binary>>, Fun, Acc) ->
case Fun(C) of
true -> decode(Cs, Fun, <<Acc/binary, C>>);
- false -> throw(uri_parse_error)
+ false -> throw({error,percent_decode,<<C,Cs/binary>>})
end;
decode(<<>>, _Fun, Acc) ->
Acc.
@@ -1424,8 +1442,8 @@ encode(Component, Fun) when is_binary(Component) ->
encode(<<Char/utf8, Rest/binary>>, Fun, Acc) ->
C = encode_codepoint_binary(Char, Fun),
encode(Rest, Fun, <<Acc/binary,C/binary>>);
-encode(<<_Char, _Rest/binary>>, _Fun, _Acc) ->
- throw(uri_parse_error);
+encode(<<Char, Rest/binary>>, _Fun, _Acc) ->
+ throw({error,percent_encode,<<Char,Rest/binary>>});
encode(<<>>, _Fun, Acc) ->
Acc.
@@ -1554,7 +1572,8 @@ is_valid_map(Map) ->
not maps:is_key(host, Map) andalso
maps:is_key(port, Map))) orelse
not maps:is_key(path, Map) orelse
- not is_host_and_path_valid(Map)
+ not is_host_and_path_valid(Map) orelse
+ invalid_field_present(Map)
of
true ->
false;
@@ -1563,6 +1582,16 @@ is_valid_map(Map) ->
end.
+invalid_field_present(Map) ->
+ Fun = fun(K, _, AccIn) -> AccIn orelse
+ ((K =/= scheme) andalso (K =/= userinfo)
+ andalso (K =/= host) andalso (K =/= port)
+ andalso (K =/= path) andalso (K =/= query)
+ andalso (K =/= fragment))
+ end,
+ maps:fold(Fun, false, Map).
+
+
is_host_and_path_valid(Map) ->
Host = maps:get(host, Map, undefined),
Path = maps:get(path, Map, undefined),
@@ -1745,9 +1774,9 @@ transcode_pct([], Acc, B, InEncoding, OutEncoding) ->
convert_binary(Binary, InEncoding, OutEncoding) ->
case unicode:characters_to_binary(Binary, InEncoding, OutEncoding) of
{error, _List, RestData} ->
- throw({error, unicode, RestData});
+ throw({error, invalid_input, RestData});
{incomplete, _List, RestData} ->
- throw({error, unicode, RestData});
+ throw({error, invalid_input, RestData});
Result ->
Result
end.
@@ -1757,9 +1786,9 @@ convert_binary(Binary, InEncoding, OutEncoding) ->
convert_list(Binary, InEncoding) ->
case unicode:characters_to_list(Binary, InEncoding) of
{error, _List, RestData} ->
- throw({error, unicode, RestData});
+ throw({error, invalid_input, RestData});
{incomplete, _List, RestData} ->
- throw({error, unicode, RestData});
+ throw({error, invalid_input, RestData});
Result ->
Result
end.
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index beb534e023..b70cb842de 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -32,7 +32,7 @@
parse_pct_encoded_userinfo/1, parse_port/1,
parse_query/1, parse_scheme/1, parse_userinfo/1,
parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1,
- parse_special/1, parse_special2/1,
+ parse_special/1, parse_special2/1, parse_negative/1,
recompose_fragment/1, recompose_parse_fragment/1,
recompose_query/1, recompose_parse_query/1,
recompose_path/1, recompose_parse_path/1,
@@ -95,6 +95,7 @@ all() ->
parse_relative,
parse_special,
parse_special2,
+ parse_negative,
recompose_fragment,
recompose_parse_fragment,
recompose_query,
@@ -343,8 +344,8 @@ parse_binary_pct_encoded_userinfo(_Config) ->
uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93">>),
#{scheme := <<"foo">>, userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} =
uri_string:parse(<<"foo://%E5%90%88:%E6%B0%97@%E9%81%93">>),
- uri_parse_error =(catch uri_string:parse(<<"//%E5%90%88@%E6%B0%97%E9%81%93@">>)),
- uri_parse_error = (catch uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93@">>)).
+ {error,invalid_uri,"@"} = uri_string:parse(<<"//%E5%90%88@%E6%B0%97%E9%81%93@">>),
+ {error,invalid_uri,":"} = uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93@">>).
parse_binary_host(_Config) ->
#{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>),
@@ -359,8 +360,8 @@ parse_binary_host_ipv4(_Config) ->
#{host := <<"127.0.0.1">>, query := <<"name=ferret">>} =
uri_string:parse(<<"//127.0.0.1?name=ferret">>),
#{host := <<"127.0.0.1">>, fragment := <<"nose">>} = uri_string:parse(<<"//127.0.0.1#nose">>),
- uri_parse_error = (catch uri_string:parse(<<"//127.0.0.x">>)),
- uri_parse_error = (catch uri_string:parse(<<"//1227.0.0.1">>)).
+ {error,invalid_uri,"x"} = uri_string:parse(<<"//127.0.0.x">>),
+ {error,invalid_uri,"1227.0.0.1"} = uri_string:parse(<<"//1227.0.0.1">>).
parse_binary_host_ipv6(_Config) ->
#{host := <<"::127.0.0.1">>} = uri_string:parse(<<"//[::127.0.0.1]">>),
@@ -372,9 +373,9 @@ parse_binary_host_ipv6(_Config) ->
uri_string:parse(<<"//[::127.0.0.1]?name=ferret">>),
#{host := <<"::127.0.0.1">>, fragment := <<"nose">>} =
uri_string:parse(<<"//[::127.0.0.1]#nose">>),
- uri_parse_error = (catch uri_string:parse(<<"//[::127.0.0.x]">>)),
- uri_parse_error = (catch uri_string:parse(<<"//[::1227.0.0.1]">>)),
- uri_parse_error = (catch uri_string:parse(<<"//[2001:0db8:0000:0000:0000:0000:1428:G7ab]">>)).
+ {error,invalid_uri,"x"} = uri_string:parse(<<"//[::127.0.0.x]">>),
+ {error,invalid_uri,"::1227.0.0.1"} = uri_string:parse(<<"//[::1227.0.0.1]">>),
+ {error,invalid_uri,"G"} = uri_string:parse(<<"//[2001:0db8:0000:0000:0000:0000:1428:G7ab]">>).
parse_binary_port(_Config) ->
#{path:= <<"/:8042">>} =
@@ -389,8 +390,8 @@ parse_binary_port(_Config) ->
uri_string:parse(<<"foo://:8042">>),
#{scheme := <<"foo">>, host := <<"example.com">>, port := 8042} =
uri_string:parse(<<"foo://example.com:8042">>),
- uri_parse_error = (catch uri_string:parse(":600")),
- uri_parse_error = (catch uri_string:parse("//:8042x")).
+ {error,invalid_uri,":"} = uri_string:parse(":600"),
+ {error,invalid_uri,"x"} = uri_string:parse("//:8042x").
parse_binary_path(_Config) ->
#{path := <<"over/there">>} = uri_string:parse(<<"over/there">>),
@@ -511,8 +512,8 @@ parse_pct_encoded_userinfo(_Config) ->
uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93"),
#{scheme := "foo", userinfo := "合:気", host := "道"} =
uri_string:parse("foo://%E5%90%88:%E6%B0%97@%E9%81%93"),
- uri_parse_error =(catch uri_string:parse("//%E5%90%88@%E6%B0%97%E9%81%93@")),
- uri_parse_error = (catch uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93@")).
+ {error,invalid_uri,"@"} = uri_string:parse("//%E5%90%88@%E6%B0%97%E9%81%93@"),
+ {error,invalid_uri,":"} = uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93@").
parse_host(_Config) ->
@@ -528,8 +529,8 @@ parse_host_ipv4(_Config) ->
#{host := "127.0.0.1", path := "/over/there"} = uri_string:parse("//127.0.0.1/over/there"),
#{host := "127.0.0.1", query := "name=ferret"} = uri_string:parse("//127.0.0.1?name=ferret"),
#{host := "127.0.0.1", fragment := "nose"} = uri_string:parse("//127.0.0.1#nose"),
- uri_parse_error = (catch uri_string:parse("//127.0.0.x")),
- uri_parse_error = (catch uri_string:parse("//1227.0.0.1")).
+ {error,invalid_uri,"x"} = uri_string:parse("//127.0.0.x"),
+ {error,invalid_uri,"1227.0.0.1"} = uri_string:parse("//1227.0.0.1").
parse_host_ipv6(_Config) ->
#{host := "::127.0.0.1"} = uri_string:parse("//[::127.0.0.1]"),
@@ -537,9 +538,9 @@ parse_host_ipv6(_Config) ->
#{host := "::127.0.0.1", query := "name=ferret"} =
uri_string:parse("//[::127.0.0.1]?name=ferret"),
#{host := "::127.0.0.1", fragment := "nose"} = uri_string:parse("//[::127.0.0.1]#nose"),
- uri_parse_error = (catch uri_string:parse("//[::127.0.0.x]")),
- uri_parse_error = (catch uri_string:parse("//[::1227.0.0.1]")),
- uri_parse_error = (catch uri_string:parse("//[2001:0db8:0000:0000:0000:0000:1428:G7ab]")).
+ {error,invalid_uri,"x"} = uri_string:parse("//[::127.0.0.x]"),
+ {error,invalid_uri,"::1227.0.0.1"} = uri_string:parse("//[::1227.0.0.1]"),
+ {error,invalid_uri,"G"} = uri_string:parse("//[2001:0db8:0000:0000:0000:0000:1428:G7ab]").
parse_port(_Config) ->
#{path:= "/:8042"} =
@@ -693,6 +694,17 @@ parse_special2(_Config) ->
#{host := [],path := "/",userinfo := []} = uri_string:parse("//@/"),
#{host := [],path := "/",scheme := "foo",userinfo := []} = uri_string:parse("foo://@/").
+parse_negative(_Config) ->
+ {error,invalid_uri,"å"} = uri_string:parse("å"),
+ {error,invalid_uri,"å"} = uri_string:parse("aå:/foo"),
+ {error,invalid_uri,":"} = uri_string:parse("foo://usär@host"),
+ {error,invalid_uri,"ö"} = uri_string:parse("//host/path?foö=bar"),
+ {error,invalid_uri,"ö"} = uri_string:parse("//host/path#foö"),
+ {error,invalid_uri,"127.256.0.1"} = uri_string:parse("//127.256.0.1"),
+ {error,invalid_uri,":::127.0.0.1"} = uri_string:parse("//[:::127.0.0.1]"),
+ {error,non_utf8,<<0,0,0,246>>} = uri_string:parse("//%00%00%00%F6").
+
+
%%-------------------------------------------------------------------------
%% Recompose tests
%%-------------------------------------------------------------------------
@@ -807,9 +819,9 @@ transcode_mixed(_Config) ->
uri_string:transcode(["foo%00", <<"%00%0"/utf32>>,<<"0%F"/utf32>>,"6bar"], [{in_encoding, utf32},{out_encoding, utf8}]).
transcode_negative(_Config) ->
- {invalid_input,"foo","BX"} =
+ {error,invalid_input,"BX"} =
uri_string:transcode(<<"foo%C3%BXbar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]),
- {invalid_input,unicode,<<"ö">>} =
+ {error,invalid_input,<<"ö">>} =
uri_string:transcode("foo%F6bar", [{in_encoding, utf8},{out_encoding, utf8}]).
compose_query(_Config) ->
@@ -835,5 +847,5 @@ dissect_query_negative(_Config) ->
{error,urldecode,"&bar"} =
uri_string:dissect_query("foo1&bar=2"),
{error,urldecode,"%XX%B6"} = uri_string:dissect_query("foo=%XX%B6&bar=2"),
- {error,unicode,<<153,182>>} =
+ {error,invalid_input,<<153,182>>} =
uri_string:dissect_query("foo=%99%B6&bar=2").
--
cgit v1.2.3
From b439d19d38479d6264d906dd926a168c9c514da3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?=
Date: Fri, 20 Oct 2017 16:32:42 +0200
Subject: stdlib: Update documentation (uri_string)
---
lib/stdlib/doc/src/uri_string.xml | 114 +++++++++++++-------------------------
lib/stdlib/src/uri_string.erl | 58 ++++++-------------
2 files changed, 56 insertions(+), 116 deletions(-)
(limited to 'lib/stdlib')
diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml
index 8283b8ca0e..496573ae2f 100644
--- a/lib/stdlib/doc/src/uri_string.xml
+++ b/lib/stdlib/doc/src/uri_string.xml
@@ -24,7 +24,7 @@
maps
Péter Dimitrov
1
- 2017-08-23
+ 2017-10-20
A
uri_string
@@ -34,7 +34,8 @@
A URI is an identifier consisting of a sequence of characters matching the syntax
rule named URI in RFC 3986.
The generic URI syntax consists of a hierarchical sequence of components referred
- to as the scheme, authority, path, query, and fragment:
+ to as the scheme, authority, path, query, and fragment:
+
URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
hier-part = "//" authority path-abempty
/ path-absolute
@@ -51,35 +52,26 @@
unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
-
The interpretation of a URI depends only on the characters used and not on how those
characters are represented in a network protocol.
- The functions implemented by this module covers the following use cases:
+
The functions implemented by this module cover the following use cases:
- Parsing URIs
parse/1
- Recomposing URIs
recompose/2
- - Resolving URI references
- resolve_uri_reference/3
- - Creating URI references
- create_uri_reference/3
- - Normalizing URIs
- normalize/1
- Transcoding URIs
transcode/2
- - Working with urlencoded query strings
- compose_query/1, dissect_query/1
+ - Working with form-urlencoded query strings
+ compose_query/[1,2], dissect_query/1
-
- There are four different encodings present during the handling of URIs:
+
There are four different encodings present during the handling of URIs:
- Inbound binary encoding in binaries
- Inbound percent-encoding in lists and binaries
- Outbound binary encoding in binaries
- Outbound percent-encoding in lists and binaries
-
Unless otherwise specified the return value type and encoding are the same as the input
type and encoding. That is, binary input returns binary output, list input returns a list
output but mixed input returns list output. Input and output encodings are the same except
@@ -113,31 +105,34 @@
Compose urlencoded query string.
- Composes an urlencoded QueryString based on a
+
Composes a form-urlencoded QueryString based on a
QueryList , a list of unescaped key-value pairs.
Media type application/x-www-form-urlencoded is defined in section
- 8.2.1 of RFC 1866 (HTML 2.0).
+ 8.2.1 of RFC 1866 (HTML 2.0). Reserved and unsafe characters, as
+ defined by RFC 1738 (Uniform Resource Locators), are percent-encoded.
- If an argument is invalid, a badarg exception is raised.
Example:
-1> uri_string:compose_query(...).
-
+1> uri_string:compose_query([{"foo bar","1"},{"city","örebro"}]).
+
+
Creates an RFC 3986 compliant
If an argument is invalid, a
Same as
Example:
-1> uri_string:create_uri_reference(...,...). -+1> uri_string:compose_query([{"foo bar","1"},{"city","örebro"}], +2> [{separator, semicolon}]). +"foo+bar=1;city=%C3%B6rebro" +
Dissects an urlencoded
If an argument is invalid, a
Example:
-1> uri_string:dissect_query(...). -- - - -
Normalizes an RFC 3986 compliant
If an argument is invalid, a
Example:
--1> uri_string:normalize("http://example.org/one/two/../../one"). -"http://example.org/one" -+1> uri_string:dissect_query("foo+bar=1;city=%C3%B6rebro"). +[{"foo bar","1"},{"city","örebro"}] +
Returns a
If parsing fails, a
If parsing fails, an error tuple is returned.
Example:
1> uri_string:parse("foo://user@example.com:8042/over/there?name=ferret#nose"). #{fragment => "nose",host => "example.com", path => "/over/there",port => 8042,query => "name=ferret", scheme => foo,userinfo => "user"} -2>+
Returns an RFC 3986 compliant
If the
If the
Example:
1> URIMap = #{fragment => "nose", host => "example.com", path => "/over/there", -port => 8042, query => "name=ferret", scheme => foo, userinfo => "user"}. +port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}. #{fragment => "top",host => "example.com", path => "/over/there",port => 8042,query => "?name=ferret", scheme => foo,userinfo => "user"} -2> uri_string:recompose(URIMap, []). +2> uri_string:recompose(URIMap). "foo://example.com:8042/over/there?name=ferret#nose"
Resolves an RFC 3986 compliant
If an argument is invalid, a
Example:
--1> uri_string:resolve_uri_reference(...,...). --
Transcodes an RFC 3986 compliant
If an argument is invalid, a
If an argument is invalid, an error tuple is returned.
Example:
-1> uri_string:transcode(<<"foo://f%20oo">>, [{in_encoding, utf8}, -{out_encoding, utf16}]). -<<0,102,0,111,0,111,0,58,0,47,0,47,0,102,0,37,0,48,0,48,0,37,0,50,0,48,0, - 111,0,111>> -+1> >,]]> +2> [{in_encoding, utf32},{out_encoding, utf8}]). +>]]> +
This module contains functions for parsing and handling RFC 3986 compliant URIs.
+This module contains functions for parsing and handling URIs (RFC 3986) and + form-urlencoded query strings (RFC 1866).
A URI is an identifier consisting of a sequence of characters matching the syntax rule named URI in RFC 3986.
The generic URI syntax consists of a hierarchical sequence of components referred
@@ -109,7 +110,7 @@
Example:
@@ -125,8 +126,7 @@@@ -143,13 +181,19 @@ Same as
+ between key-value pairs. There are three supported separator types:compose_query/1 but with an additionalparameter, that controls the type of separator used - between key-value pairs. There are two supported separator types: Options amp () - andsemicolon (;).amp (),escaped_amp () andsemicolon (;). If the parameteris empty, separator takes the default value ( Options escaped_amp ).Example:
1> uri_string:compose_query([{"foo bar","1"},{"city","örebro"}], diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index 8723d3f183..a4fd9c66f4 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -1806,6 +1806,8 @@ get_separator(_, Acc) when length(Acc) =:= 0 -> get_separator([], _Acc) -> "&"; get_separator([{separator, amp}], _Acc) -> + "&"; +get_separator([{separator, escaped_amp}], _Acc) -> "&"; get_separator([{separator, semicolon}], _Acc) -> ";". @@ -1901,6 +1903,8 @@ dissect_query_value([], Acc, Key, Value) -> dissect_query_separator_amp("&" ++ T, Acc, Key, Value) -> dissect_query_key(T, Acc, Key, Value); +dissect_query_separator_amp("&" ++ T, Acc, Key, Value) -> + dissect_query_key(T, Acc, Key, Value); dissect_query_separator_amp(L, _, _, _) -> throw({error, invalid_separator, L}). diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl index b70cb842de..fe832ac82c 100644 --- a/lib/stdlib/test/uri_string_SUITE.erl +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -827,7 +827,8 @@ transcode_negative(_Config) -> compose_query(_Config) -> [] = uri_string:compose_query([]), "foo=1&bar=2" = uri_string:compose_query([{<<"foo">>,"1"}, {"bar", "2"}]), - "foo=1&bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,amp}]), + "foo=1&bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,escaped_amp}]), + "foo=1&bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,amp}]), "foo=1;bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,semicolon}]), "foo+bar=1&%C3%B6=2" = uri_string:compose_query([{"foo bar","1"}, {"ö", "2"}]). 
@@ -837,12 +838,13 @@ compose_query_negative(_Config) -> dissect_query(_Config) -> [] = uri_string:dissect_query(""), [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1&bar=2"), + [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1&bar=2"), [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1;bar=2"), [{"foo","1"}, {"bar", "222"}] = uri_string:dissect_query([<<"foo=1;bar=2">>,"22"]), [{"foo","ö"}, {"bar", "2"}] = uri_string:dissect_query("foo=%C3%B6&bar=2"). dissect_query_negative(_Config) -> - {error,invalid_separator,"≈bar=2"} = + {error,urldecode,";bar"} = uri_string:dissect_query("foo=1≈bar=2"), {error,urldecode,"&bar"} = uri_string:dissect_query("foo1&bar=2"), -- cgit v1.2.3 From 642bb27f8104991445a1f507f6b065d3cd7cd1ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?=Date: Tue, 24 Oct 2017 09:17:55 +0200 Subject: stdlib: Fix title in uri_string.xml --- lib/stdlib/doc/src/uri_string.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml index 97b38ea93e..d67c687fd1 100644 --- a/lib/stdlib/doc/src/uri_string.xml +++ b/lib/stdlib/doc/src/uri_string.xml @@ -21,10 +21,10 @@ limitations under the License. 
- maps +uri_string Péter Dimitrov 1 -2017-10-20 +2017-10-24 A uri_string -- cgit v1.2.3 From 3c80849dc9167018a66542b76b441e675d404a78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?=Date: Tue, 24 Oct 2017 13:19:37 +0200 Subject: stdlib: Refactor parsed binary calculation --- lib/stdlib/src/uri_string.erl | 220 +++++++++++++----------------------------- 1 file changed, 65 insertions(+), 155 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index a4fd9c66f4..684087b870 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -493,7 +493,7 @@ parse_relative_part(?STRING_REST("//", Rest), URI) -> catch throw:{_,_,_} -> {T, URI1} = parse_host(Rest, URI), - Host = calculate_parsed_part_sl(Rest, T), + Host = calculate_parsed_host_port(Rest, T), URI2 = maybe_add_path(URI1), URI2#{host => decode_host(remove_brackets(Host))} end; @@ -503,12 +503,12 @@ parse_relative_part(?STRING_REST($/, Rest), URI) -> URI1#{path => decode_path(?STRING_REST($/, Path))}; parse_relative_part(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query - Query = calculate_parsed_query(Rest, T), + Query = calculate_parsed_query_fragment(Rest, T), URI2 = maybe_add_path(URI1), URI2#{query => decode_query(Query)}; parse_relative_part(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_fragment(Rest, T), + Fragment = calculate_parsed_query_fragment(Rest, T), URI2 = maybe_add_path(URI1), URI2#{fragment => decode_fragment(Fragment)}; parse_relative_part(?STRING_REST(Char, Rest), URI) -> @@ -558,11 +558,11 @@ parse_segment(?STRING_REST($/, Rest), URI) -> parse_segment(Rest, URI); % segment parse_segment(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % ?query - Query = calculate_parsed_query(Rest, T), + Query = calculate_parsed_query_fragment(Rest, T), {Rest, URI1#{query => 
decode_query(Query)}}; parse_segment(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), - Fragment = calculate_parsed_fragment(Rest, T), + Fragment = calculate_parsed_query_fragment(Rest, T), {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_segment(?STRING_REST(Char, Rest), URI) -> case is_pchar(Char) of @@ -581,11 +581,11 @@ parse_segment_nz_nc(?STRING_REST($/, Rest), URI) -> parse_segment(Rest, URI); % segment parse_segment_nz_nc(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % ?query - Query = calculate_parsed_query(Rest, T), + Query = calculate_parsed_query_fragment(Rest, T), {Rest, URI1#{query => decode_query(Query)}}; parse_segment_nz_nc(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), - Fragment = calculate_parsed_fragment(Rest, T), + Fragment = calculate_parsed_query_fragment(Rest, T), {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) -> case is_segment_nz_nc(Char) of @@ -679,7 +679,7 @@ parse_hier(?STRING_REST("//", Rest), URI) -> catch throw:{_,_,_} -> {T, URI1} = parse_host(Rest, URI), - Host = calculate_parsed_part_sl(Rest, T), + Host = calculate_parsed_host_port(Rest, T), {Rest, URI1#{host => decode_host(remove_brackets(Host))}} end; parse_hier(?STRING_REST($/, Rest), URI) -> @@ -688,11 +688,11 @@ parse_hier(?STRING_REST($/, Rest), URI) -> {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; parse_hier(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query - Query = calculate_parsed_query(Rest, T), + Query = calculate_parsed_query_fragment(Rest, T), {Rest, URI1#{query => decode_query(Query)}}; parse_hier(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_fragment(Rest, T), + Fragment = calculate_parsed_query_fragment(Rest, T), {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_hier(?STRING_REST(Char, Rest), URI) -> 
% path-rootless case is_pchar(Char) of @@ -735,7 +735,7 @@ parse_userinfo(?CHAR($@), URI) -> {?STRING_EMPTY, URI#{host => <<>>}}; parse_userinfo(?STRING_REST($@, Rest), URI) -> {T, URI1} = parse_host(Rest, URI), - Host = calculate_parsed_host(Rest, T), + Host = calculate_parsed_host_port(Rest, T), {Rest, URI1#{host => decode_host(remove_brackets(Host))}}; parse_userinfo(?STRING_REST(Char, Rest), URI) -> case is_userinfo(Char) of @@ -796,7 +796,7 @@ is_userinfo(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). -spec parse_host(binary(), uri_map()) -> {binary(), uri_map()}. parse_host(?STRING_REST($:, Rest), URI) -> {T, URI1} = parse_port(Rest, URI), - H = calculate_parsed_port(Rest, T), + H = calculate_parsed_host_port(Rest, T), Port = binary_to_integer(H), {Rest, URI1#{port => Port}}; parse_host(?STRING_REST($/, Rest), URI) -> @@ -805,13 +805,13 @@ parse_host(?STRING_REST($/, Rest), URI) -> {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; parse_host(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query - Query = calculate_parsed_query(Rest, T), + Query = calculate_parsed_query_fragment(Rest, T), {Rest, URI1#{query => decode_query(Query)}}; parse_host(?STRING_REST($[, Rest), URI) -> parse_ipv6_bin(Rest, [], URI); parse_host(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_fragment(Rest, T), + Fragment = calculate_parsed_query_fragment(Rest, T), {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_host(?STRING_REST(Char, Rest), URI) -> case is_digit(Char) of @@ -825,7 +825,7 @@ parse_host(?STRING_EMPTY, URI) -> -spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}. 
parse_reg_name(?STRING_REST($:, Rest), URI) -> {T, URI1} = parse_port(Rest, URI), - H = calculate_parsed_port(Rest, T), + H = calculate_parsed_host_port(Rest, T), Port = binary_to_integer(H), {Rest, URI1#{port => Port}}; parse_reg_name(?STRING_REST($/, Rest), URI) -> @@ -834,11 +834,11 @@ parse_reg_name(?STRING_REST($/, Rest), URI) -> {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; parse_reg_name(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query - Query = calculate_parsed_query(Rest, T), + Query = calculate_parsed_query_fragment(Rest, T), {Rest, URI1#{query => decode_query(Query)}}; parse_reg_name(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_fragment(Rest, T), + Fragment = calculate_parsed_query_fragment(Rest, T), {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_reg_name(?STRING_REST(Char, Rest), URI) -> case is_reg_name(Char) of @@ -858,7 +858,7 @@ is_reg_name(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). 
parse_ipv4_bin(?STRING_REST($:, Rest), Acc, URI) -> _ = validate_ipv4_address(lists:reverse(Acc)), {T, URI1} = parse_port(Rest, URI), - H = calculate_parsed_port(Rest, T), + H = calculate_parsed_host_port(Rest, T), Port = binary_to_integer(H), {Rest, URI1#{port => Port}}; parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) -> @@ -869,12 +869,12 @@ parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) -> parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) -> _ = validate_ipv4_address(lists:reverse(Acc)), {T, URI1} = parse_query(Rest, URI), % path-empty ?query - Query = calculate_parsed_query(Rest, T), + Query = calculate_parsed_query_fragment(Rest, T), {Rest, URI1#{query => decode_query(Query)}}; parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) -> _ = validate_ipv4_address(lists:reverse(Acc)), {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_fragment(Rest, T), + Fragment = calculate_parsed_query_fragment(Rest, T), {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) -> case is_ipv4(Char) of @@ -921,7 +921,7 @@ is_ipv6(Char) -> is_hex_digit(Char). -spec parse_ipv6_bin_end(binary(), uri_map()) -> {binary(), uri_map()}. 
parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) -> {T, URI1} = parse_port(Rest, URI), - H = calculate_parsed_port(Rest, T), + H = calculate_parsed_host_port(Rest, T), Port = binary_to_integer(H), {Rest, URI1#{port => Port}}; parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) -> @@ -930,11 +930,11 @@ parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) -> {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query - Query = calculate_parsed_query(Rest, T), + Query = calculate_parsed_query_fragment(Rest, T), {Rest, URI1#{query => decode_query(Query)}}; parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_fragment(Rest, T), + Fragment = calculate_parsed_query_fragment(Rest, T), {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) -> case is_ipv6(Char) of @@ -968,11 +968,11 @@ parse_port(?STRING_REST($/, Rest), URI) -> {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; parse_port(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query - Query = calculate_parsed_query(Rest, T), + Query = calculate_parsed_query_fragment(Rest, T), {Rest, URI1#{query => decode_query(Query)}}; parse_port(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_fragment(Rest, T), + Fragment = calculate_parsed_query_fragment(Rest, T), {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_port(?STRING_REST(Char, Rest), URI) -> case is_digit(Char) of @@ -998,7 +998,7 @@ parse_port(?STRING_EMPTY, URI) -> -spec parse_query(binary(), uri_map()) -> {binary(), uri_map()}. 
parse_query(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), - Fragment = calculate_parsed_fragment(Rest, T), + Fragment = calculate_parsed_query_fragment(Rest, T), {Rest, URI1#{fragment => decode_fragment(Fragment)}}; parse_query(?STRING_REST(Char, Rest), URI) -> case is_query(Char) of @@ -1115,144 +1115,77 @@ remove_brackets(Addr) -> Addr. %%------------------------------------------------------------------------- %% Helper functions for calculating the parsed binary. %%------------------------------------------------------------------------- +-spec calculate_parsed_scheme(binary(), binary()) -> binary(). +calculate_parsed_scheme(Input, <<>>) -> + strip_last_char(Input, [$:]); +calculate_parsed_scheme(Input, Unparsed) -> + get_parsed_binary(Input, Unparsed). + -%% Returns the parsed binary based on Input and the Unparsed part. -%% Handles the following special cases: -%% -%% #{host => [],path => "/",query => []} = uri_string:parse("///?") -%% #{fragment => [],host => [],path => "/"} = uri_string:parse("///#") -%% -spec calculate_parsed_part(binary(), binary()) -> binary(). -calculate_parsed_part(<<$?>>, _) -> <<>>; -calculate_parsed_part(<<$#>>, _) -> <<>>; -calculate_parsed_part(<<>>, _) -> <<>>; calculate_parsed_part(Input, <<>>) -> - case binary:last(Input) of - $? -> - init_binary(Input); - $# -> - init_binary(Input); - _Else -> - Input - end; + strip_last_char(Input, [$?,$#]); calculate_parsed_part(Input, Unparsed) -> - {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)), - First. + get_parsed_binary(Input, Unparsed). -spec calculate_parsed_userinfo(binary(), binary()) -> binary(). -calculate_parsed_userinfo(<<$?>>, _) -> <<>>; -calculate_parsed_userinfo(<<$#>>, _) -> <<>>; -calculate_parsed_userinfo(<<>>, _) -> <<>>; calculate_parsed_userinfo(Input, <<>>) -> - case binary:last(Input) of - $? 
-> - init_binary(Input); - $# -> - init_binary(Input); - $@ -> - init_binary(Input); - _Else -> - Input - end; + strip_last_char(Input, [$?,$#,$@]); calculate_parsed_userinfo(Input, Unparsed) -> - {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)), - First. + get_parsed_binary(Input, Unparsed). + + +-spec calculate_parsed_host_port(binary(), binary()) -> binary(). +calculate_parsed_host_port(Input, <<>>) -> + strip_last_char(Input, [$?,$#,$/]); +calculate_parsed_host_port(Input, Unparsed) -> + get_parsed_binary(Input, Unparsed). + +calculate_parsed_query_fragment(Input, <<>>) -> + strip_last_char(Input, [$#]); +calculate_parsed_query_fragment(Input, Unparsed) -> + get_parsed_binary(Input, Unparsed). --spec calculate_parsed_host(binary(), binary()) -> binary(). -calculate_parsed_host(<<$?>>, _) -> <<>>; -calculate_parsed_host(<<$#>>, _) -> <<>>; -calculate_parsed_host(<<>>, _) -> <<>>; -calculate_parsed_host(Input, <<>>) -> + +%% Strip last char if it is in list +strip_last_char(<<>>, _) -> <<>>; +strip_last_char(Input, [C0]) -> case binary:last(Input) of - $? -> - init_binary(Input); - $# -> - init_binary(Input); - $/ -> + C0 -> init_binary(Input); _Else -> Input end; -calculate_parsed_host(Input, Unparsed) -> - {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)), - First. - - --spec calculate_parsed_port(binary(), binary()) -> binary(). -calculate_parsed_port(<<$?>>, _) -> <<>>; -calculate_parsed_port(<<$#>>, _) -> <<>>; -calculate_parsed_port(<<>>, _) -> <<>>; -calculate_parsed_port(Input, <<>>) -> +strip_last_char(Input, [C0,C1]) -> case binary:last(Input) of - $? -> - init_binary(Input); - $# -> + C0 -> init_binary(Input); - $/ -> + C1 -> init_binary(Input); _Else -> Input end; -calculate_parsed_port(Input, Unparsed) -> - {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)), - First. 
- - -calculate_parsed_query(<<$#>>, _) -> <<>>; -calculate_parsed_query(<<>>, _) -> <<>>; -calculate_parsed_query(Input, <<>>) -> +strip_last_char(Input, [C0,C1,C2]) -> case binary:last(Input) of - $# -> + C0 -> + init_binary(Input); + C1 -> + init_binary(Input); + C2 -> init_binary(Input); _Else -> Input - end; -calculate_parsed_query(Input, Unparsed) -> - {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)), - First. + end. --spec calculate_parsed_fragment(binary(), binary()) -> binary(). -calculate_parsed_fragment(<<$#>>, _) -> <<>>; -calculate_parsed_fragment(Input, Unparsed) -> +%% Get parsed binary +get_parsed_binary(Input, Unparsed) -> {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)), First. -%% Returns the parsed binary based on Input and the Unparsed part. -%% Used when parsing authority. -%% -%% Handles the following special cases: -%% -%% #{host => "foo",query => []} = uri_string:parse("//foo?") -%% #{fragment => [],host => "foo"} = uri_string:parse("//foo#") -%% #{host => "foo",path => "/"} = uri_string:parse("//foo/") -%% #{host => "foo",query => [],scheme => "http"} = uri_string:parse("http://foo?") -%% #{fragment => [],host => "foo",scheme => "http"} = uri_string:parse("http://foo#") -%% #{host => "foo",path => "/",scheme => "http"} = uri_string:parse("http://foo/") -%% --spec calculate_parsed_part_sl(binary(), binary()) -> binary(). -calculate_parsed_part_sl(<<$?>>, _) -> <<>>; -calculate_parsed_part_sl(<<$#>>, _) -> <<>>; -calculate_parsed_part_sl(<<>>, _) -> <<>>; -calculate_parsed_part_sl(Input, <<>>) -> - case binary:last(Input) of - $? -> - init_binary(Input); - $# -> - init_binary(Input); - $/ -> - init_binary(Input); - _Else -> - Input - end; -calculate_parsed_part_sl(Input, Unparsed) -> - {First, _} = - split_binary(Input, byte_size_exl_single_slash(Input) - byte_size_exl_head(Unparsed)), - First. - %% Return all bytes of the binary except the last one. 
The binary must be non-empty. init_binary(B) -> {Init, _} = @@ -1260,14 +1193,6 @@ init_binary(B) -> Init. -%% Returns the parsed binary based on Input and the Unparsed part. -%% Used when parsing scheme. --spec calculate_parsed_scheme(binary(), binary()) -> binary(). -calculate_parsed_scheme(Input, Unparsed) -> - {First, _} = split_binary(Input, byte_size(Input) - byte_size(Unparsed) - 1), - First. - - %% Returns the size of a binary exluding the first element. %% Used in calls to split_binary(). -spec byte_size_exl_head(binary()) -> number(). @@ -1275,21 +1200,6 @@ byte_size_exl_head(<<>>) -> 0; byte_size_exl_head(Binary) -> byte_size(Binary) + 1. -%% Returns size of 'Rest' for proper calculation of splitting position. -%% Solves the following special case: -%% -%% #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>). -%% -%% While keeping the following true: -%% -%% #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>). -%% #{host := <<>>, path := <<"/hostname">>} = uri_string:parse(<<"///hostname">>). -%% --spec byte_size_exl_single_slash(uri_string()) -> number(). -byte_size_exl_single_slash(<<$/>>) -> 0; -byte_size_exl_single_slash(Rest) -> byte_size(Rest). - - %%------------------------------------------------------------------------- %% [RFC 3986, Chapter 2.1. 
Percent-Encoding] %% -- cgit v1.2.3 From 992cda82f16ee23b0114563858d5a082711f659b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?= Date: Wed, 25 Oct 2017 10:11:14 +0200 Subject: stdlib: Refactor compose_query --- lib/stdlib/src/uri_string.erl | 73 +++++++++++++++++++----------------- lib/stdlib/test/uri_string_SUITE.erl | 9 ++++- 2 files changed, 45 insertions(+), 37 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index 684087b870..2bf7ceaff1 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -396,19 +396,24 @@ compose_query(List) -> compose_query([],_Options) -> []; compose_query(List, Options) -> - try compose_query(List, Options, []) of + try compose_query(List, Options, false, <<>>) of Result -> Result catch throw:{error, Atom, RestData} -> {error, Atom, RestData} end. %% -compose_query([{Key,Value}|Rest], Options, Acc) -> - Separator = get_separator(Options, Acc), +compose_query([{Key,Value}|Rest], Options, IsList, Acc) -> + Separator = get_separator(Options, Rest), K = form_urlencode(Key), V = form_urlencode(Value), - compose_query(Rest, Options, Acc ++ Separator ++ K ++ "=" ++ V); -compose_query([], _Options, Acc) -> - Acc. + Flag = is_list(Key) orelse is_list(Value), + IsListNew = IsList orelse Flag, + compose_query(Rest, Options, IsListNew, < >); +compose_query([], _Options, IsList, Acc) -> + case IsList of + true -> convert_list(Acc, utf8); + false -> Acc + end. 
%%------------------------------------------------------------------------- @@ -1711,43 +1716,41 @@ percent_encode_segment(Segment) -> %%------------------------------------------------------------------------- %% Returns separator to be used between key-value pairs -get_separator(_, Acc) when length(Acc) =:= 0 -> - []; -get_separator([], _Acc) -> - "&"; -get_separator([{separator, amp}], _Acc) -> - "&"; -get_separator([{separator, escaped_amp}], _Acc) -> - "&"; -get_separator([{separator, semicolon}], _Acc) -> - ";". +get_separator(_, L) when length(L) =:= 0 -> + <<>>; +get_separator([], _L) -> + <<"&">>; +get_separator([{separator, amp}], _L) -> + <<"&">>; +get_separator([{separator, escaped_amp}], _L) -> + <<"&">>; +get_separator([{separator, semicolon}], _L) -> + <<";">>. %% Form-urlencode input based on RFC 1866 [8.2.1] -form_urlencode(Cs) when is_binary(Cs) -> - L = convert_list(Cs, utf8), - form_urlencode(L, []); +form_urlencode(Cs) when is_list(Cs) -> + B = convert_binary(Cs, utf8, utf8), + form_urlencode(B, <<>>); form_urlencode(Cs) -> - L = flatten_list(Cs, utf8), - form_urlencode(L, []). + form_urlencode(Cs, <<>>). %% -form_urlencode([], Acc) -> - lists:reverse(Acc); -form_urlencode([$ |T], Acc) -> - form_urlencode(T, [$+|Acc]); -form_urlencode([H|T], Acc) -> +form_urlencode(<<>>, Acc) -> + Acc; +form_urlencode(<<$ ,T/binary>>, Acc) -> + form_urlencode(T, < >); +form_urlencode(< >, Acc) -> case is_url_char(H) of true -> - form_urlencode(T, [H|Acc]); + form_urlencode(T, < >); false -> - E = urlencode_char(H), - form_urlencode(T, lists:reverse(E) ++ Acc) - end. - - -urlencode_char(C) -> - B = percent_encode_binary(C), - unicode:characters_to_list(B). + E = percent_encode_binary(H), + form_urlencode(T, < >) + end; +form_urlencode(< >, _Acc) -> + throw({error,invalid_utf8,< >}); +form_urlencode(H, _Acc) -> + throw({error,badarg, H}). 
%% Return true if input char can appear in URL according to diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl index fe832ac82c..c230f7c631 100644 --- a/lib/stdlib/test/uri_string_SUITE.erl +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -830,10 +830,15 @@ compose_query(_Config) -> "foo=1&bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,escaped_amp}]), "foo=1&bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,amp}]), "foo=1;bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,semicolon}]), - "foo+bar=1&%C3%B6=2" = uri_string:compose_query([{"foo bar","1"}, {"ö", "2"}]). + "foo+bar=1&%C3%B6=2" = uri_string:compose_query([{"foo bar","1"}, {"ö", "2"}]), + "foo+bar=1&%C3%B6=2" = uri_string:compose_query([{<<"foo bar">>,<<"1">>}, {"ö", <<"2">>}]), + <<"foo+bar=1&%C3%B6=2">> = + uri_string:compose_query([{<<"foo bar">>,<<"1">>}, {<<"ö"/utf8>>, <<"2">>}]). compose_query_negative(_Config) -> - {error,badarg,4} = uri_string:compose_query([{"",4}]). + {error,badarg,4} = uri_string:compose_query([{"",4}]), + {error,badarg,5} = uri_string:compose_query([{5,""}]), + {error,invalid_utf8,<<"ö">>} = uri_string:compose_query([{"foo bar","1"}, {<<"ö">>, "2"}]). 
dissect_query(_Config) -> [] = uri_string:dissect_query(""), -- cgit v1.2.3 From eba3d3e5e9b08839dafcb2e8adc6620d9211d96c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?= Date: Wed, 25 Oct 2017 14:43:45 +0200 Subject: stdlib: Refactor dissect_query --- lib/stdlib/src/uri_string.erl | 91 ++++++++++++++++++------------------ lib/stdlib/test/uri_string_SUITE.erl | 18 +++++-- 2 files changed, 59 insertions(+), 50 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index 2bf7ceaff1..09bf4aef1d 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -423,18 +423,21 @@ compose_query([], _Options, IsList, Acc) -> QueryString :: uri_string(), QueryList :: [{string(), string()}] | {error, atom(), list() | binary()}. +dissect_query(<<>>) -> + []; dissect_query([]) -> []; -dissect_query(QueryString) when is_binary(QueryString) -> - L = convert_list(QueryString, utf8), - try dissect_query_key(L, [], [], []) of +dissect_query(QueryString) when is_list(QueryString) -> + try + B = convert_binary(QueryString, utf8, utf8), + dissect_query_key(B, true, [], <<>>, <<>>) + of Result -> Result catch throw:{error, Atom, RestData} -> {error, Atom, RestData} end; dissect_query(QueryString) -> - L = flatten_list(QueryString, utf8), - try dissect_query_key(L, [], [], []) of + try dissect_query_key(QueryString, false, [], <<>>, <<>>) of Result -> Result catch throw:{error, Atom, RestData} -> {error, Atom, RestData} @@ -1706,7 +1709,6 @@ flatten_list(Arg, _, _) -> throw({error, badarg, Arg}). - percent_encode_segment(Segment) -> percent_encode_binary(Segment, <<>>). @@ -1790,50 +1792,48 @@ is_unsafe(_) -> false. 
%%------------------------------------------------------------------------- %% Helper functions for dissect_query %%------------------------------------------------------------------------- -dissect_query_key([$=|T], Acc, Key, Value) -> - dissect_query_value(T, Acc, Key, Value); -dissect_query_key([H|T], Acc, Key, Value) -> - dissect_query_key(T, Acc, [H|Key], Value); -dissect_query_key(L, _, _, _) -> - throw({error, missing_value, L}). - - -dissect_query_value([$&|_] = L, Acc, Key, Value) -> - K = form_urldecode(lists:reverse(Key)), - V = form_urldecode(lists:reverse(Value)), - dissect_query_separator_amp(L, [{K,V}|Acc], [], []); -dissect_query_value([$;|_] = L, Acc, Key, Value) -> - K = form_urldecode(lists:reverse(Key)), - V = form_urldecode(lists:reverse(Value)), - dissect_query_separator_semicolon(L, [{K,V}|Acc], [], []); -dissect_query_value([H|T], Acc, Key, Value) -> - dissect_query_value(T, Acc, Key, [H|Value]); -dissect_query_value([], Acc, Key, Value) -> - K = form_urldecode(lists:reverse(Key)), - V = form_urldecode(lists:reverse(Value)), +dissect_query_key(<<$=,T/binary>>, IsList, Acc, Key, Value) -> + dissect_query_value(T, IsList, Acc, Key, Value); +dissect_query_key(< >, IsList, Acc, Key, Value) -> + dissect_query_key(T, IsList, Acc, < >, Value); +dissect_query_key(B, _, _, _, _) -> + throw({error, missing_value, B}). 
+ + +dissect_query_value(<<$&,_/binary>> = B, IsList, Acc, Key, Value) -> + K = form_urldecode(IsList, Key), + V = form_urldecode(IsList, Value), + dissect_query_separator_amp(B, IsList, [{K,V}|Acc], <<>>, <<>>); +dissect_query_value(<<$;,_/binary>> = B, IsList, Acc, Key, Value) -> + K = form_urldecode(IsList, Key), + V = form_urldecode(IsList, Value), + dissect_query_separator_semicolon(B, IsList, [{K,V}|Acc], <<>>, <<>>); +dissect_query_value(< >, IsList, Acc, Key, Value) -> + dissect_query_value(T, IsList, Acc, Key, < >); +dissect_query_value(<<>>, IsList, Acc, Key, Value) -> + K = form_urldecode(IsList, Key), + V = form_urldecode(IsList, Value), lists:reverse([{K,V}|Acc]). -dissect_query_separator_amp("&" ++ T, Acc, Key, Value) -> - dissect_query_key(T, Acc, Key, Value); -dissect_query_separator_amp("&" ++ T, Acc, Key, Value) -> - dissect_query_key(T, Acc, Key, Value); -dissect_query_separator_amp(L, _, _, _) -> - throw({error, invalid_separator, L}). +dissect_query_separator_amp(<<"&",T/binary>>, IsList, Acc, Key, Value) -> + dissect_query_key(T, IsList, Acc, Key, Value); +dissect_query_separator_amp(<<$&,T/binary>>, IsList, Acc, Key, Value) -> + dissect_query_key(T, IsList, Acc, Key, Value). -dissect_query_separator_semicolon([$;|T], Acc, Key, Value) -> - dissect_query_key(T, Acc, Key, Value). +dissect_query_separator_semicolon(<<$;,T/binary>>, IsList, Acc, Key, Value) -> + dissect_query_key(T, IsList, Acc, Key, Value). %% Form-urldecode input based on RFC 1866 [8.2.1] -form_urldecode(Cs) -> - B = convert_binary(Cs, utf8, utf8), +form_urldecode(true, B) -> Result = form_urldecode(B, <<>>), - convert_list(Result, utf8). 
-%% + convert_list(Result, utf8); +form_urldecode(false, B) -> + form_urldecode(B, <<>>); form_urldecode(<<>>, Acc) -> - convert_list(Acc, utf8); + Acc; form_urldecode(<<$+,T/binary>>, Acc) -> form_urldecode(T, < >); form_urldecode(<<$%,C0,C1,T/binary>>, Acc) -> @@ -1843,13 +1843,14 @@ form_urldecode(<<$%,C0,C1,T/binary>>, Acc) -> form_urldecode(T, < >); false -> L = convert_list(<<$%,C0,C1,T/binary>>, utf8), - throw({error, urldecode, L}) + throw({error, invalid_percent_encoding, L}) end; -form_urldecode(< >, Acc) -> +form_urldecode(< >, Acc) -> case is_url_char(H) of true -> form_urldecode(T, < >); false -> - L = convert_list(< >, utf8), - throw({error, urldecode, L}) - end. + throw({error, invalid_character, [H]}) + end; +form_urldecode(< >, _Acc) -> + throw({error, invalid_character, [H]}). diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl index c230f7c631..2fc4e1a092 100644 --- a/lib/stdlib/test/uri_string_SUITE.erl +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -846,13 +846,21 @@ dissect_query(_Config) -> [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1&bar=2"), [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1;bar=2"), [{"foo","1"}, {"bar", "222"}] = uri_string:dissect_query([<<"foo=1;bar=2">>,"22"]), - [{"foo","ö"}, {"bar", "2"}] = uri_string:dissect_query("foo=%C3%B6&bar=2"). + [{"foo","ö"}, {"bar", "2"}] = uri_string:dissect_query("foo=%C3%B6&bar=2"), + [{<<"foo">>,<<"ö"/utf8>>}, {<<"bar">>, <<"2">>}] = + uri_string:dissect_query(<<"foo=%C3%B6&bar=2">>), + [{"foo bar","1"},{"ö","2"}] = + uri_string:dissect_query([<<"foo+bar=1&">>,<<"%C3%B6=2">>]). 
dissect_query_negative(_Config) -> - {error,urldecode,";bar"} = + {error,invalid_character,";"} = uri_string:dissect_query("foo=1≈bar=2"), - {error,urldecode,"&bar"} = + {error,invalid_character,"&"} = uri_string:dissect_query("foo1&bar=2"), - {error,urldecode,"%XX%B6"} = uri_string:dissect_query("foo=%XX%B6&bar=2"), + {error,invalid_percent_encoding,"%XX%B6"} = uri_string:dissect_query("foo=%XX%B6&bar=2"), {error,invalid_input,<<153,182>>} = - uri_string:dissect_query("foo=%99%B6&bar=2"). + uri_string:dissect_query("foo=%99%B6&bar=2"), + {error,invalid_character,"ö"} = uri_string:dissect_query("föo+bar=1&%C3%B6=2"), + {error,invalid_character,"ö"} = uri_string:dissect_query(<<"föo+bar=1&%C3%B6=2">>), + {error,invalid_input,<<"ö">>} = + uri_string:dissect_query([<<"foo+bar=1&">>,<<"%C3%B6=2ö">>]). -- cgit v1.2.3 From b0c682a8118c5775da784e9a0f569ee995319f80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?= Date: Thu, 26 Oct 2017 11:29:48 +0200 Subject: stdlib: Update documentation, error tuples --- lib/stdlib/doc/src/uri_string.xml | 117 +++++++++++++++++++++++++---------- lib/stdlib/src/uri_string.erl | 44 ++++++------- lib/stdlib/test/uri_string_SUITE.erl | 2 +- 3 files changed, 109 insertions(+), 54 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml index d67c687fd1..8322eecb24 100644 --- a/lib/stdlib/doc/src/uri_string.xml +++ b/lib/stdlib/doc/src/uri_string.xml @@ -30,10 +30,13 @@ uri_string URI processing functions. - This module contains functions for parsing and handling URIs (RFC 3986) and - form-urlencoded query strings (RFC 1866).
+This module contains functions for parsing and handling URIs + (
RFC 3986) and + form-urlencoded query strings (RFC 1866). +A URI is an identifier consisting of a sequence of characters matching the syntax - rule named URI in RFC 3986.
+ rule named URI in RFC 3986. +The generic URI syntax consists of a hierarchical sequence of components referred to as the scheme, authority, path, query, and fragment:
@@ -55,16 +58,24 @@
The interpretation of a URI depends only on the characters used and not on how those characters are represented in a network protocol.
-The functions implemented by this module covers the following use cases:
+The functions implemented by this module cover the following use cases:
-
- Parsing URIs
-
-parse/1 - Recomposing URIs
-
-recompose/2 - Transcoding URIs
-
-transcode/2 - Working with form-urlencoded query strings
+
-compose_query/[1,2], dissect_query/1 - Parsing URIs into their components and returning a map
+
++ parse/1 - Recomposing a map of URI components into a URI string
+
++ recompose/1 - Changing inbound binary and percent-encoding of URIs
+
++ transcode/2 - Composing form-urlencoded query strings from a list of key-value pairs
+
+compose_query/1
++ compose_query/2 - Dissecting form-urlencoded query strings into a list of key-value pairs
++ dissect_query/1 There are four different encodings present during the handling of URIs:
@@ -75,14 +86,29 @@
Unless otherwise specified the return value type and encoding are the same as the input type and encoding. That is, binary input returns binary output, list input returns a list - output but mixed input returns list output. Input and output encodings are the same except - for
+ output but mixed input returns list output.transcode/2 .All of the functions but
transcode/2 expect input as unicode codepoints in lists, UTF-8 encoding in binaries and UTF-8 encoding in percent-encoded URI parts.transcode/2 provides the means to convert between the supported URI encodings.+ + + + +Error tuple indicating the type of error. Possible values of the second component:
++
+- +
invalid_character - +
invalid_input - +
invalid_map - +
invalid_percent_encoding - +
invalid_scheme - +
invalid_uri - +
invalid_utf8 - +
missing_value @@ -93,7 +119,8 @@ @@ -127,11 +162,14 @@@@ -109,13 +136,21 @@ List of unicode codepoints, UTF-8 encoded binary, or a mix of the two, - representing an RFC 3986 compliant URI (percent-encoded form). + representing an
RFC 3986 + compliant URI (percent-encoded form). A URI is a sequence of characters from a very limited set: the letters of the basic Latin alphabet, digits, and a few special characters.Composes a form-urlencoded
+based on a QueryString , a list of unescaped key-value pairs. Media type QueryList application/x-www-form-urlencoded is defined in section - 8.2.1 ofRFC 1866 (HTML 2.0). Reserved and unsafe characters, as - defined by RFC 1738 (Uniform Resource Locators), are percent-encoded. + 8.2.1 ofRFC 1866 + (HTML 2.0). Reserved and unsafe characters, as + defined byRFC 1738 + (Uniform Resource Locators), are percent-encoded.See also the opposite operation
+ .dissect_query/1 Example:
-1> uri_string:compose_query([{"foo bar","1"},{"city","örebro"}]). - +1> uri_string:compose_query([{"foo bar","1"},{"city","örebro"}], +1> [{separator, semicolon}]). +"foo+bar=1;city=%C3%B6rebro" +2> >,<<"1">>}, +2> {<<"city">>,<<"örebro"/utf8>>}]).]]> +>]]>Same as
+compose_query/1 but with an additionalparameter, that controls the type of separator used between key-value pairs. There are three supported separator types: Options amp (),escaped_amp () andsemicolon (;). If the parameteris empty, separator takes the default value ( Options escaped_amp ).See also the opposite operation
+ . +dissect_query/1 Example:
1> uri_string:compose_query([{"foo bar","1"},{"city","örebro"}], -2> [{separator, semicolon}]). -"foo+bar=1;city=%C3%B6rebro" +1> [{separator, amp}]). +Dissects an urlencoded
+and returns a QueryString , a list of unescaped key-value pairs. Media type QueryList application/x-www-form-urlencoded is defined in section - 8.2.1 ofRFC 1866 (HTML 2.0). Percent-encoded segments are decoded - as defined by RFC 1738 (Uniform Resource Locators). + 8.2.1 ofRFC 1866 + (HTML 2.0). Percent-encoded segments are decoded + as defined byRFC 1738 + (Uniform Resource Locators).See also the opposite operation
+ .compose_query/1 Example:
1> uri_string:dissect_query("foo+bar=1;city=%C3%B6rebro"). [{"foo bar","1"},{"city","örebro"}] +2> >).]]> +>,<<"1">>},{<<"city">>,<<"örebro"/utf8>>}] ]]>@@ -159,14 +203,19 @@Parse URI into a map. @@ -175,12 +224,15 @@ Returns a
-URIMap , that is a uri_map() with the parsed components - of the. URIString If parsing fails, an error tuple is returned.
+ of the. If parsing fails, an error tuple is returned. + URIString See also the opposite operation
+ .recompose/1 Example:
1> uri_string:parse("foo://user@example.com:8042/over/there?name=ferret#nose"). #{fragment => "nose",host => "example.com", path => "/over/there",port => 8042,query => "name=ferret", scheme => foo,userinfo => "user"} +2> >).]]> + <<"example.com">>,path => <<"/over/there">>, + port => 8042,query => <<"name=ferret">>,scheme => <<"foo">>, + userinfo => <<"user">>}]]>Recompose URI. - Returns an RFC 3986 compliant
-(percent-encoded). URIString If the
+is invalid, an error tuple is returned. URIMap Returns an
+RFC 3986 compliant +(percent-encoded). + If the URIString is invalid, an error tuple is returned. URIMap See also the opposite operation
+ .parse/1 Example:
1> URIMap = #{fragment => "nose", host => "example.com", path => "/over/there", -port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}. +1> port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}. #{fragment => "top",host => "example.com", path => "/over/there",port => 8042,query => "?name=ferret", scheme => foo,userinfo => "user"} @@ -194,14 +246,15 @@ port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}.Transcode URI. - diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index 09bf4aef1d..ca212284d2 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -229,7 +229,7 @@ -export([compose_query/1, compose_query/2, dissect_query/1, parse/1, recompose/1, transcode/2]). --export_type([uri_map/0, uri_string/0]). +-export_type([error/0, uri_map/0, uri_string/0]). %%------------------------------------------------------------------------- @@ -273,6 +273,8 @@ %% %x96 ` grave / accent %%------------------------------------------------------------------------- -type uri_string() :: iodata(). +-type error() :: {error, atom(), list() | binary()}. + %%------------------------------------------------------------------------- %% RFC 3986, Chapter 3. Syntax Components @@ -292,7 +294,7 @@ -spec parse(URIString) -> URIMap when URIString :: uri_string(), URIMap :: uri_map() - | {error, atom(), list() | binary()}. + | error(). parse(URIString) when is_binary(URIString) -> try parse_uri_reference(URIString, #{}) of Result -> Result @@ -317,7 +319,7 @@ parse(URIString) when is_list(URIString) -> -spec recompose(URIMap) -> URIString when URIMap :: uri_map(), URIString :: uri_string() - | {error, atom(), list() | binary()}. + | error(). 
recompose(Map) -> case is_valid_map(Map) of false -> @@ -346,7 +348,7 @@ recompose(Map) -> URIString :: uri_string(), Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}], Result :: uri_string() - | {error, atom(), list() | binary()}. + | error(). transcode(URIString, Options) when is_binary(URIString) -> try InEnc = proplists:get_value(in_encoding, Options, utf8), @@ -357,7 +359,7 @@ transcode(URIString, Options) when is_binary(URIString) -> of Result -> Result catch - throw:{error, _, RestData} -> {error, invalid_input, RestData} + throw:{error, Atom, RestData} -> {error, Atom, RestData} end; transcode(URIString, Options) when is_list(URIString) -> InEnc = proplists:get_value(in_encoding, Options, utf8), @@ -366,7 +368,7 @@ transcode(URIString, Options) when is_list(URIString) -> try transcode(Flattened, [], InEnc, OutEnc) of Result -> Result catch - throw:{error, _, RestData} -> {error, invalid_input, RestData} + throw:{error, Atom, RestData} -> {error, Atom, RestData} end. @@ -382,8 +384,8 @@ transcode(URIString, Options) when is_list(URIString) -> %%------------------------------------------------------------------------- -spec compose_query(QueryList) -> QueryString when QueryList :: [{uri_string(), uri_string()}], - QueryString :: string() - | {error, atom(), list() | binary()}. + QueryString :: uri_string() + | error(). compose_query(List) -> compose_query(List, []). @@ -391,8 +393,8 @@ compose_query(List) -> -spec compose_query(QueryList, Options) -> QueryString when QueryList :: [{uri_string(), uri_string()}], Options :: [{separator, atom()}], - QueryString :: string() - | {error, atom(), list() | binary()}. + QueryString :: uri_string() + | error(). 
compose_query([],_Options) -> []; compose_query(List, Options) -> @@ -421,8 +423,8 @@ compose_query([], _Options, IsList, Acc) -> %%------------------------------------------------------------------------- -spec dissect_query(QueryString) -> QueryList when QueryString :: uri_string(), - QueryList :: [{string(), string()}] - | {error, atom(), list() | binary()}. + QueryList :: [{uri_string(), uri_string()}] + | error(). dissect_query(<<>>) -> []; dissect_query([]) -> @@ -1249,9 +1251,9 @@ decode_fragment(Cs) -> check_utf8(Cs) -> case unicode:characters_to_list(Cs) of {incomplete,_,_} -> - throw({error,non_utf8,Cs}); + throw({error,invalid_utf8,Cs}); {error,_,_} -> - throw({error,non_utf8,Cs}); + throw({error,invalid_utf8,Cs}); _ -> Cs end. @@ -1304,12 +1306,12 @@ decode(<<$%,C0,C1,Cs/binary>>, Fun, Acc) -> true -> B = ?HEX2DEC(C0)*16+?HEX2DEC(C1), decode(Cs, Fun, <Transcodes an RFC 3986 compliant
, + URIString Transcodes an
-RFC 3986 + compliant, where URIString is a list of tagged tuples, specifying the inbound - ( Options in_encoding ) and outbound (out_encoding ) encodings.If an argument is invalid, an error tuple is returned.
+ (in_encoding ) and outbound (out_encoding ) encodings. + If an argument is invalid, an error tuple is returned.Example:
1> >,]]> -2> [{in_encoding, utf32},{out_encoding, utf8}]). +1> [{in_encoding, utf32},{out_encoding, utf8}]). >]]>>); - false -> throw({error,percent_decode,<<$%,C0,C1>>}) + false -> throw({error,invalid_percent_encoding,<<$%,C0,C1>>}) end; decode(< >, Fun, Acc) -> case Fun(C) of true -> decode(Cs, Fun, < >); - false -> throw({error,percent_decode,< >}) + false -> throw({error,invalid_percent_encoding,< >}) end; decode(<<>>, _Fun, Acc) -> Acc. @@ -1339,7 +1341,7 @@ encode(< >, Fun, Acc) -> C = encode_codepoint_binary(Char, Fun), encode(Rest, Fun, < >); encode(< >, _Fun, _Acc) -> - throw({error,percent_encode,< >}); + throw({error,invalid_input,< >}); encode(<<>>, _Fun, Acc) -> Acc. @@ -1647,12 +1649,12 @@ transcode([], Acc, List, _InEncoding, _OutEncoding) -> %% Transcode percent-encoded segment -transcode_pct([$%,C0,C1|Rest], Acc, B, InEncoding, OutEncoding) -> +transcode_pct([$%,C0,C1|Rest] = L, Acc, B, InEncoding, OutEncoding) -> case is_hex_digit(C0) andalso is_hex_digit(C1) of true -> Int = ?HEX2DEC(C0)*16+?HEX2DEC(C1), transcode_pct(Rest, Acc, <>, InEncoding, OutEncoding); - false -> throw({error, lists:reverse(Acc),[C0,C1]}) + false -> throw({error, invalid_percent_encoding,L}) end; transcode_pct([_C|_Rest] = L, Acc, B, InEncoding, OutEncoding) -> OutBinary = convert_binary(B, InEncoding, OutEncoding), @@ -1706,7 +1708,7 @@ flatten_list([H|T], InEnc, Acc) -> flatten_list([], _InEnc, Acc) -> lists:reverse(Acc); flatten_list(Arg, _, _) -> - throw({error, badarg, Arg}). + throw({error, invalid_input, Arg}). percent_encode_segment(Segment) -> @@ -1752,7 +1754,7 @@ form_urlencode(< >, Acc) -> form_urlencode(< >, _Acc) -> throw({error,invalid_utf8,< >}); form_urlencode(H, _Acc) -> - throw({error,badarg, H}). + throw({error,invalid_input, H}). 
%% Return true if input char can appear in URL according to diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl index 2fc4e1a092..95a49f5eb3 100644 --- a/lib/stdlib/test/uri_string_SUITE.erl +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -819,7 +819,7 @@ transcode_mixed(_Config) -> uri_string:transcode(["foo%00", <<"%00%0"/utf32>>,<<"0%F"/utf32>>,"6bar"], [{in_encoding, utf32},{out_encoding, utf8}]). transcode_negative(_Config) -> - {error,invalid_input,"BX"} = + {error,invalid_percent_encoding,"%BXbar"} = uri_string:transcode(<<"foo%C3%BXbar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]), {error,invalid_input,<<"ö">>} = uri_string:transcode("foo%F6bar", [{in_encoding, utf8},{out_encoding, utf8}]). -- cgit v1.2.3 From 3d12c8f164f79dd67967ba5c7df7d3c555dc0f29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?= Date: Fri, 27 Oct 2017 14:14:22 +0200 Subject: stdlib: Allow undefined port in uri_map() uri_map() updated to allow 'undefined' ports in order to align the implementation with RFC 3986: port = *DIGIT An 'undefined' port is mapped to a ":" during recompose operation. --- lib/stdlib/src/uri_string.erl | 39 ++++++++++++++++++---- .../test/property_test/uri_string_recompose.erl | 5 +-- lib/stdlib/test/uri_string_SUITE.erl | 11 +++--- 3 files changed, 43 insertions(+), 12 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index ca212284d2..16650d5005 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -283,7 +283,7 @@ #{fragment => unicode:chardata(), host => unicode:chardata(), path => unicode:chardata(), - port => non_neg_integer(), + port => non_neg_integer() | undefined, query => unicode:chardata(), scheme => unicode:chardata(), userinfo => unicode:chardata()} | #{}. @@ -807,7 +807,7 @@ is_userinfo(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). 
parse_host(?STRING_REST($:, Rest), URI) -> {T, URI1} = parse_port(Rest, URI), H = calculate_parsed_host_port(Rest, T), - Port = binary_to_integer(H), + Port = get_port(H), {Rest, URI1#{port => Port}}; parse_host(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-abempty @@ -836,7 +836,7 @@ parse_host(?STRING_EMPTY, URI) -> parse_reg_name(?STRING_REST($:, Rest), URI) -> {T, URI1} = parse_port(Rest, URI), H = calculate_parsed_host_port(Rest, T), - Port = binary_to_integer(H), + Port = get_port(H), {Rest, URI1#{port => Port}}; parse_reg_name(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-abempty @@ -869,7 +869,7 @@ parse_ipv4_bin(?STRING_REST($:, Rest), Acc, URI) -> _ = validate_ipv4_address(lists:reverse(Acc)), {T, URI1} = parse_port(Rest, URI), H = calculate_parsed_host_port(Rest, T), - Port = binary_to_integer(H), + Port = get_port(H), {Rest, URI1#{port => Port}}; parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) -> _ = validate_ipv4_address(lists:reverse(Acc)), @@ -932,7 +932,7 @@ is_ipv6(Char) -> is_hex_digit(Char). parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) -> {T, URI1} = parse_port(Rest, URI), H = calculate_parsed_host_port(Rest, T), - Port = binary_to_integer(H), + Port = get_port(H), {Rest, URI1#{port => Port}}; parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-abempty @@ -1148,7 +1148,7 @@ calculate_parsed_userinfo(Input, Unparsed) -> -spec calculate_parsed_host_port(binary(), binary()) -> binary(). calculate_parsed_host_port(Input, <<>>) -> - strip_last_char(Input, [$?,$#,$/]); + strip_last_char(Input, [$:,$?,$#,$/]); calculate_parsed_host_port(Input, Unparsed) -> get_parsed_binary(Input, Unparsed). @@ -1159,6 +1159,18 @@ calculate_parsed_query_fragment(Input, Unparsed) -> get_parsed_binary(Input, Unparsed). 
+get_port(<<>>) -> + undefined; +get_port(B) -> + try binary_to_integer(B) of + Port -> + Port + catch + error:badarg -> + throw({error, invalid_uri, B}) + end. + + %% Strip last char if it is in list strip_last_char(<<>>, _) -> <<>>; strip_last_char(Input, [C0]) -> @@ -1187,6 +1199,19 @@ strip_last_char(Input, [C0,C1,C2]) -> init_binary(Input); _Else -> Input + end; +strip_last_char(Input, [C0,C1,C2,C3]) -> + case binary:last(Input) of + C0 -> + init_binary(Input); + C1 -> + init_binary(Input); + C2 -> + init_binary(Input); + C3 -> + init_binary(Input); + _Else -> + Input end. @@ -1530,6 +1555,8 @@ update_host(#{}, URI) -> %% URI cannot be empty for ports. E.g. ":8080" is not a valid URI +update_port(#{port := undefined}, URI) -> + concat(URI, <<":">>); update_port(#{port := Port}, URI) -> concat(URI,add_colon(encode_port(Port))); update_port(#{}, URI) -> diff --git a/lib/stdlib/test/property_test/uri_string_recompose.erl b/lib/stdlib/test/property_test/uri_string_recompose.erl index 97f9d727a0..e51a671172 100644 --- a/lib/stdlib/test/property_test/uri_string_recompose.erl +++ b/lib/stdlib/test/property_test/uri_string_recompose.erl @@ -267,8 +267,9 @@ host_uri() -> %% Port, Query, Fragment %%------------------------------------------------------------------------- port() -> - range(1,65535). - + frequency([{10, undefined}, + {10, range(1,65535)} + ]). query_map() -> unicode(). diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl index 95a49f5eb3..9ee321c509 100644 --- a/lib/stdlib/test/uri_string_SUITE.erl +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -692,7 +692,9 @@ parse_special2(_Config) -> #{host := [],path := [],userinfo := []} = uri_string:parse("//@"), #{host := [],path := [],scheme := "foo",userinfo := []} = uri_string:parse("foo://@"), #{host := [],path := "/",userinfo := []} = uri_string:parse("//@/"), - #{host := [],path := "/",scheme := "foo",userinfo := []} = uri_string:parse("foo://@/"). 
+ #{host := [],path := "/",scheme := "foo",userinfo := []} = uri_string:parse("foo://@/"), + #{host := "localhost",path := "/",port := undefined} = uri_string:parse("//localhost:/"), + #{host := [],path := [],port := undefined} = uri_string:parse("//:"). parse_negative(_Config) -> {error,invalid_uri,"å"} = uri_string:parse("å"), @@ -702,7 +704,8 @@ parse_negative(_Config) -> {error,invalid_uri,"ö"} = uri_string:parse("//host/path#foö"), {error,invalid_uri,"127.256.0.1"} = uri_string:parse("//127.256.0.1"), {error,invalid_uri,":::127.0.0.1"} = uri_string:parse("//[:::127.0.0.1]"), - {error,non_utf8,<<0,0,0,246>>} = uri_string:parse("//%00%00%00%F6"). + {error,invalid_utf8,<<0,0,0,246>>} = uri_string:parse("//%00%00%00%F6"), + {error,invalid_uri,"A"} = uri_string:parse("//localhost:A8"). %%------------------------------------------------------------------------- @@ -836,8 +839,8 @@ compose_query(_Config) -> uri_string:compose_query([{<<"foo bar">>,<<"1">>}, {<<"ö"/utf8>>, <<"2">>}]). compose_query_negative(_Config) -> - {error,badarg,4} = uri_string:compose_query([{"",4}]), - {error,badarg,5} = uri_string:compose_query([{5,""}]), + {error,invalid_input,4} = uri_string:compose_query([{"",4}]), + {error,invalid_input,5} = uri_string:compose_query([{5,""}]), {error,invalid_utf8,<<"ö">>} = uri_string:compose_query([{"foo bar","1"}, {<<"ö">>, "2"}]). 
dissect_query(_Config) -> -- cgit v1.2.3 From ce78af7e5a76dc4a27673ab5c80a315762b992b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?= Date: Fri, 27 Oct 2017 16:54:27 +0200 Subject: stdlib: Implement normalize/1 Implements the following Syntax-Based Normalizations: - Case Normalization - Percent-Encoding Normalization - Path Segment Normalization - Scheme-Based Normalization - HTTP(S) - Basic support for FTP, SSH, SFTP, TFTP --- lib/stdlib/src/uri_string.erl | 167 ++++++++++++++++++++++++++++++++++- lib/stdlib/test/uri_string_SUITE.erl | 22 +++++ 2 files changed, 188 insertions(+), 1 deletion(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index 16650d5005..cf8c388f54 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -227,7 +227,7 @@ %% External API %%------------------------------------------------------------------------- -export([compose_query/1, compose_query/2, - dissect_query/1, parse/1, + dissect_query/1, normalize/1, parse/1, recompose/1, transcode/2]). -export_type([error/0, uri_map/0, uri_string/0]). @@ -288,6 +288,21 @@ scheme => unicode:chardata(), userinfo => unicode:chardata()} | #{}. + +%%------------------------------------------------------------------------- +%% Normalize URIs +%%------------------------------------------------------------------------- +-spec normalize(URIString) -> NormalizedURI when + URIString :: uri_string(), + NormalizedURI :: uri_string(). +normalize(URIString) -> + %% Case normalization and percent-encoding normalization are achieved + %% by running parse and recompose on the input URI string. + M = parse(URIString), + M1 = normalize_scheme_based(M), + M2 = normalize_path_segment(M1), + recompose(M2). 
+ %%------------------------------------------------------------------------- %% Parse URIs %%------------------------------------------------------------------------- @@ -1883,3 +1898,153 @@ form_urldecode(< >, Acc) -> end; form_urldecode(< >, _Acc) -> throw({error, invalid_character, [H]}). + + +%%------------------------------------------------------------------------- +%% Helper functions for normalize +%%------------------------------------------------------------------------- + +%% RFC 3986 +%% 6.2.2.3. Path Segment Normalization +%% 5.2.4. Remove Dot Segments +normalize_path_segment(Map) -> + Path = maps:get(path, Map, undefined), + Map#{path => remove_dot_segments(Path)}. + + +remove_dot_segments(Path) when is_binary(Path) -> + remove_dot_segments(Path, <<>>); +remove_dot_segments(Path) when is_list(Path) -> + B = convert_binary(Path, utf8, utf8), + B1 = remove_dot_segments(B, <<>>), + convert_list(B1, utf8). +%% +remove_dot_segments(<<>>, Output) -> + Output; +remove_dot_segments(<<"../",T/binary>>, Output) -> + remove_dot_segments(T, Output); +remove_dot_segments(<<"./",T/binary>>, Output) -> + remove_dot_segments(T, Output); +remove_dot_segments(<<"/./",T/binary>>, Output) -> + remove_dot_segments(<<$/,T/binary>>, Output); +remove_dot_segments(<<"/.">>, Output) -> + remove_dot_segments(<<$/>>, Output); +remove_dot_segments(<<"/../",T/binary>>, Output) -> + Out1 = remove_last_segment(Output), + remove_dot_segments(<<$/,T/binary>>, Out1); +remove_dot_segments(<<"/..">>, Output) -> + Out1 = remove_last_segment(Output), + remove_dot_segments(<<$/>>, Out1); +remove_dot_segments(<<$.>>, Output) -> + remove_dot_segments(<<>>, Output); +remove_dot_segments(<<"..">>, Output) -> + remove_dot_segments(<<>>, Output); +remove_dot_segments(Input, Output) -> + {First, Rest} = first_path_segment(Input), + remove_dot_segments(Rest, <
Error tuple indicating the type of error. Possible values of the second component:
-The third component is a list or binary providing additional information about the + cause of the error.
URI map holding the main components of a URI.
+Map holding the main components of a URI.
List of unicode codepoints, UTF-8 encoded binary, or a mix of the two, +
List of unicode codepoints, a UTF-8 encoded binary, or a mix of the two,
representing an Composes a form-urlencoded See also the opposite operation Example: Dissects an urlencoded Supported separator types: See also the opposite operation Transforms This function implements case normalization, percent-encoding
+ normalization, path segment normalization and scheme-based normalization
+ for HTTP(S) with basic support for FTP, SSH, SFTP and TFTP. Example: Returns a Parses an See also the opposite operation Example: Returns an Creates an See also the opposite operation Transcodes an Example: The third component is a list or binary providing additional information about the
+ The third component is a term providing additional information about the
cause of the error.
-1> uri_string:compose_query([{"foo bar","1"},{"city","örebro"}],
-1> [{separator, semicolon}]).
-"foo+bar=1;city=%C3%B6rebro"
+1> uri_string:compose_query([{"foo bar","1"},{"city","örebro"}]).
+
2> >,<<"1">>},
2> {<<"city">>,<<"örebro"/utf8>>}]).]]>
>]]>
@@ -169,7 +183,10 @@
1> uri_string:compose_query([{"foo bar","1"},{"city","örebro"}],
1> [{separator, amp}]).
-
+ uri_string:compose_query([{<<"foo bar">>,<<"1">>},
+2> {<<"city">>,<<"örebro"/utf8>>}], [{separator, escaped_amp}]).]]>
+>]]>
1> uri_string:dissect_query("foo+bar=1;city=%C3%B6rebro").
[{"foo bar","1"},{"city","örebro"}]
-2> >).]]>
+2> >).]]>
>,<<"1">>},{<<"city">>,<<"örebro"/utf8>>}] ]]>
+1> uri_string:normalize("/a/b/c/./../../g").
+"/a/g"
+2> >).]]>
+>]]>
+3> uri_string:normalize("http://localhost:80").
+"http://localhost/"
+
+
1> >,]]>
1> [{in_encoding, utf32},{out_encoding, utf8}]).
>]]>
+2> uri_string:transcode("foo%F6bar", [{in_encoding, latin1},
+2> {out_encoding, utf8}]).
+"foo%C3%B6bar"
This module contains functions for parsing and handling URIs
- (
A URI is an identifier consisting of a sequence of characters matching the syntax
rule named URI in
-
-
-
There are four different encodings present during the handling of URIs:
Error tuple indicating the type of error. Possible values of the second component:
The third component is a term providing additional information about the cause of the error.
@@ -143,81 +133,6 @@Composes a form-urlencoded
See also the opposite operation
Example:
--1> uri_string:compose_query([{"foo bar","1"},{"city","örebro"}]). - -2> >,<<"1">>}, -2> {<<"city">>,<<"örebro"/utf8>>}]).]]> ->]]> --
Same as
See also the opposite operation
Example:
--1> uri_string:compose_query([{"foo bar","1"},{"city","örebro"}], -1> [{separator, amp}]). - uri_string:compose_query([{<<"foo bar">>,<<"1">>}, -2> {<<"city">>,<<"örebro"/utf8>>}], [{separator, escaped_amp}]).]]> ->]]> --
Dissects an urlencoded
Supported separator types:
See also the opposite operation
Example:
--1> uri_string:dissect_query("foo+bar=1;city=%C3%B6rebro"). -[{"foo bar","1"},{"city","örebro"}] -2> >).]]> ->,<<"1">>},{<<"city">>,<<"örebro"/utf8>>}] ]]> --