aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPéter Dimitrov <[email protected]>2017-11-07 16:23:09 +0100
committerPéter Dimitrov <[email protected]>2017-11-09 17:44:02 +0100
commit1bb2c76c09510bf761c4a6908ae78d1e2a87d574 (patch)
tree95632b54ac778233b4cc28ccb37110ef97c7882a
parent6db8210068a55696cd5e444d40d3676737113d03 (diff)
downloadotp-1bb2c76c09510bf761c4a6908ae78d1e2a87d574.tar.gz
otp-1bb2c76c09510bf761c4a6908ae78d1e2a87d574.tar.bz2
otp-1bb2c76c09510bf761c4a6908ae78d1e2a87d574.zip
stdlib: Implement compose and dissect query (HTML5)
Implement functions for handling form-urlencoded query strings based on the HTML5 specification.
-rw-r--r--lib/stdlib/doc/src/uri_string.xml98
-rw-r--r--lib/stdlib/src/uri_string.erl229
-rw-r--r--lib/stdlib/test/uri_string_SUITE.erl86
3 files changed, 409 insertions, 4 deletions
diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml
index 9ace2b0a05..21f470e763 100644
--- a/lib/stdlib/doc/src/uri_string.xml
+++ b/lib/stdlib/doc/src/uri_string.xml
@@ -31,7 +31,8 @@
<modulesummary>URI processing functions.</modulesummary>
<description>
<p>This module contains functions for parsing and handling URIs
- (<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>).
+ (<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>) and
+ form-urlencoded query strings (<url href="https://www.w3.org/TR/html5/forms.html">HTML5</url>).
</p>
<p>A URI is an identifier consisting of a sequence of characters matching the syntax
rule named <em>URI</em> in <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>.
@@ -71,6 +72,13 @@
<item>Transforming URIs into a normalized form<br></br>
<seealso marker="#normalize/1"><c>normalize/1</c></seealso>
</item>
+ <item>Composing form-urlencoded query strings from a list of key-value pairs<br></br>
+ <seealso marker="#compose_query/1"><c>compose_query/1</c></seealso><br></br>
+ <seealso marker="#compose_query/2"><c>compose_query/2</c></seealso>
+ </item>
+ <item>Dissecting form-urlencoded query strings into a list of key-value pairs<br></br>
+ <seealso marker="#dissect_query/1"><c>dissect_query/1</c></seealso>
+ </item>
</list>
<p>There are four different encodings present during the handling of URIs:</p>
<list type="bulleted">
@@ -102,12 +110,15 @@
<desc>
<p>Error tuple indicating the type of error. Possible values of the second component:</p>
<list type="bulleted">
+ <item><c>invalid_character</c></item>
+ <item><c>invalid_encoding</c></item>
<item><c>invalid_input</c></item>
<item><c>invalid_map</c></item>
<item><c>invalid_percent_encoding</c></item>
<item><c>invalid_scheme</c></item>
<item><c>invalid_uri</c></item>
<item><c>invalid_utf8</c></item>
+ <item><c>missing_value</c></item>
</list>
<p>The third component is a term providing additional information about the
cause of the error.</p>
@@ -134,6 +145,91 @@
<funcs>
<func>
+ <name name="compose_query" arity="1"/>
+ <fsummary>Compose urlencoded query string.</fsummary>
+ <desc>
+ <p>Composes a form-urlencoded <c><anno>QueryString</anno></c> based on a
+ <c><anno>QueryList</anno></c>, a list of non-percent-encoded key-value pairs.
+ Form-urlencoding is defined in section
+ 4.10.22.6 of the <url href="https://www.w3.org/TR/html5/forms.html">HTML5</url>
+ specification.
+ </p>
+ <p>See also the opposite operation <seealso marker="#dissect_query/1">
+ <c>dissect_query/1</c></seealso>.
+ </p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input>uri_string:compose_query([{"foo bar","1"},{"city","örebro"}]).</input>
+<![CDATA["foo+bar=1&city=%C3%B6rebro"]]>
+2> <![CDATA[uri_string:compose_query([{<<"foo bar">>,<<"1">>},
+2> {<<"city">>,<<"örebro"/utf8>>}]).]]>
+<![CDATA[<<"foo+bar=1&city=%C3%B6rebro">>]]>
+ </pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="compose_query" arity="2"/>
+ <fsummary>Compose urlencoded query string.</fsummary>
+ <desc>
+ <p>Same as <c>compose_query/1</c> but with an additional
+ <c><anno>Options</anno></c> parameter that controls the encoding ("charset")
+ used by the encoding algorithm. There are two supported encodings: <c>utf8</c>
+ (or <c>unicode</c>) and <c>latin1</c>.
+ </p>
+ <p>Each character in the entry's name and value that cannot be expressed using
+ the selected character encoding, is replaced by a string consisting of a U+0026
+ AMPERSAND character (<![CDATA[&]]>), a "#" (U+0023) character, one or more ASCII
+ digits representing the Unicode code point of the character in base ten, and
+ finally a ";" (U+003B) character.
+ </p>
+ <p>A space (0x20) is replaced by "+". All other bytes outside the set 0x2A, 0x2D, 0x2E,
+ 0x30 to 0x39, 0x41 to 0x5A, 0x5F, 0x61 to 0x7A are percent-encoded (U+0025 PERCENT SIGN
+ character (%) followed by uppercase ASCII hex digits representing the hexadecimal value of the byte).
+ </p>
+ <p>See also the opposite operation <seealso marker="#dissect_query/1">
+ <c>dissect_query/1</c></seealso>.
+ </p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input>uri_string:compose_query([{"foo bar","1"},{"city","örebro"}],</input>
+1> [{encoding, latin1}]).
+<![CDATA["foo+bar=1&city=%F6rebro"
+2> uri_string:compose_query([{<<"foo bar">>,<<"1">>},
+2> {<<"city">>,<<"東京"/utf8>>}], [{encoding, latin1}]).]]>
+<![CDATA[<<"foo+bar=1&city=%26%2326481%3B%26%2320140%3B">>]]>
+ </pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="dissect_query" arity="1"/>
+ <fsummary>Dissect query string.</fsummary>
+ <desc>
+ <p>Dissects a urlencoded <c><anno>QueryString</anno></c> and returns a
+ <c><anno>QueryList</anno></c>, a list of non-percent-encoded key-value pairs.
+ Form-urlencoding is defined in section
+ 4.10.22.6 of the <url href="https://www.w3.org/TR/html5/forms.html">HTML5</url>
+ specification.
+ </p>
+ <p>It is not as strict for its input as the decoding algorithm defined by
+ <url href="https://www.w3.org/TR/html5/forms.html">HTML5</url>
+ and accepts all unicode characters.</p>
+ <p>See also the opposite operation <seealso marker="#compose_query/1">
+ <c>compose_query/1</c></seealso>.
+ </p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input><![CDATA[uri_string:dissect_query("foo+bar=1&city=%C3%B6rebro").]]></input>
+[{"foo bar","1"},{"city","örebro"}]
+2> <![CDATA[uri_string:dissect_query(<<"foo+bar=1&city=%26%2326481%3B%26%2320140%3B">>).]]>
+<![CDATA[[{<<"foo bar">>,<<"1">>},
+ {<<"city">>,<<230,157,177,228,186,172>>}] ]]>
+ </pre>
+ </desc>
+ </func>
+
+ <func>
<name name="normalize" arity="1"/>
<fsummary>Syntax-based normalization.</fsummary>
<desc>
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index 22212da222..a84679c595 100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -226,7 +226,8 @@
%%-------------------------------------------------------------------------
%% External API
%%-------------------------------------------------------------------------
--export([normalize/1, parse/1,
+-export([compose_query/1, compose_query/2,
+ dissect_query/1, normalize/1, parse/1,
recompose/1, transcode/2]).
-export_type([error/0, uri_map/0, uri_string/0]).
@@ -381,6 +382,76 @@ transcode(URIString, Options) when is_list(URIString) ->
end.
+%%-------------------------------------------------------------------------
+%% Functions for working with the query part of a URI as a list
+%% of key/value pairs.
+%% HTML5 - 4.10.22.6 URL-encoded form data
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% compose_query/1
+%%
+%% Builds a form-urlencoded query string from a list of unescaped
+%% key/value pairs, using the utf8 encoding.
+%% (application/x-www-form-urlencoded encoding algorithm)
+%%-------------------------------------------------------------------------
+-spec compose_query(QueryList) -> QueryString when
+      QueryList :: [{uri_string(), uri_string()}],
+      QueryString :: uri_string()
+                   | error().
+compose_query(QueryList) ->
+    Utf8Options = [{encoding, utf8}],
+    compose_query(QueryList, Utf8Options).
+
+
+%% compose_query/2
+%%
+%% Same as compose_query/1, but the encoding is taken from Options.
+%% An empty QueryList yields [] without looking at Options. Any
+%% {error, Reason, Info} thrown while encoding is returned as a value.
+-spec compose_query(QueryList, Options) -> QueryString when
+      QueryList :: [{uri_string(), uri_string()}],
+      Options :: [{encoding, atom()}],
+      QueryString :: uri_string()
+                   | error().
+compose_query([], _Options) ->
+    [];
+compose_query(QueryList, Options) ->
+    try
+        compose_query(QueryList, Options, false, <<>>)
+    catch
+        throw:{error, _Reason, _Info} = Error ->
+            Error
+    end.
+%%
+%% compose_query/4
+%% Walks the pair list and appends "Key=Value" plus a separator to the
+%% binary accumulator. SawList records whether any key or value seen so
+%% far was a list; if so, the finished binary is converted to a list.
+compose_query([], _Options, true, Acc) ->
+    convert_to_list(Acc, utf8);
+compose_query([], _Options, false, Acc) ->
+    Acc;
+compose_query([{Key,Value}|Rest], Options, SawList, Acc) ->
+    EncKey = form_urlencode(Key, Options),
+    EncValue = form_urlencode(Value, Options),
+    Sep = get_separator(Rest),
+    SawListNew = SawList orelse is_list(Key) orelse is_list(Value),
+    compose_query(Rest, Options, SawListNew,
+                  <<Acc/binary,EncKey/binary,$=,EncValue/binary,Sep/binary>>).
+
+
+%%-------------------------------------------------------------------------
+%% dissect_query/1
+%%
+%% Splits a form-urlencoded query string into a list of unescaped
+%% key/value pairs. List input is converted to a binary first and the
+%% resulting keys and values are returned as lists; binary input gives
+%% binary keys and values. Empty input gives [].
+%% (application/x-www-form-urlencoded decoding algorithm)
+%%-------------------------------------------------------------------------
+-spec dissect_query(QueryString) -> QueryList when
+      QueryString :: uri_string(),
+      QueryList :: [{uri_string(), uri_string()}]
+                 | error().
+dissect_query(<<>>) ->
+    [];
+dissect_query([]) ->
+    [];
+dissect_query(QueryString) when is_list(QueryString) ->
+    try
+        dissect_query_key(convert_to_binary(QueryString, utf8, utf8),
+                          true, [], <<>>, <<>>)
+    catch
+        throw:{error, _Reason, _Info} = Error ->
+            Error
+    end;
+dissect_query(QueryString) ->
+    try
+        dissect_query_key(QueryString, false, [], <<>>, <<>>)
+    catch
+        throw:{error, _Reason, _Info} = Error ->
+            Error
+    end.
+
+
%%%========================================================================
%%% Internal functions
%%%========================================================================
@@ -585,6 +656,7 @@ maybe_add_path(Map) ->
end.
+
-spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}.
parse_scheme(?STRING_REST($:, Rest), URI) ->
{_, URI1} = parse_hier(Rest, URI),
@@ -1673,6 +1745,161 @@ percent_encode_segment(Segment) ->
%%-------------------------------------------------------------------------
+%% Helper functions for compose_query
+%%-------------------------------------------------------------------------
+
+%% Returns the separator to append after a key-value pair: an empty binary
+%% when no pairs remain, "&" otherwise.
+%% Matches on [] instead of calling length/1 in a guard, so the check is
+%% constant-time rather than a traversal of the remaining list for every pair.
+get_separator([]) ->
+    <<>>;
+get_separator(_L) ->
+    <<"&">>.
+
+
+%% HTML5 - 4.10.22.6 URL-encoded form data - encoding
+%%
+%% Form-urlencodes one key or value (list or binary) using the encoding
+%% selected by Options:
+%%   latin1         - characters above 255 are first replaced by base-ten
+%%                    "&#N;" references, then the bytes are encoded
+%%   utf8 | unicode - the UTF-8 bytes are encoded directly
+%% Throws {error, invalid_encoding, Encoding} for any other encoding and
+%% {error, invalid_input, Cs} when Cs is neither a list nor a binary (or
+%% Options does not have the [{encoding, _}] shape).
+form_urlencode(Cs, [{encoding, latin1}]) when is_list(Cs) ->
+    B = convert_to_binary(Cs, utf8, utf8),
+    html5_byte_encode(base10_encode(B));
+form_urlencode(Cs, [{encoding, latin1}]) when is_binary(Cs) ->
+    html5_byte_encode(base10_encode(Cs));
+%% ";" separates whole guard sequences, so the type test has to be repeated
+%% in each alternative; otherwise Encoding =:= unicode alone would match
+%% any Cs.
+form_urlencode(Cs, [{encoding, Encoding}])
+  when is_list(Cs), Encoding =:= utf8;
+       is_list(Cs), Encoding =:= unicode ->
+    B = convert_to_binary(Cs, utf8, Encoding),
+    html5_byte_encode(B);
+form_urlencode(Cs, [{encoding, Encoding}])
+  when is_binary(Cs), Encoding =:= utf8;
+       is_binary(Cs), Encoding =:= unicode ->
+    html5_byte_encode(Cs);
+form_urlencode(Cs, [{encoding, Encoding}]) when is_list(Cs); is_binary(Cs) ->
+    throw({error,invalid_encoding, Encoding});
+form_urlencode(Cs, _) ->
+    throw({error,invalid_input, Cs}).
+
+
+%% For each character in the entry's name and value that cannot be expressed using
+%% the selected character encoding, replace the character by a string consisting of
+%% a U+0026 AMPERSAND character (&), a "#" (U+0023) character, one or more ASCII
+%% digits representing the Unicode code point of the character in base ten, and
+%% finally a ";" (U+003B) character.
+%%
+%% Input is a UTF-8 binary. Code points up to 255 are emitted as a single
+%% byte; larger ones become "&#N;". Throws {error, invalid_input, <<Byte>>}
+%% at the first byte that does not start a valid UTF-8 sequence, so that
+%% malformed binaries are reported as an error tuple by compose_query/2
+%% instead of crashing with function_clause.
+base10_encode(Cs) ->
+    base10_encode(Cs, <<>>).
+%%
+base10_encode(<<>>, Acc) ->
+    Acc;
+base10_encode(<<H/utf8,T/binary>>, Acc) when H > 255 ->
+    Base10 = integer_to_binary(H),
+    base10_encode(T, <<Acc/binary,"&#",Base10/binary,$;>>);
+base10_encode(<<H/utf8,T/binary>>, Acc) ->
+    base10_encode(T, <<Acc/binary,H>>);
+base10_encode(<<H,_/binary>>, _Acc) ->
+    throw({error,invalid_input, <<H>>}).
+
+
+%% Form-urlencodes a binary one byte at a time: a space becomes "+", bytes
+%% accepted by is_url_char/1 are copied unchanged, and every other byte is
+%% written as "%" followed by two hex digits. Input that is not a whole
+%% number of bytes is rejected with {error, invalid_input, Input}.
+html5_byte_encode(B) ->
+    html5_byte_encode(B, <<>>).
+%%
+html5_byte_encode(<<>>, Acc) ->
+    Acc;
+html5_byte_encode(<<$\s,Rest/binary>>, Acc) ->
+    html5_byte_encode(Rest, <<Acc/binary,$+>>);
+html5_byte_encode(<<Byte,Rest/binary>>, Acc) ->
+    Encoded =
+        case is_url_char(Byte) of
+            true ->
+                <<Byte>>;
+            false ->
+                %% High and low nibble of the byte
+                Hi = Byte bsr 4,
+                Lo = Byte band 16#0F,
+                <<$%,(?DEC2HEX(Hi)),(?DEC2HEX(Lo))>>
+        end,
+    html5_byte_encode(Rest, <<Acc/binary,Encoded/binary>>);
+html5_byte_encode(Other, _Acc) ->
+    throw({error,invalid_input, Other}).
+
+
+%% Returns true if the byte may appear unescaped in a form-urlencoded string.
+%% Allowed characters:
+%%   "*" (0x2A), "-" (0x2D), "." (0x2E), "_" (0x5F),
+%%   "0".."9" (0x30 to 0x39), "A".."Z" (0x41 to 0x5A), "a".."z" (0x61 to 0x7A)
+is_url_char($*) -> true;
+is_url_char($-) -> true;
+is_url_char($.) -> true;
+is_url_char($_) -> true;
+is_url_char(C) when C >= $0, C =< $9 -> true;
+is_url_char(C) when C >= $A, C =< $Z -> true;
+is_url_char(C) when C >= $a, C =< $z -> true;
+is_url_char(_) -> false.
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for dissect_query
+%%-------------------------------------------------------------------------
+%% Collects the bytes of a key up to the "=" that starts its value.
+%% A literal "&#" is kept as part of the key; any other "&" met before "="
+%% means the key has no value and is reported as missing_value, as is input
+%% that runs out before "=". Value is passed through unchanged.
+dissect_query_key(<<$=,Rest/binary>>, IsList, Pairs, Key, Value) ->
+    dissect_query_value(Rest, IsList, Pairs, Key, Value);
+dissect_query_key(<<$&,$#,Rest/binary>>, IsList, Pairs, Key, Value) ->
+    dissect_query_key(Rest, IsList, Pairs, <<Key/binary,$&,$#>>, Value);
+dissect_query_key(<<$&,_/binary>>, _IsList, _Pairs, _Key, _Value) ->
+    throw({error, missing_value, "&"});
+dissect_query_key(<<Byte,Rest/binary>>, IsList, Pairs, Key, Value) ->
+    dissect_query_key(Rest, IsList, Pairs, <<Key/binary,Byte>>, Value);
+dissect_query_key(Input, _, _, _, _) ->
+    throw({error, missing_value, Input}).
+
+
+%% Collects the bytes of a value up to the next "&" or the end of input.
+%% When a value is complete, the key and value are form-urldecoded and the
+%% pair is stored; at the end of input the pairs are returned in input order.
+dissect_query_value(<<>>, IsList, Pairs, Key, Value) ->
+    DecKey = form_urldecode(IsList, Key),
+    DecValue = form_urldecode(IsList, Value),
+    lists:reverse(Pairs, [{DecKey,DecValue}]);
+dissect_query_value(<<$&,Rest/binary>>, IsList, Pairs, Key, Value) ->
+    DecKey = form_urldecode(IsList, Key),
+    DecValue = form_urldecode(IsList, Value),
+    dissect_query_key(Rest, IsList, [{DecKey,DecValue}|Pairs], <<>>, <<>>);
+dissect_query_value(<<Byte,Rest/binary>>, IsList, Pairs, Key, Value) ->
+    dissect_query_value(Rest, IsList, Pairs, Key, <<Value/binary,Byte>>).
+
+
+%% Form-urldecode input based on RFC 1866 [8.2.1]
+%%
+%% This arity serves two call shapes:
+%%   form_urldecode(IsList :: boolean(), Binary)
+%%       decodes Binary ("+" and "%XX"), then resolves "&#N;" references;
+%%       with IsList =:= true the result is converted to a list.
+%%   form_urldecode(Binary, Acc)
+%%       the worker loop that performs the "+" and "%XX" decoding.
+form_urldecode(true, B) ->
+    convert_to_list(form_urldecode(false, B), utf8);
+form_urldecode(false, B) ->
+    Decoded = form_urldecode(B, <<>>),
+    base10_decode(Decoded);
+form_urldecode(<<>>, Acc) ->
+    Acc;
+form_urldecode(<<$+,Rest/binary>>, Acc) ->
+    form_urldecode(Rest, <<Acc/binary,$\s>>);
+form_urldecode(<<$%,C0,C1,Rest/binary>> = Input, Acc) ->
+    case is_hex_digit(C0) andalso is_hex_digit(C1) of
+        false ->
+            throw({error, invalid_percent_encoding, convert_to_list(Input, utf8)});
+        true ->
+            Byte = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
+            form_urldecode(Rest, <<Acc/binary,Byte>>)
+    end;
+form_urldecode(<<Char/utf8,Rest/binary>>, Acc) ->
+    form_urldecode(Rest, <<Acc/binary,Char/utf8>>);
+form_urldecode(<<Byte,_/binary>>, _Acc) ->
+    throw({error, invalid_character, [Byte]}).
+
+%% Copies a UTF-8 binary, handing every "&#" it meets to
+%% base10_decode_unicode/2, which resolves the numeric reference and
+%% continues the scan. A byte that is not valid UTF-8 is reported as
+%% {error, invalid_input, [Byte]}.
+base10_decode(Cs) ->
+    base10_decode(Cs, <<>>).
+%%
+base10_decode(<<>>, Acc) ->
+    Acc;
+base10_decode(<<$&,$#,Rest/binary>>, Acc) ->
+    base10_decode_unicode(Rest, Acc);
+base10_decode(<<Char/utf8,Rest/binary>>, Acc) ->
+    base10_decode(Rest, <<Acc/binary,Char/utf8>>);
+base10_decode(<<Byte,_/binary>>, _Acc) ->
+    throw({error, invalid_input, [Byte]}).
+
+
+%% Decodes the rest of a numeric character reference whose leading "&#" has
+%% already been consumed: accumulates base-ten digits up to ";", appends the
+%% resulting code point to Acc as UTF-8 and resumes base10_decode/2.
+%%
+%% Throws {error, invalid_input, Info} instead of crashing when
+%%   - the code point cannot be encoded as UTF-8 (a surrogate, or above
+%%     16#10FFFF); Info is the code point,
+%%   - a byte other than a digit or ";" is found; Info is [Byte],
+%%   - the input ends before ";"; Info is <<>>.
+%% The callers only catch throws, so a badarg or function_clause here would
+%% otherwise escape dissect_query/1.
+base10_decode_unicode(B, Acc) ->
+    base10_decode_unicode(B, 0, Acc).
+%%
+base10_decode_unicode(<<H/utf8,T/binary>>, Codepoint, Acc) when $0 =< H, H =< $9 ->
+    Res = Codepoint * 10 + (H - $0),
+    base10_decode_unicode(T, Res, Acc);
+base10_decode_unicode(<<$;,T/binary>>, Codepoint, Acc)
+  when Codepoint =< 16#D7FF;
+       16#E000 =< Codepoint, Codepoint =< 16#10FFFF ->
+    base10_decode(T, <<Acc/binary,Codepoint/utf8>>);
+base10_decode_unicode(<<$;,_/binary>>, Codepoint, _Acc) ->
+    throw({error, invalid_input, Codepoint});
+base10_decode_unicode(<<H,_/binary>>, _, _) ->
+    throw({error, invalid_input, [H]});
+base10_decode_unicode(<<>>, _, _) ->
+    throw({error, invalid_input, <<>>}).
+
+
+%%-------------------------------------------------------------------------
%% Helper functions for normalize
%%-------------------------------------------------------------------------
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index c625da56c6..fef356355c 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -38,7 +38,10 @@
recompose_query/1, recompose_parse_query/1,
recompose_path/1, recompose_parse_path/1,
recompose_autogen/1, parse_recompose_autogen/1,
- transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1
+ transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1,
+ compose_query/1, compose_query_latin1/1, compose_query_negative/1,
+ dissect_query/1, dissect_query_negative/1,
+ interop_query_latin1/1, interop_query_utf8/1
]).
@@ -107,7 +110,14 @@ all() ->
transcode_basic,
transcode_options,
transcode_mixed,
- transcode_negative
+ transcode_negative,
+ compose_query,
+ compose_query_latin1,
+ compose_query_negative,
+ dissect_query,
+ dissect_query_negative,
+ interop_query_latin1,
+ interop_query_utf8
].
groups() ->
@@ -823,6 +833,65 @@ transcode_negative(_Config) ->
{error,invalid_input,<<"ö">>} =
uri_string:transcode("foo%F6bar", [{in_encoding, utf8},{out_encoding, utf8}]).
+%% Checks compose_query/1 and compose_query/2 results for list, binary and
+%% mixed key/value input, with the utf8, unicode and latin1 encodings.
+compose_query(_Config) ->
+ [] = uri_string:compose_query([]),
+ "foo=1&bar=2" = uri_string:compose_query([{<<"foo">>,"1"}, {"bar", "2"}]),
+ "foo=1&b%C3%A4r=2" = uri_string:compose_query([{"foo","1"}, {"bär", "2"}],[{encoding,utf8}]),
+ "foo=1&b%C3%A4r=2" = uri_string:compose_query([{"foo","1"}, {"bär", "2"}],[{encoding,unicode}]),
+ "foo=1&b%E4r=2" = uri_string:compose_query([{"foo","1"}, {"bär", "2"}],[{encoding,latin1}]),
+ "foo+bar=1&%E5%90%88=2" = uri_string:compose_query([{"foo bar","1"}, {"合", "2"}]),
+ "foo+bar=1&%26%2321512%3B=2" =
+ uri_string:compose_query([{"foo bar","1"}, {"合", "2"}],[{encoding,latin1}]),
+ "foo+bar=1&%C3%B6=2" = uri_string:compose_query([{<<"foo bar">>,<<"1">>}, {"ö", <<"2">>}]),
+ <<"foo+bar=1&%C3%B6=2">> =
+ uri_string:compose_query([{<<"foo bar">>,<<"1">>}, {<<"ö"/utf8>>, <<"2">>}]).
+
+%% Composes a query with the latin1 encoding, transcodes the result with
+%% in_encoding latin1, and checks that dissect_query/1 gives back the
+%% original pairs - first for list input, then for binary input.
+compose_query_latin1(_Config) ->
+ Q = uri_string:compose_query([{"合foö bar","1"}, {"合", "合"}],[{encoding,latin1}]),
+ Q1 = uri_string:transcode(Q, [{in_encoding, latin1}]),
+ [{"合foö bar","1"}, {"合", "合"}] = uri_string:dissect_query(Q1),
+ Q2 = uri_string:compose_query([{<<"合foö bar"/utf8>>,<<"1">>}, {<<"合"/utf8>>, <<"合"/utf8>>}],
+ [{encoding,latin1}]),
+ Q3 = uri_string:transcode(Q2, [{in_encoding, latin1}]),
+ [{<<"合foö bar"/utf8>>,<<"1">>}, {<<"合"/utf8>>, <<"合"/utf8>>}] =
+ uri_string:dissect_query(Q3).
+
+%% Checks the error tuples returned by compose_query/1,2 for a non-string
+%% value, a non-string key, and an unsupported encoding option.
+compose_query_negative(_Config) ->
+ {error,invalid_input,4} = uri_string:compose_query([{"",4}]),
+ {error,invalid_input,5} = uri_string:compose_query([{5,""}]),
+ {error,invalid_encoding,utf16} =
+ uri_string:compose_query([{"foo bar","1"}, {<<"ö">>, "2"}],[{encoding,utf16}]).
+
+%% Checks dissect_query/1 on list, binary and mixed list-of-binaries input:
+%% empty input, "&" as the only pair separator (";" stays in the value),
+%% "+" decoded to a space, percent-encoded octets, percent-encoded "&#N;"
+%% references, and unescaped non-ASCII characters.
+dissect_query(_Config) ->
+ [] = uri_string:dissect_query(""),
+ [{"foo","1"}, {"amp;bar", "2"}] = uri_string:dissect_query("foo=1&amp;bar=2"),
+ [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1&bar=2"),
+ [{"foo","1;bar=2"}] = uri_string:dissect_query("foo=1;bar=2"),
+ [{"foo","1"}, {"bar", "222"}] = uri_string:dissect_query([<<"foo=1&bar=2">>,"22"]),
+ [{"foo","ö"}, {"bar", "2"}] = uri_string:dissect_query("foo=%C3%B6&bar=2"),
+ [{<<"foo">>,<<"ö"/utf8>>}, {<<"bar">>, <<"2">>}] =
+ uri_string:dissect_query(<<"foo=%C3%B6&bar=2">>),
+ [{"foo bar","1"},{"ö","2"}] =
+ uri_string:dissect_query([<<"foo+bar=1&">>,<<"%C3%B6=2">>]),
+ [{"foo bar","1"},{[21512],"2"}] =
+ uri_string:dissect_query("foo+bar=1&%26%2321512%3B=2"),
+ [{<<"foo bar">>,<<"1">>},{<<"合"/utf8>>,<<"2">>}] =
+ uri_string:dissect_query(<<"foo+bar=1&%26%2321512%3B=2">>),
+ [{"föo bar","1"},{"ö","2"}] =
+ uri_string:dissect_query("föo+bar=1&%C3%B6=2"),
+ [{<<"föo bar"/utf8>>,<<"1">>},{<<"ö"/utf8>>,<<"2">>}] =
+ uri_string:dissect_query(<<"föo+bar=1&%C3%B6=2"/utf8>>).
+
+%% Checks the error tuples returned by dissect_query/1 for a key without
+%% "=", a malformed percent-encoding, percent-decoded bytes that are not
+%% valid UTF-8, and input containing bytes that are not valid UTF-8.
+dissect_query_negative(_Config) ->
+ {error,missing_value,"&"} =
+ uri_string:dissect_query("foo1&bar=2"),
+ {error,invalid_percent_encoding,"%XX%B6"} = uri_string:dissect_query("foo=%XX%B6&amp;bar=2"),
+ {error,invalid_input,[153]} =
+ uri_string:dissect_query("foo=%99%B6&amp;bar=2"),
+ {error,invalid_character,"ö"} = uri_string:dissect_query(<<"föo+bar=1&%C3%B6=2">>),
+ {error,invalid_input,<<"ö">>} =
+ uri_string:dissect_query([<<"foo+bar=1&amp;">>,<<"%C3%B6=2ö">>]).
+
normalize(_Config) ->
"/a/g" = uri_string:normalize("/a/b/c/./../../g"),
<<"mid/6">> = uri_string:normalize(<<"mid/content=5/../6">>),
@@ -842,3 +911,16 @@ normalize(_Config) ->
uri_string:normalize(<<"sftp://localhost:22">>),
<<"tftp://localhost">> =
uri_string:normalize(<<"tftp://localhost:69">>).
+
+%% Round trip with the default encoding: compose_query/1 -> recompose/1 ->
+%% parse/1 -> dissect_query/1 must give back the original pairs.
+interop_query_utf8(_Config) ->
+ Q = uri_string:compose_query([{"foo bar","1"}, {"合", "2"}]),
+ Uri = uri_string:recompose(#{path => "/", query => Q}),
+ #{query := Q1} = uri_string:parse(Uri),
+ [{"foo bar","1"}, {"合", "2"}] = uri_string:dissect_query(Q1).
+
+%% Round trip with the latin1 encoding: compose_query/2 -> recompose/1 ->
+%% transcode/2 (in_encoding latin1) -> parse/1 -> dissect_query/1 must give
+%% back the original pairs.
+interop_query_latin1(_Config) ->
+ Q = uri_string:compose_query([{"foo bar","1"}, {"合", "2"}], [{encoding,latin1}]),
+ Uri = uri_string:recompose(#{path => "/", query => Q}),
+ Uri1 = uri_string:transcode(Uri, [{in_encoding, latin1}]),
+ #{query := Q1} = uri_string:parse(Uri1),
+ [{"foo bar","1"}, {"合", "2"}] = uri_string:dissect_query(Q1).