about | summary | refs | log | tree | commit | diff | stats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/stdlib/doc/src/uri_string.xml 87
-rw-r--r-- lib/stdlib/src/uri_string.erl 216
-rw-r--r-- lib/stdlib/test/uri_string_SUITE.erl 51
3 files changed, 4 insertions, 350 deletions
diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml
index 8fa2a92370..9ace2b0a05 100644
--- a/lib/stdlib/doc/src/uri_string.xml
+++ b/lib/stdlib/doc/src/uri_string.xml
@@ -31,8 +31,7 @@
<modulesummary>URI processing functions.</modulesummary>
<description>
<p>This module contains functions for parsing and handling URIs
- (<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>) and
- form-urlencoded query strings (<url href="https://www.ietf.org/rfc/rfc1866.txt">RFC 1866</url>).
+ (<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>).
</p>
<p>A URI is an identifier consisting of a sequence of characters matching the syntax
rule named <em>URI</em> in <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>.
@@ -72,13 +71,6 @@
<item>Transforming URIs into a normalized form<br></br>
<seealso marker="#normalize/1"><c>normalize/1</c></seealso>
</item>
- <item>Composing form-urlencoded query strings from a list of key-value pairs<br></br>
- <seealso marker="#compose_query/1"><c>compose_query/1</c></seealso><br></br>
- <seealso marker="#compose_query/2"><c>compose_query/2</c></seealso>
- </item>
- <item>Dissecting form-urlencoded query strings into a list of key-value pairs<br></br>
- <seealso marker="#dissect_query/1"><c>dissect_query/1</c></seealso>
- </item>
</list>
<p>There are four different encodings present during the handling of URIs:</p>
<list type="bulleted">
@@ -110,14 +102,12 @@
<desc>
<p>Error tuple indicating the type of error. Possible values of the second component:</p>
<list type="bulleted">
- <item><c>invalid_character</c></item>
<item><c>invalid_input</c></item>
<item><c>invalid_map</c></item>
<item><c>invalid_percent_encoding</c></item>
<item><c>invalid_scheme</c></item>
<item><c>invalid_uri</c></item>
<item><c>invalid_utf8</c></item>
- <item><c>missing_value</c></item>
</list>
<p>The third component is a term providing additional information about the
cause of the error.</p>
@@ -144,81 +134,6 @@
<funcs>
<func>
- <name name="compose_query" arity="1"/>
- <fsummary>Compose urlencoded query string.</fsummary>
- <desc>
- <p>Composes a form-urlencoded <c><anno>QueryString</anno></c> based on a
- <c><anno>QueryList</anno></c>, a list of non-percent-encoded key-value pairs.
- Form-urlencoding is defined in section
- 8.2.1 of <url href="https://www.ietf.org/rfc/rfc1866.txt">RFC 1866</url>
- (HTML 2.0) for media type <c>application/x-www-form-urlencoded</c>.
- Reserved and unsafe characters, as
- defined by <url href="https://www.ietf.org/rfc/rfc1738.txt">RFC 1738</url>
- (Uniform Resource Locators), are percent-encoded.</p>
- <p>See also the opposite operation <seealso marker="#dissect_query/1">
- <c>dissect_query/1</c></seealso>.
- </p>
- <p><em>Example:</em></p>
- <pre>
-1> <input>uri_string:compose_query([{"foo bar","1"},{"city","örebro"}]).</input>
-<![CDATA["foo+bar=1&amp;city=%C3%B6rebro"]]>
-2> <![CDATA[uri_string:compose_query([{<<"foo bar">>,<<"1">>},
-2> {<<"city">>,<<"örebro"/utf8>>}]).]]>
-<![CDATA[<<"foo+bar=1&amp;city=%C3%B6rebro">>]]>
- </pre>
- </desc>
- </func>
-
- <func>
- <name name="compose_query" arity="2"/>
- <fsummary>Compose urlencoded query string.</fsummary>
- <desc>
- <p>Same as <c>compose_query/1</c> but with an additional
- <c><anno>Options</anno></c> parameter, that controls the type of separator used
- between key-value pairs. There are three supported separator types: <c>amp</c> (<![CDATA[&]]>), <c>escaped_amp</c> (<![CDATA[&amp;]]>) and <c>semicolon</c> (;). If the parameter <c><anno>Options</anno></c> is empty, separator takes the default value (<c>escaped_amp</c>).</p>
- <p>See also the opposite operation <seealso marker="#dissect_query/1">
- <c>dissect_query/1</c></seealso>.
- </p>
- <p><em>Example:</em></p>
- <pre>
-1> <input>uri_string:compose_query([{"foo bar","1"},{"city","örebro"}],</input>
-1> [{separator, amp}]).
-<![CDATA["foo+bar=1&city=%C3%B6rebro"
-2> uri_string:compose_query([{<<"foo bar">>,<<"1">>},
-2> {<<"city">>,<<"örebro"/utf8>>}], [{separator, escaped_amp}]).]]>
-<![CDATA[<<"foo+bar=1&amp;city=%C3%B6rebro">>]]>
- </pre>
- </desc>
- </func>
-
- <func>
- <name name="dissect_query" arity="1"/>
- <fsummary>Dissect query string.</fsummary>
- <desc>
- <p>Dissects an urlencoded <c><anno>QueryString</anno></c> and returns a
- <c><anno>QueryList</anno></c>, a list of non-percent-encoded key-value pairs.
- Form-urlencoding is defined in section
- 8.2.1 of <url href="https://www.ietf.org/rfc/rfc1866.txt">RFC 1866</url>
- (HTML 2.0) for media type <c>application/x-www-form-urlencoded</c>.
- Percent-encoded segments are decoded as defined by
- <url href="https://www.ietf.org/rfc/rfc1738.txt">RFC 1738</url>
- (Uniform Resource Locators).</p>
- <p>Supported separator types: <c>amp</c> (<![CDATA[&]]>), <c>escaped_amp</c>
- (<![CDATA[&amp;]]>) and <c>semicolon</c> (;).</p>
- <p>See also the opposite operation <seealso marker="#compose_query/1">
- <c>compose_query/1</c></seealso>.
- </p>
- <p><em>Example:</em></p>
- <pre>
-1> <input>uri_string:dissect_query("foo+bar=1;city=%C3%B6rebro").</input>
-[{"foo bar","1"},{"city","örebro"}]
-2> <![CDATA[uri_string:dissect_query(<<"foo+bar=1&city=%C3%B6rebro">>).]]>
-<![CDATA[[{<<"foo bar">>,<<"1">>},{<<"city">>,<<"örebro"/utf8>>}] ]]>
- </pre>
- </desc>
- </func>
-
- <func>
<name name="normalize" arity="1"/>
<fsummary>Syntax-based normalization.</fsummary>
<desc>
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index f4acf1885d..22212da222 100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -226,8 +226,7 @@
%%-------------------------------------------------------------------------
%% External API
%%-------------------------------------------------------------------------
--export([compose_query/1, compose_query/2,
- dissect_query/1, normalize/1, parse/1,
+-export([normalize/1, parse/1,
recompose/1, transcode/2]).
-export_type([error/0, uri_map/0, uri_string/0]).
@@ -382,75 +381,6 @@ transcode(URIString, Options) when is_list(URIString) ->
end.
-%%-------------------------------------------------------------------------
-%% Functions for working with the query part of a URI as a list
-%% of key/value pairs.
-%% HTML 2.0 (RFC 1866) defines a media type application/x-www-form-urlencoded
-%% in section [8.2.1] "The form-urlencoded Media Type".
-%%-------------------------------------------------------------------------
-
-%%-------------------------------------------------------------------------
-%% Compose urlencoded query string from a list of unescaped key/value pairs.
-%%-------------------------------------------------------------------------
--spec compose_query(QueryList) -> QueryString when
- QueryList :: [{uri_string(), uri_string()}],
- QueryString :: uri_string()
- | error().
-compose_query(List) ->
- compose_query(List, []).
-
-
--spec compose_query(QueryList, Options) -> QueryString when
- QueryList :: [{uri_string(), uri_string()}],
- Options :: [{separator, atom()}],
- QueryString :: uri_string()
- | error().
-compose_query([],_Options) ->
- [];
-compose_query(List, Options) ->
- try compose_query(List, Options, false, <<>>)
- catch
- throw:{error, Atom, RestData} -> {error, Atom, RestData}
- end.
-%%
-compose_query([{Key,Value}|Rest], Options, IsList, Acc) ->
- Separator = get_separator(Options, Rest),
- K = form_urlencode(Key),
- V = form_urlencode(Value),
- IsListNew = IsList orelse is_list(Key) orelse is_list(Value),
- compose_query(Rest, Options, IsListNew, <<Acc/binary,K/binary,"=",V/binary,Separator/binary>>);
-compose_query([], _Options, IsList, Acc) ->
- case IsList of
- true -> convert_to_list(Acc, utf8);
- false -> Acc
- end.
-
-
-%%-------------------------------------------------------------------------
-%% Dissect a query string into a list of unescaped key/value pairs.
-%%-------------------------------------------------------------------------
--spec dissect_query(QueryString) -> QueryList when
- QueryString :: uri_string(),
- QueryList :: [{uri_string(), uri_string()}]
- | error().
-dissect_query(<<>>) ->
- [];
-dissect_query([]) ->
- [];
-dissect_query(QueryString) when is_list(QueryString) ->
- try
- B = convert_to_binary(QueryString, utf8, utf8),
- dissect_query_key(B, true, [], <<>>, <<>>)
- catch
- throw:{error, Atom, RestData} -> {error, Atom, RestData}
- end;
-dissect_query(QueryString) ->
- try dissect_query_key(QueryString, false, [], <<>>, <<>>)
- catch
- throw:{error, Atom, RestData} -> {error, Atom, RestData}
- end.
-
-
%%%========================================================================
%%% Internal functions
%%%========================================================================
@@ -655,7 +585,6 @@ maybe_add_path(Map) ->
end.
-
-spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}.
parse_scheme(?STRING_REST($:, Rest), URI) ->
{_, URI1} = parse_hier(Rest, URI),
@@ -1744,149 +1673,6 @@ percent_encode_segment(Segment) ->
%%-------------------------------------------------------------------------
-%% Helper functions for compose_query
-%%-------------------------------------------------------------------------
-
-%% Returns separator to be used between key-value pairs
-get_separator(_, L) when length(L) =:= 0 ->
- <<>>;
-get_separator([], _L) ->
- <<"&amp;">>;
-get_separator([{separator, amp}], _L) ->
- <<"&">>;
-get_separator([{separator, escaped_amp}], _L) ->
- <<"&amp;">>;
-get_separator([{separator, semicolon}], _L) ->
- <<";">>.
-
-
-%% Form-urlencode input based on RFC 1866 [8.2.1]
-form_urlencode(Cs) when is_list(Cs) ->
- B = convert_to_binary(Cs, utf8, utf8),
- form_urlencode(B, <<>>);
-form_urlencode(Cs) ->
- form_urlencode(Cs, <<>>).
-%%
-form_urlencode(<<>>, Acc) ->
- Acc;
-form_urlencode(<<$ ,T/binary>>, Acc) ->
- form_urlencode(T, <<Acc/binary,$+>>);
-form_urlencode(<<H/utf8,T/binary>>, Acc) ->
- case is_url_char(H) of
- true ->
- form_urlencode(T, <<Acc/binary,H>>);
- false ->
- E = percent_encode_binary(H),
- form_urlencode(T, <<Acc/binary,E/binary>>)
- end;
-form_urlencode(<<H,_T/binary>>, _Acc) ->
- throw({error,invalid_utf8,<<H>>});
-form_urlencode(H, _Acc) ->
- throw({error,invalid_input, H}).
-
-
-%% Return true if input char can appear in URL according to
-%% RFC 1738 "Uniform Resource Locators".
-is_url_char(C)
- when 0 =< C, C =< 31;
- 128 =< C, C =< 255 -> false;
-is_url_char(127) -> false;
-is_url_char(C) ->
- not (is_reserved(C) orelse is_unsafe(C)).
-
-
-%% Reserved characters (RFC 1738)
-is_reserved($;) -> true;
-is_reserved($/) -> true;
-is_reserved($?) -> true;
-is_reserved($:) -> true;
-is_reserved($@) -> true;
-is_reserved($=) -> true;
-is_reserved($&) -> true;
-is_reserved(_) -> false.
-
-
-%% Unsafe characters (RFC 1738)
-is_unsafe(${) -> true;
-is_unsafe($}) -> true;
-is_unsafe($|) -> true;
-is_unsafe($\\) -> true;
-is_unsafe($^) -> true;
-is_unsafe($~) -> true;
-is_unsafe($[) -> true;
-is_unsafe($]) -> true;
-is_unsafe($`) -> true;
-is_unsafe(_) -> false.
-
-
-%%-------------------------------------------------------------------------
-%% Helper functions for dissect_query
-%%-------------------------------------------------------------------------
-dissect_query_key(<<$=,T/binary>>, IsList, Acc, Key, Value) ->
- dissect_query_value(T, IsList, Acc, Key, Value);
-dissect_query_key(<<H,T/binary>>, IsList, Acc, Key, Value) ->
- dissect_query_key(T, IsList, Acc, <<Key/binary,H>>, Value);
-dissect_query_key(B, _, _, _, _) ->
- throw({error, missing_value, B}).
-
-
-dissect_query_value(<<$&,_/binary>> = B, IsList, Acc, Key, Value) ->
- K = form_urldecode(IsList, Key),
- V = form_urldecode(IsList, Value),
- dissect_query_separator_amp(B, IsList, [{K,V}|Acc], <<>>, <<>>);
-dissect_query_value(<<$;,_/binary>> = B, IsList, Acc, Key, Value) ->
- K = form_urldecode(IsList, Key),
- V = form_urldecode(IsList, Value),
- dissect_query_separator_semicolon(B, IsList, [{K,V}|Acc], <<>>, <<>>);
-dissect_query_value(<<H,T/binary>>, IsList, Acc, Key, Value) ->
- dissect_query_value(T, IsList, Acc, Key, <<Value/binary,H>>);
-dissect_query_value(<<>>, IsList, Acc, Key, Value) ->
- K = form_urldecode(IsList, Key),
- V = form_urldecode(IsList, Value),
- lists:reverse([{K,V}|Acc]).
-
-
-dissect_query_separator_amp(<<"&amp;",T/binary>>, IsList, Acc, Key, Value) ->
- dissect_query_key(T, IsList, Acc, Key, Value);
-dissect_query_separator_amp(<<$&,T/binary>>, IsList, Acc, Key, Value) ->
- dissect_query_key(T, IsList, Acc, Key, Value).
-
-
-dissect_query_separator_semicolon(<<$;,T/binary>>, IsList, Acc, Key, Value) ->
- dissect_query_key(T, IsList, Acc, Key, Value).
-
-
-%% Form-urldecode input based on RFC 1866 [8.2.1]
-form_urldecode(true, B) ->
- Result = form_urldecode(B, <<>>),
- convert_to_list(Result, utf8);
-form_urldecode(false, B) ->
- form_urldecode(B, <<>>);
-form_urldecode(<<>>, Acc) ->
- Acc;
-form_urldecode(<<$+,T/binary>>, Acc) ->
- form_urldecode(T, <<Acc/binary,$ >>);
-form_urldecode(<<$%,C0,C1,T/binary>>, Acc) ->
- case is_hex_digit(C0) andalso is_hex_digit(C1) of
- true ->
- V = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
- form_urldecode(T, <<Acc/binary, V>>);
- false ->
- L = convert_to_list(<<$%,C0,C1,T/binary>>, utf8),
- throw({error, invalid_percent_encoding, L})
- end;
-form_urldecode(<<H/utf8,T/binary>>, Acc) ->
- case is_url_char(H) of
- true ->
- form_urldecode(T, <<Acc/binary,H>>);
- false ->
- throw({error, invalid_character, [H]})
- end;
-form_urldecode(<<H,_/binary>>, _Acc) ->
- throw({error, invalid_character, [H]}).
-
-
-%%-------------------------------------------------------------------------
%% Helper functions for normalize
%%-------------------------------------------------------------------------
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index 1567b9333a..c625da56c6 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -38,9 +38,7 @@
recompose_query/1, recompose_parse_query/1,
recompose_path/1, recompose_parse_path/1,
recompose_autogen/1, parse_recompose_autogen/1,
- transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1,
- compose_query/1, compose_query_negative/1,
- dissect_query/1, dissect_query_negative/1
+ transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1
]).
@@ -109,11 +107,7 @@ all() ->
transcode_basic,
transcode_options,
transcode_mixed,
- transcode_negative,
- compose_query,
- compose_query_negative,
- dissect_query,
- dissect_query_negative
+ transcode_negative
].
groups() ->
@@ -829,47 +823,6 @@ transcode_negative(_Config) ->
{error,invalid_input,<<"ö">>} =
uri_string:transcode("foo%F6bar", [{in_encoding, utf8},{out_encoding, utf8}]).
-compose_query(_Config) ->
- [] = uri_string:compose_query([]),
- "foo=1&amp;bar=2" = uri_string:compose_query([{<<"foo">>,"1"}, {"bar", "2"}]),
- "foo=1&amp;bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,escaped_amp}]),
- "foo=1&bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,amp}]),
- "foo=1;bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,semicolon}]),
- "foo+bar=1&amp;%C3%B6=2" = uri_string:compose_query([{"foo bar","1"}, {"ö", "2"}]),
- "foo+bar=1&amp;%C3%B6=2" = uri_string:compose_query([{<<"foo bar">>,<<"1">>}, {"ö", <<"2">>}]),
- <<"foo+bar=1&amp;%C3%B6=2">> =
- uri_string:compose_query([{<<"foo bar">>,<<"1">>}, {<<"ö"/utf8>>, <<"2">>}]).
-
-compose_query_negative(_Config) ->
- {error,invalid_input,4} = uri_string:compose_query([{"",4}]),
- {error,invalid_input,5} = uri_string:compose_query([{5,""}]),
- {error,invalid_utf8,<<"ö">>} = uri_string:compose_query([{"foo bar","1"}, {<<"ö">>, "2"}]).
-
-dissect_query(_Config) ->
- [] = uri_string:dissect_query(""),
- [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1&amp;bar=2"),
- [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1&bar=2"),
- [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1;bar=2"),
- [{"foo","1"}, {"bar", "222"}] = uri_string:dissect_query([<<"foo=1;bar=2">>,"22"]),
- [{"foo","ö"}, {"bar", "2"}] = uri_string:dissect_query("foo=%C3%B6&amp;bar=2"),
- [{<<"foo">>,<<"ö"/utf8>>}, {<<"bar">>, <<"2">>}] =
- uri_string:dissect_query(<<"foo=%C3%B6&amp;bar=2">>),
- [{"foo bar","1"},{"ö","2"}] =
- uri_string:dissect_query([<<"foo+bar=1&amp;">>,<<"%C3%B6=2">>]).
-
-dissect_query_negative(_Config) ->
- {error,invalid_character,";"} =
- uri_string:dissect_query("foo=1&ap;bar=2"),
- {error,invalid_character,"&"} =
- uri_string:dissect_query("foo1&amp;bar=2"),
- {error,invalid_percent_encoding,"%XX%B6"} = uri_string:dissect_query("foo=%XX%B6&amp;bar=2"),
- {error,invalid_input,<<153,182>>} =
- uri_string:dissect_query("foo=%99%B6&amp;bar=2"),
- {error,invalid_character,"ö"} = uri_string:dissect_query("föo+bar=1&amp;%C3%B6=2"),
- {error,invalid_character,"ö"} = uri_string:dissect_query(<<"föo+bar=1&amp;%C3%B6=2">>),
- {error,invalid_input,<<"ö">>} =
- uri_string:dissect_query([<<"foo+bar=1&amp;">>,<<"%C3%B6=2ö">>]).
-
normalize(_Config) ->
"/a/g" = uri_string:normalize("/a/b/c/./../../g"),
<<"mid/6">> = uri_string:normalize(<<"mid/content=5/../6">>),