diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/stdlib/doc/src/uri_string.xml | 87 | ||||
-rw-r--r-- | lib/stdlib/src/uri_string.erl | 216 | ||||
-rw-r--r-- | lib/stdlib/test/uri_string_SUITE.erl | 51 |
3 files changed, 4 insertions, 350 deletions
diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml index 8fa2a92370..9ace2b0a05 100644 --- a/lib/stdlib/doc/src/uri_string.xml +++ b/lib/stdlib/doc/src/uri_string.xml @@ -31,8 +31,7 @@ <modulesummary>URI processing functions.</modulesummary> <description> <p>This module contains functions for parsing and handling URIs - (<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>) and - form-urlencoded query strings (<url href="https://www.ietf.org/rfc/rfc1866.txt">RFC 1866</url>). + (<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>). </p> <p>A URI is an identifier consisting of a sequence of characters matching the syntax rule named <em>URI</em> in <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>. @@ -72,13 +71,6 @@ <item>Transforming URIs into a normalized form<br></br> <seealso marker="#normalize/1"><c>normalize/1</c></seealso> </item> - <item>Composing form-urlencoded query strings from a list of key-value pairs<br></br> - <seealso marker="#compose_query/1"><c>compose_query/1</c></seealso><br></br> - <seealso marker="#compose_query/2"><c>compose_query/2</c></seealso> - </item> - <item>Dissecting form-urlencoded query strings into a list of key-value pairs<br></br> - <seealso marker="#dissect_query/1"><c>dissect_query/1</c></seealso> - </item> </list> <p>There are four different encodings present during the handling of URIs:</p> <list type="bulleted"> @@ -110,14 +102,12 @@ <desc> <p>Error tuple indicating the type of error. 
Possible values of the second component:</p> <list type="bulleted"> - <item><c>invalid_character</c></item> <item><c>invalid_input</c></item> <item><c>invalid_map</c></item> <item><c>invalid_percent_encoding</c></item> <item><c>invalid_scheme</c></item> <item><c>invalid_uri</c></item> <item><c>invalid_utf8</c></item> - <item><c>missing_value</c></item> </list> <p>The third component is a term providing additional information about the cause of the error.</p> @@ -144,81 +134,6 @@ <funcs> <func> - <name name="compose_query" arity="1"/> - <fsummary>Compose urlencoded query string.</fsummary> - <desc> - <p>Composes a form-urlencoded <c><anno>QueryString</anno></c> based on a - <c><anno>QueryList</anno></c>, a list of non-percent-encoded key-value pairs. - Form-urlencoding is defined in section - 8.2.1 of <url href="https://www.ietf.org/rfc/rfc1866.txt">RFC 1866</url> - (HTML 2.0) for media type <c>application/x-www-form-urlencoded</c>. - Reserved and unsafe characters, as - defined by <url href="https://www.ietf.org/rfc/rfc1738.txt">RFC 1738</url> - (Uniform Resource Locators), are percent-encoded.</p> - <p>See also the opposite operation <seealso marker="#dissect_query/1"> - <c>dissect_query/1</c></seealso>. - </p> - <p><em>Example:</em></p> - <pre> -1> <input>uri_string:compose_query([{"foo bar","1"},{"city","örebro"}]).</input> -<![CDATA["foo+bar=1&city=%C3%B6rebro"]]> -2> <![CDATA[uri_string:compose_query([{<<"foo bar">>,<<"1">>}, -2> {<<"city">>,<<"örebro"/utf8>>}]).]]> -<![CDATA[<<"foo+bar=1&city=%C3%B6rebro">>]]> - </pre> - </desc> - </func> - - <func> - <name name="compose_query" arity="2"/> - <fsummary>Compose urlencoded query string.</fsummary> - <desc> - <p>Same as <c>compose_query/1</c> but with an additional - <c><anno>Options</anno></c> parameter, that controls the type of separator used - between key-value pairs. There are three supported separator types: <c>amp</c> (<![CDATA[&]]>), <c>escaped_amp</c> (<![CDATA[&]]>) and <c>semicolon</c> (;). 
If the parameter <c><anno>Options</anno></c> is empty, separator takes the default value (<c>escaped_amp</c>).</p> - <p>See also the opposite operation <seealso marker="#dissect_query/1"> - <c>dissect_query/1</c></seealso>. - </p> - <p><em>Example:</em></p> - <pre> -1> <input>uri_string:compose_query([{"foo bar","1"},{"city","örebro"}],</input> -1> [{separator, amp}]). -<![CDATA["foo+bar=1&city=%C3%B6rebro" -2> uri_string:compose_query([{<<"foo bar">>,<<"1">>}, -2> {<<"city">>,<<"örebro"/utf8>>}], [{separator, escaped_amp}]).]]> -<![CDATA[<<"foo+bar=1&amp;city=%C3%B6rebro">>]]> - </pre> - </desc> - </func> - - <func> - <name name="dissect_query" arity="1"/> - <fsummary>Dissect query string.</fsummary> - <desc> - <p>Dissects an urlencoded <c><anno>QueryString</anno></c> and returns a - <c><anno>QueryList</anno></c>, a list of non-percent-encoded key-value pairs. - Form-urlencoding is defined in section - 8.2.1 of <url href="https://www.ietf.org/rfc/rfc1866.txt">RFC 1866</url> - (HTML 2.0) for media type <c>application/x-www-form-urlencoded</c>. - Percent-encoded segments are decoded as defined by - <url href="https://www.ietf.org/rfc/rfc1738.txt">RFC 1738</url> - (Uniform Resource Locators).</p> - <p>Supported separator types: <c>amp</c> (<![CDATA[&]]>), <c>escaped_amp</c> - (<![CDATA[&amp;]]>) and <c>semicolon</c> (;).</p> - <p>See also the opposite operation <seealso marker="#compose_query/1"> - <c>compose_query/1</c></seealso>. 
- </p> - <p><em>Example:</em></p> - <pre> -1> <input>uri_string:dissect_query("foo+bar=1;city=%C3%B6rebro").</input> -[{"foo bar","1"},{"city","örebro"}] -2> <![CDATA[uri_string:dissect_query(<<"foo+bar=1&city=%C3%B6rebro">>).]]> -<![CDATA[[{<<"foo bar">>,<<"1">>},{<<"city">>,<<"örebro"/utf8>>}] ]]> - </pre> - </desc> - </func> - - <func> <name name="normalize" arity="1"/> <fsummary>Syntax-based normalization.</fsummary> <desc> diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index f4acf1885d..22212da222 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -226,8 +226,7 @@ %%------------------------------------------------------------------------- %% External API %%------------------------------------------------------------------------- --export([compose_query/1, compose_query/2, - dissect_query/1, normalize/1, parse/1, +-export([normalize/1, parse/1, recompose/1, transcode/2]). -export_type([error/0, uri_map/0, uri_string/0]). @@ -382,75 +381,6 @@ transcode(URIString, Options) when is_list(URIString) -> end. -%%------------------------------------------------------------------------- -%% Functions for working with the query part of a URI as a list -%% of key/value pairs. -%% HTML 2.0 (RFC 1866) defines a media type application/x-www-form-urlencoded -%% in section [8.2.1] "The form-urlencoded Media Type". -%%------------------------------------------------------------------------- - -%%------------------------------------------------------------------------- -%% Compose urlencoded query string from a list of unescaped key/value pairs. -%%------------------------------------------------------------------------- --spec compose_query(QueryList) -> QueryString when - QueryList :: [{uri_string(), uri_string()}], - QueryString :: uri_string() - | error(). -compose_query(List) -> - compose_query(List, []). 
- - --spec compose_query(QueryList, Options) -> QueryString when - QueryList :: [{uri_string(), uri_string()}], - Options :: [{separator, atom()}], - QueryString :: uri_string() - | error(). -compose_query([],_Options) -> - []; -compose_query(List, Options) -> - try compose_query(List, Options, false, <<>>) - catch - throw:{error, Atom, RestData} -> {error, Atom, RestData} - end. -%% -compose_query([{Key,Value}|Rest], Options, IsList, Acc) -> - Separator = get_separator(Options, Rest), - K = form_urlencode(Key), - V = form_urlencode(Value), - IsListNew = IsList orelse is_list(Key) orelse is_list(Value), - compose_query(Rest, Options, IsListNew, <<Acc/binary,K/binary,"=",V/binary,Separator/binary>>); -compose_query([], _Options, IsList, Acc) -> - case IsList of - true -> convert_to_list(Acc, utf8); - false -> Acc - end. - - -%%------------------------------------------------------------------------- -%% Dissect a query string into a list of unescaped key/value pairs. -%%------------------------------------------------------------------------- --spec dissect_query(QueryString) -> QueryList when - QueryString :: uri_string(), - QueryList :: [{uri_string(), uri_string()}] - | error(). -dissect_query(<<>>) -> - []; -dissect_query([]) -> - []; -dissect_query(QueryString) when is_list(QueryString) -> - try - B = convert_to_binary(QueryString, utf8, utf8), - dissect_query_key(B, true, [], <<>>, <<>>) - catch - throw:{error, Atom, RestData} -> {error, Atom, RestData} - end; -dissect_query(QueryString) -> - try dissect_query_key(QueryString, false, [], <<>>, <<>>) - catch - throw:{error, Atom, RestData} -> {error, Atom, RestData} - end. - - %%%======================================================================== %%% Internal functions %%%======================================================================== @@ -655,7 +585,6 @@ maybe_add_path(Map) -> end. - -spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}. 
parse_scheme(?STRING_REST($:, Rest), URI) -> {_, URI1} = parse_hier(Rest, URI), @@ -1744,149 +1673,6 @@ percent_encode_segment(Segment) -> %%------------------------------------------------------------------------- -%% Helper functions for compose_query -%%------------------------------------------------------------------------- - -%% Returns separator to be used between key-value pairs -get_separator(_, L) when length(L) =:= 0 -> - <<>>; -get_separator([], _L) -> - <<"&amp;">>; -get_separator([{separator, amp}], _L) -> - <<"&">>; -get_separator([{separator, escaped_amp}], _L) -> - <<"&amp;">>; -get_separator([{separator, semicolon}], _L) -> - <<";">>. - - -%% Form-urlencode input based on RFC 1866 [8.2.1] -form_urlencode(Cs) when is_list(Cs) -> - B = convert_to_binary(Cs, utf8, utf8), - form_urlencode(B, <<>>); -form_urlencode(Cs) -> - form_urlencode(Cs, <<>>). -%% -form_urlencode(<<>>, Acc) -> - Acc; -form_urlencode(<<$ ,T/binary>>, Acc) -> - form_urlencode(T, <<Acc/binary,$+>>); -form_urlencode(<<H/utf8,T/binary>>, Acc) -> - case is_url_char(H) of - true -> - form_urlencode(T, <<Acc/binary,H>>); - false -> - E = percent_encode_binary(H), - form_urlencode(T, <<Acc/binary,E/binary>>) - end; -form_urlencode(<<H,_T/binary>>, _Acc) -> - throw({error,invalid_utf8,<<H>>}); -form_urlencode(H, _Acc) -> - throw({error,invalid_input, H}). - - -%% Return true if input char can appear in URL according to -%% RFC 1738 "Uniform Resource Locators". -is_url_char(C) - when 0 =< C, C =< 31; - 128 =< C, C =< 255 -> false; -is_url_char(127) -> false; -is_url_char(C) -> - not (is_reserved(C) orelse is_unsafe(C)). - - -%% Reserved characters (RFC 1738) -is_reserved($;) -> true; -is_reserved($/) -> true; -is_reserved($?) -> true; -is_reserved($:) -> true; -is_reserved($@) -> true; -is_reserved($=) -> true; -is_reserved($&) -> true; -is_reserved(_) -> false. 
- - -%% Unsafe characters (RFC 1738) -is_unsafe(${) -> true; -is_unsafe($}) -> true; -is_unsafe($|) -> true; -is_unsafe($\\) -> true; -is_unsafe($^) -> true; -is_unsafe($~) -> true; -is_unsafe($[) -> true; -is_unsafe($]) -> true; -is_unsafe($`) -> true; -is_unsafe(_) -> false. - - -%%------------------------------------------------------------------------- -%% Helper functions for dissect_query -%%------------------------------------------------------------------------- -dissect_query_key(<<$=,T/binary>>, IsList, Acc, Key, Value) -> - dissect_query_value(T, IsList, Acc, Key, Value); -dissect_query_key(<<H,T/binary>>, IsList, Acc, Key, Value) -> - dissect_query_key(T, IsList, Acc, <<Key/binary,H>>, Value); -dissect_query_key(B, _, _, _, _) -> - throw({error, missing_value, B}). - - -dissect_query_value(<<$&,_/binary>> = B, IsList, Acc, Key, Value) -> - K = form_urldecode(IsList, Key), - V = form_urldecode(IsList, Value), - dissect_query_separator_amp(B, IsList, [{K,V}|Acc], <<>>, <<>>); -dissect_query_value(<<$;,_/binary>> = B, IsList, Acc, Key, Value) -> - K = form_urldecode(IsList, Key), - V = form_urldecode(IsList, Value), - dissect_query_separator_semicolon(B, IsList, [{K,V}|Acc], <<>>, <<>>); -dissect_query_value(<<H,T/binary>>, IsList, Acc, Key, Value) -> - dissect_query_value(T, IsList, Acc, Key, <<Value/binary,H>>); -dissect_query_value(<<>>, IsList, Acc, Key, Value) -> - K = form_urldecode(IsList, Key), - V = form_urldecode(IsList, Value), - lists:reverse([{K,V}|Acc]). - - -dissect_query_separator_amp(<<"&amp;",T/binary>>, IsList, Acc, Key, Value) -> - dissect_query_key(T, IsList, Acc, Key, Value); -dissect_query_separator_amp(<<$&,T/binary>>, IsList, Acc, Key, Value) -> - dissect_query_key(T, IsList, Acc, Key, Value). - - -dissect_query_separator_semicolon(<<$;,T/binary>>, IsList, Acc, Key, Value) -> - dissect_query_key(T, IsList, Acc, Key, Value). 
- - -%% Form-urldecode input based on RFC 1866 [8.2.1] -form_urldecode(true, B) -> - Result = form_urldecode(B, <<>>), - convert_to_list(Result, utf8); -form_urldecode(false, B) -> - form_urldecode(B, <<>>); -form_urldecode(<<>>, Acc) -> - Acc; -form_urldecode(<<$+,T/binary>>, Acc) -> - form_urldecode(T, <<Acc/binary,$ >>); -form_urldecode(<<$%,C0,C1,T/binary>>, Acc) -> - case is_hex_digit(C0) andalso is_hex_digit(C1) of - true -> - V = ?HEX2DEC(C0)*16+?HEX2DEC(C1), - form_urldecode(T, <<Acc/binary, V>>); - false -> - L = convert_to_list(<<$%,C0,C1,T/binary>>, utf8), - throw({error, invalid_percent_encoding, L}) - end; -form_urldecode(<<H/utf8,T/binary>>, Acc) -> - case is_url_char(H) of - true -> - form_urldecode(T, <<Acc/binary,H>>); - false -> - throw({error, invalid_character, [H]}) - end; -form_urldecode(<<H,_/binary>>, _Acc) -> - throw({error, invalid_character, [H]}). - - -%%------------------------------------------------------------------------- %% Helper functions for normalize %%------------------------------------------------------------------------- diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl index 1567b9333a..c625da56c6 100644 --- a/lib/stdlib/test/uri_string_SUITE.erl +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -38,9 +38,7 @@ recompose_query/1, recompose_parse_query/1, recompose_path/1, recompose_parse_path/1, recompose_autogen/1, parse_recompose_autogen/1, - transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1, - compose_query/1, compose_query_negative/1, - dissect_query/1, dissect_query_negative/1 + transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1 ]). @@ -109,11 +107,7 @@ all() -> transcode_basic, transcode_options, transcode_mixed, - transcode_negative, - compose_query, - compose_query_negative, - dissect_query, - dissect_query_negative + transcode_negative ]. 
groups() -> @@ -829,47 +823,6 @@ transcode_negative(_Config) -> {error,invalid_input,<<"ö">>} = uri_string:transcode("foo%F6bar", [{in_encoding, utf8},{out_encoding, utf8}]). -compose_query(_Config) -> - [] = uri_string:compose_query([]), - "foo=1&bar=2" = uri_string:compose_query([{<<"foo">>,"1"}, {"bar", "2"}]), - "foo=1&bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,escaped_amp}]), - "foo=1&bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,amp}]), - "foo=1;bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,semicolon}]), - "foo+bar=1&%C3%B6=2" = uri_string:compose_query([{"foo bar","1"}, {"ö", "2"}]), - "foo+bar=1&%C3%B6=2" = uri_string:compose_query([{<<"foo bar">>,<<"1">>}, {"ö", <<"2">>}]), - <<"foo+bar=1&%C3%B6=2">> = - uri_string:compose_query([{<<"foo bar">>,<<"1">>}, {<<"ö"/utf8>>, <<"2">>}]). - -compose_query_negative(_Config) -> - {error,invalid_input,4} = uri_string:compose_query([{"",4}]), - {error,invalid_input,5} = uri_string:compose_query([{5,""}]), - {error,invalid_utf8,<<"ö">>} = uri_string:compose_query([{"foo bar","1"}, {<<"ö">>, "2"}]). - -dissect_query(_Config) -> - [] = uri_string:dissect_query(""), - [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1&bar=2"), - [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1&bar=2"), - [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1;bar=2"), - [{"foo","1"}, {"bar", "222"}] = uri_string:dissect_query([<<"foo=1;bar=2">>,"22"]), - [{"foo","ö"}, {"bar", "2"}] = uri_string:dissect_query("foo=%C3%B6&bar=2"), - [{<<"foo">>,<<"ö"/utf8>>}, {<<"bar">>, <<"2">>}] = - uri_string:dissect_query(<<"foo=%C3%B6&bar=2">>), - [{"foo bar","1"},{"ö","2"}] = - uri_string:dissect_query([<<"foo+bar=1&">>,<<"%C3%B6=2">>]). 
- -dissect_query_negative(_Config) -> - {error,invalid_character,";"} = - uri_string:dissect_query("foo=1&ap;bar=2"), - {error,invalid_character,"&"} = - uri_string:dissect_query("foo1&bar=2"), - {error,invalid_percent_encoding,"%XX%B6"} = uri_string:dissect_query("foo=%XX%B6&bar=2"), - {error,invalid_input,<<153,182>>} = - uri_string:dissect_query("foo=%99%B6&bar=2"), - {error,invalid_character,"ö"} = uri_string:dissect_query("föo+bar=1&%C3%B6=2"), - {error,invalid_character,"ö"} = uri_string:dissect_query(<<"föo+bar=1&%C3%B6=2">>), - {error,invalid_input,<<"ö">>} = - uri_string:dissect_query([<<"foo+bar=1&">>,<<"%C3%B6=2ö">>]). - normalize(_Config) -> "/a/g" = uri_string:normalize("/a/b/c/./../../g"), <<"mid/6">> = uri_string:normalize(<<"mid/content=5/../6">>), |