From 7e5d062973e7cb4f9ee949529e9dcdb5785c1304 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?= Date: Mon, 6 Nov 2017 09:54:12 +0100 Subject: stdlib: Remove compose_query and dissect_query compose_query/{1,2} and dissect_query/1 removed as the implemented specification (HTML 2.0) is old. They will be re-implemented based on HTML5. --- lib/stdlib/doc/src/uri_string.xml | 87 +------------- lib/stdlib/src/uri_string.erl | 216 +---------------------------------- lib/stdlib/test/uri_string_SUITE.erl | 51 +-------- 3 files changed, 4 insertions(+), 350 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml index 8fa2a92370..9ace2b0a05 100644 --- a/lib/stdlib/doc/src/uri_string.xml +++ b/lib/stdlib/doc/src/uri_string.xml @@ -31,8 +31,7 @@ URI processing functions.

This module contains functions for parsing and handling URIs - (RFC 3986) and - form-urlencoded query strings (RFC 1866). + (RFC 3986).

A URI is an identifier consisting of a sequence of characters matching the syntax rule named URI in RFC 3986. @@ -72,13 +71,6 @@ Transforming URIs into a normalized form

normalize/1
- Composing form-urlencoded query strings from a list of key-value pairs

- compose_query/1

- compose_query/2 -
- Dissecting form-urlencoded query strings into a list of key-value pairs

- dissect_query/1 -

There are four different encodings present during the handling of URIs:

@@ -110,14 +102,12 @@

Error tuple indicating the type of error. Possible values of the second component:

- invalid_character invalid_input invalid_map invalid_percent_encoding invalid_scheme invalid_uri invalid_utf8 - missing_value

The third component is a term providing additional information about the cause of the error.

@@ -143,81 +133,6 @@ - - - Compose urlencoded query string. - -

Composes a form-urlencoded QueryString based on a - QueryList, a list of non-percent-encoded key-value pairs. - Form-urlencoding is defined in section - 8.2.1 of RFC 1866 - (HTML 2.0) for media type application/x-www-form-urlencoded. - Reserved and unsafe characters, as - defined by RFC 1738 - (Uniform Resource Locators), are percent-encoded.

-

See also the opposite operation - dissect_query/1. -

-

Example:

-
-1> uri_string:compose_query([{"foo bar","1"},{"city","örebro"}]).
-
-2> >,<<"1">>},
-2> {<<"city">>,<<"örebro"/utf8>>}]).]]>
->]]>
-	
-
-
- - - - Compose urlencoded query string. - -

Same as compose_query/1 but with an additional - Options parameter, that controls the type of separator used - between key-value pairs. There are three supported separator types: amp (&), escaped_amp (&amp;) and semicolon (;). If the parameter Options is empty, separator takes the default value (escaped_amp).

-

See also the opposite operation - dissect_query/1. -

-

Example:

-
-1> uri_string:compose_query([{"foo bar","1"},{"city","örebro"}],
-1> [{separator, amp}]).
- uri_string:compose_query([{<<"foo bar">>,<<"1">>},
-2> {<<"city">>,<<"örebro"/utf8>>}], [{separator, escaped_amp}]).]]>
->]]>
-	
-
-
- - - - Dissect query string. - -

Dissects an urlencoded QueryString and returns a - QueryList, a list of non-percent-encoded key-value pairs. - Form-urlencoding is defined in section - 8.2.1 of RFC 1866 - (HTML 2.0) for media type application/x-www-form-urlencoded. - Percent-encoded segments are decoded as defined by - RFC 1738 - (Uniform Resource Locators).

-

Supported separator types: amp (&), escaped_amp - (&amp;) and semicolon (;).

-

See also the opposite operation - compose_query/1. -

-

Example:

-
-1> uri_string:dissect_query("foo+bar=1;city=%C3%B6rebro").
-[{"foo bar","1"},{"city","örebro"}]
-2> >).]]>
->,<<"1">>},{<<"city">>,<<"örebro"/utf8>>}] ]]>
-	
-
-
- Syntax-based normalization. diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index f4acf1885d..22212da222 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -226,8 +226,7 @@ %%------------------------------------------------------------------------- %% External API %%------------------------------------------------------------------------- --export([compose_query/1, compose_query/2, - dissect_query/1, normalize/1, parse/1, +-export([normalize/1, parse/1, recompose/1, transcode/2]). -export_type([error/0, uri_map/0, uri_string/0]). @@ -382,75 +381,6 @@ transcode(URIString, Options) when is_list(URIString) -> end. -%%------------------------------------------------------------------------- -%% Functions for working with the query part of a URI as a list -%% of key/value pairs. -%% HTML 2.0 (RFC 1866) defines a media type application/x-www-form-urlencoded -%% in section [8.2.1] "The form-urlencoded Media Type". -%%------------------------------------------------------------------------- - -%%------------------------------------------------------------------------- -%% Compose urlencoded query string from a list of unescaped key/value pairs. -%%------------------------------------------------------------------------- --spec compose_query(QueryList) -> QueryString when - QueryList :: [{uri_string(), uri_string()}], - QueryString :: uri_string() - | error(). -compose_query(List) -> - compose_query(List, []). - - --spec compose_query(QueryList, Options) -> QueryString when - QueryList :: [{uri_string(), uri_string()}], - Options :: [{separator, atom()}], - QueryString :: uri_string() - | error(). -compose_query([],_Options) -> - []; -compose_query(List, Options) -> - try compose_query(List, Options, false, <<>>) - catch - throw:{error, Atom, RestData} -> {error, Atom, RestData} - end. 
-%% -compose_query([{Key,Value}|Rest], Options, IsList, Acc) -> - Separator = get_separator(Options, Rest), - K = form_urlencode(Key), - V = form_urlencode(Value), - IsListNew = IsList orelse is_list(Key) orelse is_list(Value), - compose_query(Rest, Options, IsListNew, <>; -get_separator([{separator, amp}], _L) -> - <<"&">>; -get_separator([{separator, escaped_amp}], _L) -> - <<"&">>; -get_separator([{separator, semicolon}], _L) -> - <<";">>. - - -%% Form-urlencode input based on RFC 1866 [8.2.1] -form_urlencode(Cs) when is_list(Cs) -> - B = convert_to_binary(Cs, utf8, utf8), - form_urlencode(B, <<>>); -form_urlencode(Cs) -> - form_urlencode(Cs, <<>>). -%% -form_urlencode(<<>>, Acc) -> - Acc; -form_urlencode(<<$ ,T/binary>>, Acc) -> - form_urlencode(T, <>); -form_urlencode(<>, Acc) -> - case is_url_char(H) of - true -> - form_urlencode(T, <>); - false -> - E = percent_encode_binary(H), - form_urlencode(T, <>) - end; -form_urlencode(<>, _Acc) -> - throw({error,invalid_utf8,<>}); -form_urlencode(H, _Acc) -> - throw({error,invalid_input, H}). - - -%% Return true if input char can appear in URL according to -%% RFC 1738 "Uniform Resource Locators". -is_url_char(C) - when 0 =< C, C =< 31; - 128 =< C, C =< 255 -> false; -is_url_char(127) -> false; -is_url_char(C) -> - not (is_reserved(C) orelse is_unsafe(C)). - - -%% Reserved characters (RFC 1738) -is_reserved($;) -> true; -is_reserved($/) -> true; -is_reserved($?) -> true; -is_reserved($:) -> true; -is_reserved($@) -> true; -is_reserved($=) -> true; -is_reserved($&) -> true; -is_reserved(_) -> false. - - -%% Unsafe characters (RFC 1738) -is_unsafe(${) -> true; -is_unsafe($}) -> true; -is_unsafe($|) -> true; -is_unsafe($\\) -> true; -is_unsafe($^) -> true; -is_unsafe($~) -> true; -is_unsafe($[) -> true; -is_unsafe($]) -> true; -is_unsafe($`) -> true; -is_unsafe(_) -> false. 
- - -%%------------------------------------------------------------------------- -%% Helper functions for dissect_query -%%------------------------------------------------------------------------- -dissect_query_key(<<$=,T/binary>>, IsList, Acc, Key, Value) -> - dissect_query_value(T, IsList, Acc, Key, Value); -dissect_query_key(<>, IsList, Acc, Key, Value) -> - dissect_query_key(T, IsList, Acc, <>, Value); -dissect_query_key(B, _, _, _, _) -> - throw({error, missing_value, B}). - - -dissect_query_value(<<$&,_/binary>> = B, IsList, Acc, Key, Value) -> - K = form_urldecode(IsList, Key), - V = form_urldecode(IsList, Value), - dissect_query_separator_amp(B, IsList, [{K,V}|Acc], <<>>, <<>>); -dissect_query_value(<<$;,_/binary>> = B, IsList, Acc, Key, Value) -> - K = form_urldecode(IsList, Key), - V = form_urldecode(IsList, Value), - dissect_query_separator_semicolon(B, IsList, [{K,V}|Acc], <<>>, <<>>); -dissect_query_value(<>, IsList, Acc, Key, Value) -> - dissect_query_value(T, IsList, Acc, Key, <>); -dissect_query_value(<<>>, IsList, Acc, Key, Value) -> - K = form_urldecode(IsList, Key), - V = form_urldecode(IsList, Value), - lists:reverse([{K,V}|Acc]). - - -dissect_query_separator_amp(<<"&",T/binary>>, IsList, Acc, Key, Value) -> - dissect_query_key(T, IsList, Acc, Key, Value); -dissect_query_separator_amp(<<$&,T/binary>>, IsList, Acc, Key, Value) -> - dissect_query_key(T, IsList, Acc, Key, Value). - - -dissect_query_separator_semicolon(<<$;,T/binary>>, IsList, Acc, Key, Value) -> - dissect_query_key(T, IsList, Acc, Key, Value). 
- - -%% Form-urldecode input based on RFC 1866 [8.2.1] -form_urldecode(true, B) -> - Result = form_urldecode(B, <<>>), - convert_to_list(Result, utf8); -form_urldecode(false, B) -> - form_urldecode(B, <<>>); -form_urldecode(<<>>, Acc) -> - Acc; -form_urldecode(<<$+,T/binary>>, Acc) -> - form_urldecode(T, <>); -form_urldecode(<<$%,C0,C1,T/binary>>, Acc) -> - case is_hex_digit(C0) andalso is_hex_digit(C1) of - true -> - V = ?HEX2DEC(C0)*16+?HEX2DEC(C1), - form_urldecode(T, <>); - false -> - L = convert_to_list(<<$%,C0,C1,T/binary>>, utf8), - throw({error, invalid_percent_encoding, L}) - end; -form_urldecode(<>, Acc) -> - case is_url_char(H) of - true -> - form_urldecode(T, <>); - false -> - throw({error, invalid_character, [H]}) - end; -form_urldecode(<>, _Acc) -> - throw({error, invalid_character, [H]}). - - %%------------------------------------------------------------------------- %% Helper functions for normalize %%------------------------------------------------------------------------- diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl index 1567b9333a..c625da56c6 100644 --- a/lib/stdlib/test/uri_string_SUITE.erl +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -38,9 +38,7 @@ recompose_query/1, recompose_parse_query/1, recompose_path/1, recompose_parse_path/1, recompose_autogen/1, parse_recompose_autogen/1, - transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1, - compose_query/1, compose_query_negative/1, - dissect_query/1, dissect_query_negative/1 + transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1 ]). @@ -109,11 +107,7 @@ all() -> transcode_basic, transcode_options, transcode_mixed, - transcode_negative, - compose_query, - compose_query_negative, - dissect_query, - dissect_query_negative + transcode_negative ]. 
groups() -> @@ -829,47 +823,6 @@ transcode_negative(_Config) -> {error,invalid_input,<<"ö">>} = uri_string:transcode("foo%F6bar", [{in_encoding, utf8},{out_encoding, utf8}]). -compose_query(_Config) -> - [] = uri_string:compose_query([]), - "foo=1&bar=2" = uri_string:compose_query([{<<"foo">>,"1"}, {"bar", "2"}]), - "foo=1&bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,escaped_amp}]), - "foo=1&bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,amp}]), - "foo=1;bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,semicolon}]), - "foo+bar=1&%C3%B6=2" = uri_string:compose_query([{"foo bar","1"}, {"ö", "2"}]), - "foo+bar=1&%C3%B6=2" = uri_string:compose_query([{<<"foo bar">>,<<"1">>}, {"ö", <<"2">>}]), - <<"foo+bar=1&%C3%B6=2">> = - uri_string:compose_query([{<<"foo bar">>,<<"1">>}, {<<"ö"/utf8>>, <<"2">>}]). - -compose_query_negative(_Config) -> - {error,invalid_input,4} = uri_string:compose_query([{"",4}]), - {error,invalid_input,5} = uri_string:compose_query([{5,""}]), - {error,invalid_utf8,<<"ö">>} = uri_string:compose_query([{"foo bar","1"}, {<<"ö">>, "2"}]). - -dissect_query(_Config) -> - [] = uri_string:dissect_query(""), - [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1&bar=2"), - [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1&bar=2"), - [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1;bar=2"), - [{"foo","1"}, {"bar", "222"}] = uri_string:dissect_query([<<"foo=1;bar=2">>,"22"]), - [{"foo","ö"}, {"bar", "2"}] = uri_string:dissect_query("foo=%C3%B6&bar=2"), - [{<<"foo">>,<<"ö"/utf8>>}, {<<"bar">>, <<"2">>}] = - uri_string:dissect_query(<<"foo=%C3%B6&bar=2">>), - [{"foo bar","1"},{"ö","2"}] = - uri_string:dissect_query([<<"foo+bar=1&">>,<<"%C3%B6=2">>]). 
- -dissect_query_negative(_Config) -> - {error,invalid_character,";"} = - uri_string:dissect_query("foo=1≈bar=2"), - {error,invalid_character,"&"} = - uri_string:dissect_query("foo1&bar=2"), - {error,invalid_percent_encoding,"%XX%B6"} = uri_string:dissect_query("foo=%XX%B6&bar=2"), - {error,invalid_input,<<153,182>>} = - uri_string:dissect_query("foo=%99%B6&bar=2"), - {error,invalid_character,"ö"} = uri_string:dissect_query("föo+bar=1&%C3%B6=2"), - {error,invalid_character,"ö"} = uri_string:dissect_query(<<"föo+bar=1&%C3%B6=2">>), - {error,invalid_input,<<"ö">>} = - uri_string:dissect_query([<<"foo+bar=1&">>,<<"%C3%B6=2ö">>]). - normalize(_Config) -> "/a/g" = uri_string:normalize("/a/b/c/./../../g"), <<"mid/6">> = uri_string:normalize(<<"mid/content=5/../6">>), -- cgit v1.2.3