diff options
-rw-r--r-- | lib/stdlib/doc/src/uri_string.xml | 21 | ||||
-rw-r--r-- | lib/stdlib/src/uri_string.erl | 9 |
2 files changed, 19 insertions, 11 deletions
diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml index 6b52ffdd4d..88d4600611 100644 --- a/lib/stdlib/doc/src/uri_string.xml +++ b/lib/stdlib/doc/src/uri_string.xml @@ -32,7 +32,11 @@ <description> <p>This module contains functions for parsing and handling URIs (<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>) and - form-urlencoded query strings (<url href="https://www.w3.org/TR/html5/forms.html">HTML5</url>). + form-urlencoded query strings (<url href="https://www.w3.org/TR/html52/">HTML 5.2</url>). + </p> + <p> + Parsing and serializing non-UTF-8 form-urlencoded query strings are also supported + (<url href="https://www.w3.org/TR/html50/">HTML 5.0</url>). </p> <p>A URI is an identifier consisting of a sequence of characters matching the syntax rule named <em>URI</em> in <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>. @@ -152,8 +156,10 @@ <p>Composes a form-urlencoded <c><anno>QueryString</anno></c> based on a <c><anno>QueryList</anno></c>, a list of non-percent-encoded key-value pairs. Form-urlencoding is defined in section - 4.10.22.6 of the <url href="https://www.w3.org/TR/html5/forms.html">HTML5</url> - specification. + 4.10.21.6 of the <url href="https://www.w3.org/TR/html52/">HTML 5.2</url> + specification and in section 4.10.22.6 of the + <url href="https://www.w3.org/TR/html50/">HTML 5.0</url> specification for + non-UTF-8 encodings. </p> <p>See also the opposite operation <seealso marker="#dissect_query/1"> <c>dissect_query/1</c></seealso>. @@ -210,12 +216,11 @@ <p>Dissects an urlencoded <c><anno>QueryString</anno></c> and returns a <c><anno>QueryList</anno></c>, a list of non-percent-encoded key-value pairs. Form-urlencoding is defined in section - 4.10.22.6 of the <url href="https://www.w3.org/TR/html5/forms.html">HTML5</url> - specification. + 4.10.21.6 of the <url href="https://www.w3.org/TR/html52/">HTML 5.2</url> + specification and in section 4.10.22.6 of the + <url href="https://www.w3.org/TR/html50/">HTML 5.0</url> specification for + non-UTF-8 encodings. </p> - <p>It is not as strict for its input as the decoding algorithm defined by - <url href="https://www.w3.org/TR/html5/forms.html">HTML5</url> - and accepts all unicode characters.</p> <p>See also the opposite operation <seealso marker="#compose_query/1"> <c>compose_query/1</c></seealso>. </p> diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index ed3a2a10ac..28d36ea229 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -403,7 +403,8 @@ transcode(URIString, Options) when is_list(URIString) -> %%------------------------------------------------------------------------- %% Functions for working with the query part of a URI as a list %% of key/value pairs. -%% HTML5 - 4.10.22.6 URL-encoded form data +%% HTML 5.2 - 4.10.21.6 URL-encoded form data - WHATWG URL (10 Jan 2018) - UTF-8 +%% HTML 5.0 - 4.10.22.6 URL-encoded form data - non UTF-8 %%------------------------------------------------------------------------- %%------------------------------------------------------------------------- @@ -1773,7 +1774,8 @@ get_separator(_L) -> <<"&">>. -%% HTML5 - 4.10.22.6 URL-encoded form data - encoding +%% HTML 5.2 - 4.10.21.6 URL-encoded form data - WHATWG URL (10 Jan 2018) - UTF-8 +%% HTML 5.0 - 4.10.22.6 URL-encoded form data - encoding (non UTF-8) form_urlencode(Cs, [{encoding, latin1}]) when is_list(Cs) -> B = convert_to_binary(Cs, utf8, utf8), html5_byte_encode(base10_encode(B)); @@ -1868,7 +1870,8 @@ dissect_query_value(<<>>, IsList, Acc, Key, Value) -> lists:reverse([{K,V}|Acc]). -%% Form-urldecode input based on RFC 1866 [8.2.1] +%% HTML 5.2 - 4.10.21.6 URL-encoded form data - WHATWG URL (10 Jan 2018) - UTF-8 +%% HTML 5.0 - 4.10.22.6 URL-encoded form data - decoding (non UTF-8) form_urldecode(true, B) -> Result = base10_decode(form_urldecode(B, <<>>)), convert_to_list(Result, utf8); |