From c903da9a67c4900c3113bd503c9fc3adaa85bb69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?=
Date: Tue, 13 Feb 2018 15:48:09 +0100
Subject: stdlib: Update uri_string documentation (HTML 5.2)

- Original link to HTML 5.0 specification was broken as the
  document was moved when later revisions were released.
- Form-urlencoded query string handling conforms to the HTML 5.2
  specification that references WHATWG URL (10 Jan 2018).
- HTML 5.2 does not specify handling of non-UTF-8 form-urlencoded
  query strings, but it is still supported as described in HTML 5.0.

Change-Id: I44603bb501530b16651ecbb9a26ea64e119f83d9
---
 lib/stdlib/doc/src/uri_string.xml | 21 +++++++++++++--------
 lib/stdlib/src/uri_string.erl     |  9 ++++++---
 2 files changed, 19 insertions(+), 11 deletions(-)

(limited to 'lib')

diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml
index 6b52ffdd4d..88d4600611 100644
--- a/lib/stdlib/doc/src/uri_string.xml
+++ b/lib/stdlib/doc/src/uri_string.xml
@@ -32,7 +32,11 @@

This module contains functions for parsing and handling URIs (RFC 3986) and - form-urlencoded query strings (HTML5). + form-urlencoded query strings (HTML 5.2). +

+

+ Parsing and serializing non-UTF-8 form-urlencoded query strings are also supported + (HTML 5.0).

A URI is an identifier consisting of a sequence of characters matching the syntax rule named URI in RFC 3986. @@ -152,8 +156,10 @@

Composes a form-urlencoded QueryString based on a QueryList, a list of non-percent-encoded key-value pairs. Form-urlencoding is defined in section - 4.10.22.6 of the HTML5 - specification. + 4.10.21.6 of the HTML 5.2 + specification and in section 4.10.22.6 of the + HTML 5.0 specification for + non-UTF-8 encodings.

See also the opposite operation dissect_query/1. @@ -210,12 +216,11 @@

Dissects an urlencoded QueryString and returns a QueryList, a list of non-percent-encoded key-value pairs. Form-urlencoding is defined in section - 4.10.22.6 of the HTML5 - specification. + 4.10.21.6 of the HTML 5.2 + specification and in section 4.10.22.6 of the + HTML 5.0 specification for + non-UTF-8 encodings.

-

It is not as strict for its input as the decoding algorithm defined by - HTML5 - and accepts all unicode characters.

See also the opposite operation compose_query/1.

diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index ed3a2a10ac..28d36ea229 100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -403,7 +403,8 @@ transcode(URIString, Options) when is_list(URIString) ->
 %%-------------------------------------------------------------------------
 %% Functions for working with the query part of a URI as a list
 %% of key/value pairs.
-%% HTML5 - 4.10.22.6 URL-encoded form data
+%% HTML 5.2 - 4.10.21.6 URL-encoded form data - WHATWG URL (10 Jan 2018) - UTF-8
+%% HTML 5.0 - 4.10.22.6 URL-encoded form data - non UTF-8
 %%-------------------------------------------------------------------------
 
 %%-------------------------------------------------------------------------
@@ -1773,7 +1774,8 @@ get_separator(_L) ->
     <<"&">>.
 
 
-%% HTML5 - 4.10.22.6 URL-encoded form data - encoding
+%% HTML 5.2 - 4.10.21.6 URL-encoded form data - WHATWG URL (10 Jan 2018) - UTF-8
+%% HTML 5.0 - 4.10.22.6 URL-encoded form data - encoding (non UTF-8)
 form_urlencode(Cs, [{encoding, latin1}]) when is_list(Cs) ->
     B = convert_to_binary(Cs, utf8, utf8),
     html5_byte_encode(base10_encode(B));
@@ -1868,7 +1870,8 @@ dissect_query_value(<<>>, IsList, Acc, Key, Value) ->
     lists:reverse([{K,V}|Acc]).
 
 
-%% Form-urldecode input based on RFC 1866 [8.2.1]
+%% HTML 5.2 - 4.10.21.6 URL-encoded form data - WHATWG URL (10 Jan 2018) - UTF-8
+%% HTML 5.0 - 4.10.22.6 URL-encoded form data - decoding (non UTF-8)
 form_urldecode(true, B) ->
     Result = base10_decode(form_urldecode(B, <<>>)),
     convert_to_list(Result, utf8);
-- 
cgit v1.2.3