aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPéter Dimitrov <[email protected]>2018-02-13 15:48:09 +0100
committerPéter Dimitrov <[email protected]>2018-02-13 16:00:56 +0100
commitc903da9a67c4900c3113bd503c9fc3adaa85bb69 (patch)
treee1fb2032e211bde702dc0ac99236607fe13e8ed4
parentc69bbc7ce1af2dc295fc17fcb31485e2d4caafa7 (diff)
downloadotp-c903da9a67c4900c3113bd503c9fc3adaa85bb69.tar.gz
otp-c903da9a67c4900c3113bd503c9fc3adaa85bb69.tar.bz2
otp-c903da9a67c4900c3113bd503c9fc3adaa85bb69.zip
stdlib: Update uri_string documentation (HTML 5.2)
- Original link to HTML 5.0 specification was broken as the document was moved when later revisions were released. - Form-urlencoded query string handling conforms to the HTML 5.2 specification that references WHATWG URL (10 Jan 2018). - HTML 5.2 does not specify handling of non-UTF-8 form-urlencoded query strings, but it is still supported as described in HTML 5.0. Change-Id: I44603bb501530b16651ecbb9a26ea64e119f83d9
-rw-r--r--lib/stdlib/doc/src/uri_string.xml21
-rw-r--r--lib/stdlib/src/uri_string.erl9
2 files changed, 19 insertions, 11 deletions
diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml
index 6b52ffdd4d..88d4600611 100644
--- a/lib/stdlib/doc/src/uri_string.xml
+++ b/lib/stdlib/doc/src/uri_string.xml
@@ -32,7 +32,11 @@
<description>
<p>This module contains functions for parsing and handling URIs
(<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>) and
- form-urlencoded query strings (<url href="https://www.w3.org/TR/html5/forms.html">HTML5</url>).
+ form-urlencoded query strings (<url href="https://www.w3.org/TR/html52/">HTML 5.2</url>).
+ </p>
+ <p>
+ Parsing and serializing non-UTF-8 form-urlencoded query strings are also supported
+ (<url href="https://www.w3.org/TR/html50/">HTML 5.0</url>).
</p>
<p>A URI is an identifier consisting of a sequence of characters matching the syntax
rule named <em>URI</em> in <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>.
@@ -152,8 +156,10 @@
<p>Composes a form-urlencoded <c><anno>QueryString</anno></c> based on a
<c><anno>QueryList</anno></c>, a list of non-percent-encoded key-value pairs.
Form-urlencoding is defined in section
- 4.10.22.6 of the <url href="https://www.w3.org/TR/html5/forms.html">HTML5</url>
- specification.
+ 4.10.21.6 of the <url href="https://www.w3.org/TR/html52/">HTML 5.2</url>
+ specification and in section 4.10.22.6 of the
+ <url href="https://www.w3.org/TR/html50/">HTML 5.0</url> specification for
+ non-UTF-8 encodings.
</p>
<p>See also the opposite operation <seealso marker="#dissect_query/1">
<c>dissect_query/1</c></seealso>.
@@ -210,12 +216,11 @@
<p>Dissects an urlencoded <c><anno>QueryString</anno></c> and returns a
<c><anno>QueryList</anno></c>, a list of non-percent-encoded key-value pairs.
Form-urlencoding is defined in section
- 4.10.22.6 of the <url href="https://www.w3.org/TR/html5/forms.html">HTML5</url>
- specification.
+ 4.10.21.6 of the <url href="https://www.w3.org/TR/html52/">HTML 5.2</url>
+ specification and in section 4.10.22.6 of the
+ <url href="https://www.w3.org/TR/html50/">HTML 5.0</url> specification for
+ non-UTF-8 encodings.
</p>
- <p>It is not as strict for its input as the decoding algorithm defined by
- <url href="https://www.w3.org/TR/html5/forms.html">HTML5</url>
- and accepts all unicode characters.</p>
<p>See also the opposite operation <seealso marker="#compose_query/1">
<c>compose_query/1</c></seealso>.
</p>
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index ed3a2a10ac..28d36ea229 100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -403,7 +403,8 @@ transcode(URIString, Options) when is_list(URIString) ->
%%-------------------------------------------------------------------------
%% Functions for working with the query part of a URI as a list
%% of key/value pairs.
-%% HTML5 - 4.10.22.6 URL-encoded form data
+%% HTML 5.2 - 4.10.21.6 URL-encoded form data - WHATWG URL (10 Jan 2018) - UTF-8
+%% HTML 5.0 - 4.10.22.6 URL-encoded form data - non UTF-8
%%-------------------------------------------------------------------------
%%-------------------------------------------------------------------------
@@ -1773,7 +1774,8 @@ get_separator(_L) ->
<<"&">>.
-%% HTML5 - 4.10.22.6 URL-encoded form data - encoding
+%% HTML 5.2 - 4.10.21.6 URL-encoded form data - WHATWG URL (10 Jan 2018) - UTF-8
+%% HTML 5.0 - 4.10.22.6 URL-encoded form data - encoding (non UTF-8)
form_urlencode(Cs, [{encoding, latin1}]) when is_list(Cs) ->
B = convert_to_binary(Cs, utf8, utf8),
html5_byte_encode(base10_encode(B));
@@ -1868,7 +1870,8 @@ dissect_query_value(<<>>, IsList, Acc, Key, Value) ->
lists:reverse([{K,V}|Acc]).
-%% Form-urldecode input based on RFC 1866 [8.2.1]
+%% HTML 5.2 - 4.10.21.6 URL-encoded form data - WHATWG URL (10 Jan 2018) - UTF-8
+%% HTML 5.0 - 4.10.22.6 URL-encoded form data - decoding (non UTF-8)
form_urldecode(true, B) ->
Result = base10_decode(form_urldecode(B, <<>>)),
convert_to_list(Result, utf8);