aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib
diff options
context:
space:
mode:
Diffstat (limited to 'lib/stdlib')
-rw-r--r--lib/stdlib/doc/src/uri_string.xml114
-rw-r--r--lib/stdlib/src/uri_string.erl58
2 files changed, 56 insertions, 116 deletions
diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml
index 8283b8ca0e..496573ae2f 100644
--- a/lib/stdlib/doc/src/uri_string.xml
+++ b/lib/stdlib/doc/src/uri_string.xml
@@ -24,7 +24,7 @@
<title>uri_string</title>
<prepared>Péter Dimitrov</prepared>
<docno>1</docno>
- <date>2017-08-23</date>
+ <date>2017-10-20</date>
<rev>A</rev>
</header>
<module>uri_string</module>
@@ -34,7 +34,8 @@
<p>A URI is an identifier consisting of a sequence of characters matching the syntax
rule named <em>URI</em> in <em>RFC 3986</em>.</p>
<p> The generic URI syntax consists of a hierarchical sequence of components referred
- to as the scheme, authority, path, query, and fragment:<pre>
+ to as the scheme, authority, path, query, and fragment:</p>
+ <pre>
URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
hier-part = "//" authority path-abempty
/ path-absolute
@@ -51,35 +52,26 @@
unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
</pre><br></br>
- </p>
<p>The interpretation of a URI depends only on the characters used and not on how those
characters are represented in a network protocol.</p>
- <p>The functions implemented by this module covers the following use cases:
+ <p>The functions implemented by this module cover the following use cases:</p>
<list type="bulleted">
<item>Parsing URIs<br></br>
<c>parse/1</c></item>
<item>Recomposing URIs<br></br>
<c>recompose/2</c></item>
- <item>Resolving URI references<br></br>
- <c>resolve_uri_reference/3</c></item>
- <item>Creating URI references<br></br>
- <c>create_uri_reference/3</c></item>
- <item>Normalizing URIs<br></br>
- <c>normalize/1</c></item>
<item>Transcoding URIs<br></br>
<c>transcode/2</c></item>
- <item>Working with urlencoded query strings<br></br>
- <c>compose_query/1, dissect_query/1</c></item>
+ <item>Working with form-urlencoded query strings<br></br>
+ <c>compose_query/[1,2], dissect_query/1</c></item>
</list>
- </p>
- <p>There are four different encodings present during the handling of URIs:
+ <p>There are four different encodings present during the handling of URIs:</p>
<list type="bulleted">
<item>Inbound binary encoding in binaries</item>
<item>Inbound percent-encoding in lists and binaries</item>
<item>Outbound binary encoding in binaries</item>
<item>Outbound percent-encoding in lists and binaries</item>
</list>
- </p>
<p>Unless otherwise specified the return value type and encoding are the same as the input
type and encoding. That is, binary input returns binary output, list input returns a list
output but mixed input returns list output. Input and output encodings are the same except
@@ -113,31 +105,34 @@
<name name="compose_query" arity="1"/>
<fsummary>Compose urlencoded query string.</fsummary>
<desc>
- <p>Composes an urlencoded <c><anno>QueryString</anno></c> based on a
+ <p>Composes a form-urlencoded <c><anno>QueryString</anno></c> based on a
<c><anno>QueryList</anno></c>, a list of unescaped key-value pairs.
Media type <c>application/x-www-form-urlencoded</c> is defined in section
- 8.2.1 of <c>RFC 1866</c> (HTML 2.0).
+ 8.2.1 of <c>RFC 1866</c> (HTML 2.0). Reserved and unsafe characters, as
+ defined by RFC 1738 (Uniform Resource Locators), are percent-encoded.
</p>
- <p>If an argument is invalid, a <c>badarg</c> exception is raised.</p>
<p><em>Example:</em></p>
<pre>
-1> <input>uri_string:compose_query(...).</input>
-</pre>
+1> <input>uri_string:compose_query([{"foo bar","1"},{"city","örebro"}]).</input>
+<![CDATA["foo+bar=1&city=%C3%B6rebro"]]>
+ </pre>
</desc>
</func>
<func>
- <name name="create_uri_reference" arity="2"/>
- <fsummary>Create references.</fsummary>
+ <name name="compose_query" arity="2"/>
+ <fsummary>Compose urlencoded query string.</fsummary>
<desc>
- <p>Creates an RFC 3986 compliant <c><anno>RelativeDestURI</anno></c>,
- based <c><anno>AbsoluteSourceURI</anno></c> and <c><anno>AbsoluteSourceURI</anno></c>
- </p>
- <p>If an argument is invalid, a <c>badarg</c> exception is raised.</p>
+ <p>Same as <c>compose_query/1</c> but with an additional
+ <c><anno>Options</anno></c> parameter, that controls the type of separator used
+ between key-value pairs. There are two supported separator types: <c>amp</c> (&amp;)
+ and <c>semicolon</c> (;).</p>
<p><em>Example:</em></p>
<pre>
-1> <input>uri_string:create_uri_reference(...,...).</input>
-</pre>
+1> <input>uri_string:compose_query([{"foo bar","1"},{"city","örebro"}],</input>
+2> [{separator, semicolon}]).
+"foo+bar=1;city=%C3%B6rebro"
+ </pre>
</desc>
</func>
@@ -148,31 +143,14 @@
<p>Dissects an urlencoded <c><anno>QueryString</anno></c> and returns a
<c><anno>QueryList</anno></c>, a list of unescaped key-value pairs.
Media type <c>application/x-www-form-urlencoded</c> is defined in section
- 8.2.1 of <c>RFC 1866</c> (HTML 2.0).
+ 8.2.1 of <c>RFC 1866</c> (HTML 2.0). Percent-encoded segments are decoded
+ as defined by RFC 1738 (Uniform Resource Locators).
</p>
- <p>If an argument is invalid, a <c>badarg</c> exception is raised.</p>
<p><em>Example:</em></p>
<pre>
-1> <input>uri_string:dissect_query(...).</input>
-</pre>
- </desc>
- </func>
-
- <func>
- <name name="normalize" arity="1"/>
- <fsummary>Normalize URI.</fsummary>
- <desc>
- <p>Normalizes an RFC 3986 compliant <c><anno>URIString</anno></c> and returns
- a <c><anno>NormalizedURI</anno></c>. The algorithm used to shorten the input
- URI is called Syntax-Based Normalization and described at
- <c>Section 6.2.2 of RFC 3986</c>.
- </p>
- <p>If an argument is invalid, a <c>badarg</c> exception is raised.</p>
- <p><em>Example:</em></p>
- <pre>
-1> <input>uri_string:normalize("http://example.org/one/two/../../one").</input>
-"http://example.org/one"
-</pre>
+1> <input>uri_string:dissect_query("foo+bar=1;city=%C3%B6rebro").</input>
+[{"foo bar","1"},{"city","örebro"}]
+ </pre>
</desc>
</func>
@@ -182,14 +160,14 @@
<desc>
<p>Returns a <c>URIMap</c>, that is a <em>uri_map()</em> with the parsed components
of the <c><anno>URIString</anno></c>.</p>
- <p>If parsing fails, a <c>parse_error</c> exception is raised.</p>
+ <p>If parsing fails, an error tuple is returned.</p>
<p><em>Example:</em></p>
<pre>
1> <input>uri_string:parse("foo://user@example.com:8042/over/there?name=ferret#nose").</input>
#{fragment => "nose",host => "example.com",
path => "/over/there",port => 8042,query => "name=ferret",
scheme => foo,userinfo => "user"}
-2> </pre>
+ </pre>
</desc>
</func>
@@ -198,50 +176,34 @@
<fsummary>Recompose URI.</fsummary>
<desc>
<p>Returns an RFC 3986 compliant <c><anno>URIString</anno></c> (percent-encoded).</p>
- <p>If the <c><anno>URIMap</anno></c> is invalid, a <c>badarg</c> exception is raised.</p>
+ <p>If the <c><anno>URIMap</anno></c> is invalid, an error tuple is returned.</p>
<p><em>Example:</em></p>
<pre>
1> <input>URIMap = #{fragment => "nose", host => "example.com", path => "/over/there",</input>
-port => 8042, query => "name=ferret", scheme => foo, userinfo => "user"}.
+port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}.
#{fragment => "nose",host => "example.com",
path => "/over/there",port => 8042,query => "name=ferret",
scheme => "foo",userinfo => "user"}
-2> <input>uri_string:recompose(URIMap, []).</input>
+2> <input>uri_string:recompose(URIMap).</input>
"foo://example.com:8042/over/there?name=ferret#nose"</pre>
</desc>
</func>
<func>
- <name name="resolve_uri_reference" arity="2"/>
- <fsummary>Resolve URI reference.</fsummary>
- <desc>
- <p>Resolves an RFC 3986 compliant <c><anno>RelativeURI</anno></c>,
- based <c><anno>AbsoluteBaseURI</anno></c> and returns a new absolute URI
- (<c><anno>AbsoluteDestURI</anno></c>).</p>
- <p>If an argument is invalid, a <c>badarg</c> exception is raised.</p>
- <p><em>Example:</em></p>
- <pre>
-1> <input>uri_string:resolve_uri_reference(...,...).</input>
-</pre>
- </desc>
- </func>
-
- <func>
<name name="transcode" arity="2"/>
<fsummary>Transcode URI.</fsummary>
<desc>
<p>Transcodes an RFC 3986 compliant <c><anno>URIString</anno></c>,
where <c><anno>Options</anno></c> is a list of tagged tuples, specifying the inbound
(<c>in_encoding</c>) and outbound (<c>out_encoding</c>) encodings.</p>
- <p>If an argument is invalid, a <c>badarg</c> exception is raised.</p>
+ <p>If an argument is invalid, an error tuple is returned.</p>
<p><em>Example:</em></p>
<pre>
-1> <input>uri_string:transcode(&lt;&lt;"foo://f%20oo"&gt;&gt;, [{in_encoding, utf8},</input>
-{out_encoding, utf16}]).
-&lt;&lt;0,102,0,111,0,111,0,58,0,47,0,47,0,102,0,37,0,48,0,48,0,37,0,50,0,48,0,
- 111,0,111&gt;&gt;
-</pre>
+1> <input><![CDATA[uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>,]]></input>
+2> [{in_encoding, utf32},{out_encoding, utf8}]).
+<![CDATA[<<"foo%C3%B6bar"/utf8>>]]>
+ </pre>
</desc>
</func>
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index 51f7564934..8723d3f183 100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -226,9 +226,9 @@
%%-------------------------------------------------------------------------
%% External API
%%-------------------------------------------------------------------------
--export([compose_query/1, compose_query/2, create_uri_reference/2,
- dissect_query/1, normalize/1, parse/1,
- recompose/1, resolve_uri_reference/2, transcode/2]).
+-export([compose_query/1, compose_query/2,
+ dissect_query/1, parse/1,
+ recompose/1, transcode/2]).
-export_type([uri_map/0, uri_string/0]).
@@ -291,7 +291,8 @@
%%-------------------------------------------------------------------------
-spec parse(URIString) -> URIMap when
URIString :: uri_string(),
- URIMap :: uri_map().
+ URIMap :: uri_map()
+ | {error, atom(), list() | binary()}.
parse(URIString) when is_binary(URIString) ->
try parse_uri_reference(URIString, #{}) of
Result -> Result
@@ -315,7 +316,8 @@ parse(URIString) when is_list(URIString) ->
%%-------------------------------------------------------------------------
-spec recompose(URIMap) -> URIString when
URIMap :: uri_map(),
- URIString :: uri_string().
+ URIString :: uri_string()
+ | {error, atom(), list() | binary()}.
recompose(Map) ->
case is_valid_map(Map) of
false ->
@@ -338,40 +340,13 @@ recompose(Map) ->
%%-------------------------------------------------------------------------
-%% Resolve references
-%%-------------------------------------------------------------------------
--spec resolve_uri_reference(RelativeURI, AbsoluteBaseURI) -> AbsoluteDestURI when
- RelativeURI :: uri_string(),
- AbsoluteBaseURI :: uri_string(),
- AbsoluteDestURI :: uri_string().
-resolve_uri_reference(_,_) ->
- "".
-
-%%-------------------------------------------------------------------------
-%% Create references
-%%-------------------------------------------------------------------------
--spec create_uri_reference(AbsoluteSourceURI, AbsoluteBaseURI) -> RelativeDestURI when
- AbsoluteSourceURI :: uri_string(),
- AbsoluteBaseURI :: uri_string(),
- RelativeDestURI :: uri_string().
-create_uri_reference(_,_) ->
- "".
-
-%%-------------------------------------------------------------------------
-%% Normalize URIs
-%%-------------------------------------------------------------------------
--spec normalize(URIString) -> NormalizedURI when
- URIString :: uri_string(),
- NormalizedURI :: uri_string().
-normalize(_) ->
- "".
-
-%%-------------------------------------------------------------------------
%% Transcode URIs
%%-------------------------------------------------------------------------
--spec transcode(URIString, Options) -> URIString when
+-spec transcode(URIString, Options) -> Result when
URIString :: uri_string(),
- Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}].
+ Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}],
+ Result :: uri_string()
+ | {error, atom(), list() | binary()}.
transcode(URIString, Options) when is_binary(URIString) ->
try
InEnc = proplists:get_value(in_encoding, Options, utf8),
@@ -407,7 +382,8 @@ transcode(URIString, Options) when is_list(URIString) ->
%%-------------------------------------------------------------------------
-spec compose_query(QueryList) -> QueryString when
QueryList :: [{uri_string(), uri_string()}],
- QueryString :: string().
+ QueryString :: string()
+ | {error, atom(), list() | binary()}.
compose_query(List) ->
compose_query(List, []).
@@ -415,7 +391,8 @@ compose_query(List) ->
-spec compose_query(QueryList, Options) -> QueryString when
QueryList :: [{uri_string(), uri_string()}],
Options :: [{separator, atom()}],
- QueryString :: string().
+ QueryString :: string()
+ | {error, atom(), list() | binary()}.
compose_query([],_Options) ->
[];
compose_query(List, Options) ->
@@ -439,7 +416,8 @@ compose_query([], _Options, Acc) ->
%%-------------------------------------------------------------------------
-spec dissect_query(QueryString) -> QueryList when
QueryString :: uri_string(),
- QueryList :: [{string(), string()}].
+ QueryList :: [{string(), string()}]
+ | {error, atom(), list() | binary()}.
dissect_query([]) ->
[];
dissect_query(QueryString) when is_binary(QueryString) ->
@@ -1940,7 +1918,7 @@ form_urldecode(Cs) ->
form_urldecode(<<>>, Acc) ->
convert_list(Acc, utf8);
form_urldecode(<<$+,T/binary>>, Acc) ->
- form_urlencode(T, [$ |Acc]);
+ form_urldecode(T, <<Acc/binary,$ >>);
form_urldecode(<<$%,C0,C1,T/binary>>, Acc) ->
case is_hex_digit(C0) andalso is_hex_digit(C1) of
true ->