aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/stdlib/doc/src/uri_string.xml117
-rw-r--r--lib/stdlib/src/uri_string.erl44
-rw-r--r--lib/stdlib/test/uri_string_SUITE.erl2
3 files changed, 109 insertions, 54 deletions
diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml
index d67c687fd1..8322eecb24 100644
--- a/lib/stdlib/doc/src/uri_string.xml
+++ b/lib/stdlib/doc/src/uri_string.xml
@@ -30,10 +30,13 @@
<module>uri_string</module>
<modulesummary>URI processing functions.</modulesummary>
<description>
- <p>This module contains functions for parsing and handling URIs (RFC 3986) and
- form-urlencoded query strings (RFC 1866).</p>
+ <p>This module contains functions for parsing and handling URIs
+ (<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>) and
+ form-urlencoded query strings (<url href="https://www.ietf.org/rfc/rfc1866.txt">RFC 1866</url>).
+ </p>
<p>A URI is an identifier consisting of a sequence of characters matching the syntax
- rule named <em>URI</em> in <em>RFC 3986</em>.</p>
+ rule named <em>URI</em> in <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>.
+ </p>
<p> The generic URI syntax consists of a hierarchical sequence of components referred
to as the scheme, authority, path, query, and fragment:</p>
<pre>
@@ -55,16 +58,24 @@
</pre><br></br>
<p>The interpretation of a URI depends only on the characters used and not on how those
characters are represented in a network protocol.</p>
- <p>The functions implemented by this module covers the following use cases:</p>
+ <p>The functions implemented by this module cover the following use cases:</p>
<list type="bulleted">
- <item>Parsing URIs<br></br>
- <c>parse/1</c></item>
- <item>Recomposing URIs<br></br>
- <c>recompose/2</c></item>
- <item>Transcoding URIs<br></br>
- <c>transcode/2</c></item>
- <item>Working with form-urlencoded query strings<br></br>
- <c>compose_query/[1,2], dissect_query/1</c></item>
+ <item>Parsing a URI into its components and returning a map<br></br>
+ <seealso marker="#parse/1"><c>parse/1</c></seealso>
+ </item>
+ <item>Recomposing a map of URI components into a URI string<br></br>
+ <seealso marker="#recompose/1"><c>recompose/1</c></seealso>
+ </item>
+ <item>Changing inbound binary and percent-encoding of URIs<br></br>
+ <seealso marker="#transcode/2"><c>transcode/2</c></seealso>
+ </item>
+ <item>Composing form-urlencoded query strings from a list of key-value pairs<br></br>
+ <seealso marker="#compose_query/1"><c>compose_query/1</c></seealso><br></br>
+ <seealso marker="#compose_query/2"><c>compose_query/2</c></seealso>
+ </item>
+ <item>Dissecting form-urlencoded query strings into a list of key-value pairs<br></br>
+ <seealso marker="#dissect_query/1"><c>dissect_query/1</c></seealso>
+ </item>
</list>
<p>There are four different encodings present during the handling of URIs:</p>
<list type="bulleted">
@@ -75,8 +86,7 @@
</list>
<p>Unless otherwise specified the return value type and encoding are the same as the input
type and encoding. That is, binary input returns binary output, list input returns a list
- output but mixed input returns list output. Input and output encodings are the same except
- for <c>transcode/2</c>.</p>
+ output but mixed input returns list output.</p>
<p>All of the functions but <c>transcode/2</c> expect input as unicode codepoints in
lists, UTF-8 encoding in binaries and UTF-8 encoding in percent-encoded URI parts.
<c>transcode/2</c> provides the means to convert between the supported URI encodings.</p>
@@ -84,6 +94,22 @@
<datatypes>
<datatype>
+ <name name="error"/>
+ <desc>
+ <p>Error tuple indicating the type of error. Possible values of the second component:</p>
+ <list type="bulleted">
+ <item><c>invalid_character</c></item>
+ <item><c>invalid_input</c></item>
+ <item><c>invalid_map</c></item>
+ <item><c>invalid_percent_encoding</c></item>
+ <item><c>invalid_scheme</c></item>
+ <item><c>invalid_uri</c></item>
+ <item><c>invalid_utf8</c></item>
+ <item><c>missing_value</c></item>
+ </list>
+ </desc>
+ </datatype>
+ <datatype>
<name name="uri_map"/>
<desc>
<p>URI map holding the main components of a URI.</p>
@@ -93,7 +119,8 @@
<name name="uri_string"/>
<desc>
<p>List of unicode codepoints, UTF-8 encoded binary, or a mix of the two,
- representing an RFC 3986 compliant URI (<em>percent-encoded form</em>).
+ representing an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>
+ compliant URI (<em>percent-encoded form</em>).
A URI is a sequence of characters from a very limited set: the letters of
the basic Latin alphabet, digits, and a few special characters.</p>
</desc>
@@ -109,13 +136,21 @@
<p>Composes a form-urlencoded <c><anno>QueryString</anno></c> based on a
<c><anno>QueryList</anno></c>, a list of unescaped key-value pairs.
Media type <c>application/x-www-form-urlencoded</c> is defined in section
- 8.2.1 of <c>RFC 1866</c> (HTML 2.0). Reserved and unsafe characters, as
- defined by RFC 1738 (Uniform Resource Locators), are percent-encoded.
+ 8.2.1 of <url href="https://www.ietf.org/rfc/rfc1866.txt">RFC 1866</url>
+ (HTML 2.0). Reserved and unsafe characters, as
+ defined by <url href="https://www.ietf.org/rfc/rfc1738.txt">RFC 1738</url>
+ (Uniform Resource Locators), are percent-encoded.</p>
+ <p>See also the opposite operation <seealso marker="#dissect_query/1">
+ <c>dissect_query/1</c></seealso>.
</p>
<p><em>Example:</em></p>
<pre>
-1> <input>uri_string:compose_query([{"foo bar","1"},{"city","örebro"}]).</input>
-<![CDATA["foo+bar=1&amp;city=%C3%B6rebro"]]>
+1> <input>uri_string:compose_query([{"foo bar","1"},{"city","örebro"}],</input>
+1> [{separator, semicolon}]).
+"foo+bar=1;city=%C3%B6rebro"
+2> <![CDATA[uri_string:compose_query([{<<"foo bar">>,<<"1">>},
+2> {<<"city">>,<<"örebro"/utf8>>}]).]]>
+<![CDATA[<<"foo+bar=1&amp;city=%C3%B6rebro">>]]>
</pre>
</desc>
</func>
@@ -127,11 +162,14 @@
<p>Same as <c>compose_query/1</c> but with an additional
<c><anno>Options</anno></c> parameter, that controls the type of separator used
between key-value pairs. There are three supported separator types: <c>amp</c> (<![CDATA[&]]>), <c>escaped_amp</c> (<![CDATA[&amp;]]>) and <c>semicolon</c> (;). If the parameter <c><anno>Options</anno></c> is empty, separator takes the default value (<c>escaped_amp</c>).</p>
+ <p>See also the opposite operation <seealso marker="#dissect_query/1">
+ <c>dissect_query/1</c></seealso>.
+ </p>
<p><em>Example:</em></p>
<pre>
1> <input>uri_string:compose_query([{"foo bar","1"},{"city","örebro"}],</input>
-2> [{separator, semicolon}]).
-"foo+bar=1;city=%C3%B6rebro"
+1> [{separator, amp}]).
+<![CDATA["foo+bar=1&city=%C3%B6rebro"]]>
</pre>
</desc>
</func>
@@ -143,13 +181,19 @@
<p>Dissects an urlencoded <c><anno>QueryString</anno></c> and returns a
<c><anno>QueryList</anno></c>, a list of unescaped key-value pairs.
Media type <c>application/x-www-form-urlencoded</c> is defined in section
- 8.2.1 of <c>RFC 1866</c> (HTML 2.0). Percent-encoded segments are decoded
- as defined by RFC 1738 (Uniform Resource Locators).
+ 8.2.1 of <url href="https://www.ietf.org/rfc/rfc1866.txt">RFC 1866</url>
+ (HTML 2.0). Percent-encoded segments are decoded
+ as defined by <url href="https://www.ietf.org/rfc/rfc1738.txt">RFC 1738</url>
+ (Uniform Resource Locators).</p>
+ <p>See also the opposite operation <seealso marker="#compose_query/1">
+ <c>compose_query/1</c></seealso>.
</p>
<p><em>Example:</em></p>
<pre>
1> <input>uri_string:dissect_query("foo+bar=1;city=%C3%B6rebro").</input>
[{"foo bar","1"},{"city","örebro"}]
+2> <![CDATA[uri_string:dissect_query(<<"foo+bar=1;city=%C3%B6rebro">>).]]>
+<![CDATA[[{<<"foo bar">>,<<"1">>},{<<"city">>,<<"örebro"/utf8>>}] ]]>
</pre>
</desc>
</func>
@@ -159,14 +203,19 @@
<fsummary>Parse URI into a map.</fsummary>
<desc>
<p>Returns a <c>URIMap</c>, that is a <em>uri_map()</em> with the parsed components
- of the <c><anno>URIString</anno></c>.</p>
- <p>If parsing fails, an error tuple is returned.</p>
+ of the <c><anno>URIString</anno></c>. If parsing fails, an error tuple is returned.</p>
+ <p>See also the opposite operation <seealso marker="#recompose/1">
+ <c>recompose/1</c></seealso>.</p>
<p><em>Example:</em></p>
<pre>
1> <input>uri_string:parse("foo://user@example.com:8042/over/there?name=ferret#nose").</input>
#{fragment => "nose",host => "example.com",
path => "/over/there",port => 8042,query => "name=ferret",
scheme => "foo",userinfo => "user"}
+2> <![CDATA[uri_string:parse(<<"foo://user@example.com:8042/over/there?name=ferret">>).]]>
+<![CDATA[#{host => <<"example.com">>,path => <<"/over/there">>,
+ port => 8042,query => <<"name=ferret">>,scheme => <<"foo">>,
+ userinfo => <<"user">>}]]>
</pre>
</desc>
</func>
@@ -175,12 +224,15 @@
<name name="recompose" arity="1"/>
<fsummary>Recompose URI.</fsummary>
<desc>
- <p>Returns an RFC 3986 compliant <c><anno>URIString</anno></c> (percent-encoded).</p>
- <p>If the <c><anno>URIMap</anno></c> is invalid, an error tuple is returned.</p>
+ <p>Returns an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> compliant
+ <c><anno>URIString</anno></c> (percent-encoded).
+ If the <c><anno>URIMap</anno></c> is invalid, an error tuple is returned.</p>
+ <p>See also the opposite operation <seealso marker="#parse/1">
+ <c>parse/1</c></seealso>.</p>
<p><em>Example:</em></p>
<pre>
1> <input>URIMap = #{fragment => "nose", host => "example.com", path => "/over/there",</input>
-port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}.
+1> port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}.
#{fragment => "nose",host => "example.com",
path => "/over/there",port => 8042,query => "name=ferret",
scheme => "foo",userinfo => "user"}
@@ -194,14 +246,15 @@ port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}.
<name name="transcode" arity="2"/>
<fsummary>Transcode URI.</fsummary>
<desc>
- <p>Transcodes an RFC 3986 compliant <c><anno>URIString</anno></c>,
+ <p>Transcodes an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>
+ compliant <c><anno>URIString</anno></c>,
where <c><anno>Options</anno></c> is a list of tagged tuples, specifying the inbound
- (<c>in_encoding</c>) and outbound (<c>out_encoding</c>) encodings.</p>
- <p>If an argument is invalid, an error tuple is returned.</p>
+ (<c>in_encoding</c>) and outbound (<c>out_encoding</c>) encodings.
+ If an argument is invalid, an error tuple is returned.</p>
<p><em>Example:</em></p>
<pre>
1> <input><![CDATA[uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>,]]></input>
-2> [{in_encoding, utf32},{out_encoding, utf8}]).
+1> [{in_encoding, utf32},{out_encoding, utf8}]).
<![CDATA[<<"foo%C3%B6bar"/utf8>>]]>
</pre>
</desc>
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index 09bf4aef1d..ca212284d2 100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -229,7 +229,7 @@
-export([compose_query/1, compose_query/2,
dissect_query/1, parse/1,
recompose/1, transcode/2]).
--export_type([uri_map/0, uri_string/0]).
+-export_type([error/0, uri_map/0, uri_string/0]).
%%-------------------------------------------------------------------------
@@ -273,6 +273,8 @@
%% %x96 ` grave / accent
%%-------------------------------------------------------------------------
-type uri_string() :: iodata().
+-type error() :: {error, atom(), list() | binary()}.
+
%%-------------------------------------------------------------------------
%% RFC 3986, Chapter 3. Syntax Components
@@ -292,7 +294,7 @@
-spec parse(URIString) -> URIMap when
URIString :: uri_string(),
URIMap :: uri_map()
- | {error, atom(), list() | binary()}.
+ | error().
parse(URIString) when is_binary(URIString) ->
try parse_uri_reference(URIString, #{}) of
Result -> Result
@@ -317,7 +319,7 @@ parse(URIString) when is_list(URIString) ->
-spec recompose(URIMap) -> URIString when
URIMap :: uri_map(),
URIString :: uri_string()
- | {error, atom(), list() | binary()}.
+ | error().
recompose(Map) ->
case is_valid_map(Map) of
false ->
@@ -346,7 +348,7 @@ recompose(Map) ->
URIString :: uri_string(),
Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}],
Result :: uri_string()
- | {error, atom(), list() | binary()}.
+ | error().
transcode(URIString, Options) when is_binary(URIString) ->
try
InEnc = proplists:get_value(in_encoding, Options, utf8),
@@ -357,7 +359,7 @@ transcode(URIString, Options) when is_binary(URIString) ->
of
Result -> Result
catch
- throw:{error, _, RestData} -> {error, invalid_input, RestData}
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
end;
transcode(URIString, Options) when is_list(URIString) ->
InEnc = proplists:get_value(in_encoding, Options, utf8),
@@ -366,7 +368,7 @@ transcode(URIString, Options) when is_list(URIString) ->
try transcode(Flattened, [], InEnc, OutEnc) of
Result -> Result
catch
- throw:{error, _, RestData} -> {error, invalid_input, RestData}
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
end.
@@ -382,8 +384,8 @@ transcode(URIString, Options) when is_list(URIString) ->
%%-------------------------------------------------------------------------
-spec compose_query(QueryList) -> QueryString when
QueryList :: [{uri_string(), uri_string()}],
- QueryString :: string()
- | {error, atom(), list() | binary()}.
+ QueryString :: uri_string()
+ | error().
compose_query(List) ->
compose_query(List, []).
@@ -391,8 +393,8 @@ compose_query(List) ->
-spec compose_query(QueryList, Options) -> QueryString when
QueryList :: [{uri_string(), uri_string()}],
Options :: [{separator, atom()}],
- QueryString :: string()
- | {error, atom(), list() | binary()}.
+ QueryString :: uri_string()
+ | error().
compose_query([],_Options) ->
[];
compose_query(List, Options) ->
@@ -421,8 +423,8 @@ compose_query([], _Options, IsList, Acc) ->
%%-------------------------------------------------------------------------
-spec dissect_query(QueryString) -> QueryList when
QueryString :: uri_string(),
- QueryList :: [{string(), string()}]
- | {error, atom(), list() | binary()}.
+ QueryList :: [{uri_string(), uri_string()}]
+ | error().
dissect_query(<<>>) ->
[];
dissect_query([]) ->
@@ -1249,9 +1251,9 @@ decode_fragment(Cs) ->
check_utf8(Cs) ->
case unicode:characters_to_list(Cs) of
{incomplete,_,_} ->
- throw({error,non_utf8,Cs});
+ throw({error,invalid_utf8,Cs});
{error,_,_} ->
- throw({error,non_utf8,Cs});
+ throw({error,invalid_utf8,Cs});
_ -> Cs
end.
@@ -1304,12 +1306,12 @@ decode(<<$%,C0,C1,Cs/binary>>, Fun, Acc) ->
true ->
B = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
decode(Cs, Fun, <<Acc/binary, B>>);
- false -> throw({error,percent_decode,<<$%,C0,C1>>})
+ false -> throw({error,invalid_percent_encoding,<<$%,C0,C1>>})
end;
decode(<<C,Cs/binary>>, Fun, Acc) ->
case Fun(C) of
true -> decode(Cs, Fun, <<Acc/binary, C>>);
- false -> throw({error,percent_decode,<<C,Cs/binary>>})
+ false -> throw({error,invalid_percent_encoding,<<C,Cs/binary>>})
end;
decode(<<>>, _Fun, Acc) ->
Acc.
@@ -1339,7 +1341,7 @@ encode(<<Char/utf8, Rest/binary>>, Fun, Acc) ->
C = encode_codepoint_binary(Char, Fun),
encode(Rest, Fun, <<Acc/binary,C/binary>>);
encode(<<Char, Rest/binary>>, _Fun, _Acc) ->
- throw({error,percent_encode,<<Char,Rest/binary>>});
+ throw({error,invalid_input,<<Char,Rest/binary>>});
encode(<<>>, _Fun, Acc) ->
Acc.
@@ -1647,12 +1649,12 @@ transcode([], Acc, List, _InEncoding, _OutEncoding) ->
%% Transcode percent-encoded segment
-transcode_pct([$%,C0,C1|Rest], Acc, B, InEncoding, OutEncoding) ->
+transcode_pct([$%,C0,C1|Rest] = L, Acc, B, InEncoding, OutEncoding) ->
case is_hex_digit(C0) andalso is_hex_digit(C1) of
true ->
Int = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
transcode_pct(Rest, Acc, <<B/binary, Int>>, InEncoding, OutEncoding);
- false -> throw({error, lists:reverse(Acc),[C0,C1]})
+ false -> throw({error, invalid_percent_encoding,L})
end;
transcode_pct([_C|_Rest] = L, Acc, B, InEncoding, OutEncoding) ->
OutBinary = convert_binary(B, InEncoding, OutEncoding),
@@ -1706,7 +1708,7 @@ flatten_list([H|T], InEnc, Acc) ->
flatten_list([], _InEnc, Acc) ->
lists:reverse(Acc);
flatten_list(Arg, _, _) ->
- throw({error, badarg, Arg}).
+ throw({error, invalid_input, Arg}).
percent_encode_segment(Segment) ->
@@ -1752,7 +1754,7 @@ form_urlencode(<<H/utf8,T/binary>>, Acc) ->
form_urlencode(<<H,_T/binary>>, _Acc) ->
throw({error,invalid_utf8,<<H>>});
form_urlencode(H, _Acc) ->
- throw({error,badarg, H}).
+ throw({error,invalid_input, H}).
%% Return true if input char can appear in URL according to
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index 2fc4e1a092..95a49f5eb3 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -819,7 +819,7 @@ transcode_mixed(_Config) ->
uri_string:transcode(["foo%00", <<"%00%0"/utf32>>,<<"0%F"/utf32>>,"6bar"], [{in_encoding, utf32},{out_encoding, utf8}]).
transcode_negative(_Config) ->
- {error,invalid_input,"BX"} =
+ {error,invalid_percent_encoding,"%BXbar"} =
uri_string:transcode(<<"foo%C3%BXbar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]),
{error,invalid_input,<<"ö">>} =
uri_string:transcode("foo%F6bar", [{in_encoding, utf8},{out_encoding, utf8}]).