diff options
Diffstat (limited to 'lib/stdlib')
-rw-r--r-- | lib/stdlib/doc/src/uri_string.xml | 114 | ||||
-rw-r--r-- | lib/stdlib/src/uri_string.erl | 58 |
2 files changed, 56 insertions, 116 deletions
diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml index 8283b8ca0e..496573ae2f 100644 --- a/lib/stdlib/doc/src/uri_string.xml +++ b/lib/stdlib/doc/src/uri_string.xml @@ -24,7 +24,7 @@ <title>maps</title> <prepared>Péter Dimitrov</prepared> <docno>1</docno> - <date>2017-08-23</date> + <date>2017-10-20</date> <rev>A</rev> </header> <module>uri_string</module> @@ -34,7 +34,8 @@ <p>A URI is an identifier consisting of a sequence of characters matching the syntax rule named <em>URI</em> in <em>RFC 3986</em>.</p> <p> The generic URI syntax consists of a hierarchical sequence of components referred - to as the scheme, authority, path, query, and fragment:<pre> + to as the scheme, authority, path, query, and fragment:</p> + <pre> URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] hier-part = "//" authority path-abempty / path-absolute @@ -51,35 +52,26 @@ unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" </pre><br></br> - </p> <p>The interpretation of a URI depends only on the characters used and not on how those characters are represented in a network protocol.</p> - <p>The functions implemented by this module covers the following use cases: + <p>The functions implemented by this module covers the following use cases:</p> <list type="bulleted"> <item>Parsing URIs<br></br> <c>parse/1</c></item> <item>Recomposing URIs<br></br> <c>recompose/2</c></item> - <item>Resolving URI references<br></br> - <c>resolve_uri_reference/3</c></item> - <item>Creating URI references<br></br> - <c>create_uri_reference/3</c></item> - <item>Normalizing URIs<br></br> - <c>normalize/1</c></item> <item>Transcoding URIs<br></br> <c>transcode/2</c></item> - <item>Working with urlencoded query strings<br></br> - <c>compose_query/1, dissect_query/1</c></item> + <item>Working with form-urlencoded query strings<br></br> + <c>compose_query/[1,2], dissect_query/1</c></item> </list> - </p> - <p>There are four different encodings present during the handling of URIs: + 
<p>There are four different encodings present during the handling of URIs:</p> <list type="bulleted"> <item>Inbound binary encoding in binaries</item> <item>Inbound percent-encoding in lists and binaries</item> <item>Outbound binary encoding in binaries</item> <item>Outbound percent-encoding in lists and binaries</item> </list> - </p> <p>Unless otherwise specified the return value type and encoding are the same as the input type and encoding. That is, binary input returns binary output, list input returns a list output but mixed input returns list output. Input and output encodings are the same except @@ -113,31 +105,34 @@ <name name="compose_query" arity="1"/> <fsummary>Compose urlencoded query string.</fsummary> <desc> - <p>Composes an urlencoded <c><anno>QueryString</anno></c> based on a + <p>Composes a form-urlencoded <c><anno>QueryString</anno></c> based on a <c><anno>QueryList</anno></c>, a list of unescaped key-value pairs. Media type <c>application/x-www-form-urlencoded</c> is defined in section - 8.2.1 of <c>RFC 1866</c> (HTML 2.0). + 8.2.1 of <c>RFC 1866</c> (HTML 2.0). Reserved and unsafe characters, as + defined by RFC 1738 (Uniform Resource Locators), are percent-encoded. 
</p> - <p>If an argument is invalid, a <c>badarg</c> exception is raised.</p> <p><em>Example:</em></p> <pre> -1> <input>uri_string:compose_query(...).</input> -</pre> +1> <input>uri_string:compose_query([{"foo bar","1"},{"city","örebro"}]).</input> +<![CDATA["foo+bar=1&city=%C3%B6rebro"]]> + </pre> </desc> </func> <func> - <name name="create_uri_reference" arity="2"/> - <fsummary>Create references.</fsummary> + <name name="compose_query" arity="2"/> + <fsummary>Compose urlencoded query string.</fsummary> <desc> - <p>Creates an RFC 3986 compliant <c><anno>RelativeDestURI</anno></c>, - based <c><anno>AbsoluteSourceURI</anno></c> and <c><anno>AbsoluteSourceURI</anno></c> - </p> - <p>If an argument is invalid, a <c>badarg</c> exception is raised.</p> + <p>Same as <c>compose_query/1</c> but with an additional + <c><anno>Options</anno></c> parameter, that controls the type of separator used + between key-value pairs. There are two supported separator types: <c>amp</c> (<![CDATA[&]]>) + and <c>semicolon</c> (;).</p> <p><em>Example:</em></p> <pre> -1> <input>uri_string:create_uri_reference(...,...).</input> -</pre> +1> <input>uri_string:compose_query([{"foo bar","1"},{"city","örebro"}],</input> +2> [{separator, semicolon}]). +"foo+bar=1;city=%C3%B6rebro" + </pre> </desc> </func> @@ -148,31 +143,14 @@ <p>Dissects an urlencoded <c><anno>QueryString</anno></c> and returns a <c><anno>QueryList</anno></c>, a list of unescaped key-value pairs. Media type <c>application/x-www-form-urlencoded</c> is defined in section - 8.2.1 of <c>RFC 1866</c> (HTML 2.0). + 8.2.1 of <c>RFC 1866</c> (HTML 2.0). Percent-encoded segments are decoded + as defined by RFC 1738 (Uniform Resource Locators). 
</p> - <p>If an argument is invalid, a <c>badarg</c> exception is raised.</p> <p><em>Example:</em></p> <pre> -1> <input>uri_string:dissect_query(...).</input> -</pre> - </desc> - </func> - - <func> - <name name="normalize" arity="1"/> - <fsummary>Normalize URI.</fsummary> - <desc> - <p>Normalizes an RFC 3986 compliant <c><anno>URIString</anno></c> and returns - a <c><anno>NormalizedURI</anno></c>. The algorithm used to shorten the input - URI is called Syntax-Based Normalization and described at - <c>Section 6.2.2 of RFC 3986</c>. - </p> - <p>If an argument is invalid, a <c>badarg</c> exception is raised.</p> - <p><em>Example:</em></p> - <pre> -1> <input>uri_string:normalize("http://example.org/one/two/../../one").</input> -"http://example.org/one" -</pre> +1> <input>uri_string:dissect_query("foo+bar=1;city=%C3%B6rebro").</input> +[{"foo bar","1"},{"city","örebro"}] + </pre> </desc> </func> @@ -182,14 +160,14 @@ <desc> <p>Returns a <c>URIMap</c>, that is a <em>uri_map()</em> with the parsed components of the <c><anno>URIString</anno></c>.</p> - <p>If parsing fails, a <c>parse_error</c> exception is raised.</p> + <p>If parsing fails, an error tuple is returned.</p> <p><em>Example:</em></p> <pre> 1> <input>uri_string:parse("foo://[email protected]:8042/over/there?name=ferret#nose").</input> #{fragment => "nose",host => "example.com", path => "/over/there",port => 8042,query => "name=ferret", scheme => foo,userinfo => "user"} -2> </pre> + </pre> </desc> </func> @@ -198,50 +176,34 @@ <fsummary>Recompose URI.</fsummary> <desc> <p>Returns an RFC 3986 compliant <c><anno>URIString</anno></c> (percent-encoded).</p> - <p>If the <c><anno>URIMap</anno></c> is invalid, a <c>badarg</c> exception is raised.</p> + <p>If the <c><anno>URIMap</anno></c> is invalid, an error tuple is returned.</p> <p><em>Example:</em></p> <pre> 1> <input>URIMap = #{fragment => "nose", host => "example.com", path => "/over/there",</input> -port => 8042, query => "name=ferret", scheme => foo, userinfo 
=> "user"}. +port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}. #{fragment => "top",host => "example.com", path => "/over/there",port => 8042,query => "?name=ferret", scheme => foo,userinfo => "user"} -2> <input>uri_string:recompose(URIMap, []).</input> +2> <input>uri_string:recompose(URIMap).</input> "foo://example.com:8042/over/there?name=ferret#nose"</pre> </desc> </func> <func> - <name name="resolve_uri_reference" arity="2"/> - <fsummary>Resolve URI reference.</fsummary> - <desc> - <p>Resolves an RFC 3986 compliant <c><anno>RelativeURI</anno></c>, - based <c><anno>AbsoluteBaseURI</anno></c> and returns a new absolute URI - (<c><anno>AbsoluteDestURI</anno></c>).</p> - <p>If an argument is invalid, a <c>badarg</c> exception is raised.</p> - <p><em>Example:</em></p> - <pre> -1> <input>uri_string:resolve_uri_reference(...,...).</input> -</pre> - </desc> - </func> - - <func> <name name="transcode" arity="2"/> <fsummary>Transcode URI.</fsummary> <desc> <p>Transcodes an RFC 3986 compliant <c><anno>URIString</anno></c>, where <c><anno>Options</anno></c> is a list of tagged tuples, specifying the inbound (<c>in_encoding</c>) and outbound (<c>out_encoding</c>) encodings.</p> - <p>If an argument is invalid, a <c>badarg</c> exception is raised.</p> + <p>If an argument is invalid, an error tuple is returned.</p> <p><em>Example:</em></p> <pre> -1> <input>uri_string:transcode(<<"foo://f%20oo">>, [{in_encoding, utf8},</input> -{out_encoding, utf16}]). -<<0,102,0,111,0,111,0,58,0,47,0,47,0,102,0,37,0,48,0,48,0,37,0,50,0,48,0, - 111,0,111>> -</pre> +1> <input><![CDATA[uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>,]]></input> +2> [{in_encoding, utf32},{out_encoding, utf8}]). 
+<![CDATA[<<"foo%C3%B6bar"/utf8>>]]> + </pre> </desc> </func> diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index 51f7564934..8723d3f183 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -226,9 +226,9 @@ %%------------------------------------------------------------------------- %% External API %%------------------------------------------------------------------------- --export([compose_query/1, compose_query/2, create_uri_reference/2, - dissect_query/1, normalize/1, parse/1, - recompose/1, resolve_uri_reference/2, transcode/2]). +-export([compose_query/1, compose_query/2, + dissect_query/1, parse/1, + recompose/1, transcode/2]). -export_type([uri_map/0, uri_string/0]). @@ -291,7 +291,8 @@ %%------------------------------------------------------------------------- -spec parse(URIString) -> URIMap when URIString :: uri_string(), - URIMap :: uri_map(). + URIMap :: uri_map() + | {error, atom(), list() | binary()}. parse(URIString) when is_binary(URIString) -> try parse_uri_reference(URIString, #{}) of Result -> Result @@ -315,7 +316,8 @@ parse(URIString) when is_list(URIString) -> %%------------------------------------------------------------------------- -spec recompose(URIMap) -> URIString when URIMap :: uri_map(), - URIString :: uri_string(). + URIString :: uri_string() + | {error, atom(), list() | binary()}. recompose(Map) -> case is_valid_map(Map) of false -> @@ -338,40 +340,13 @@ recompose(Map) -> %%------------------------------------------------------------------------- -%% Resolve references -%%------------------------------------------------------------------------- --spec resolve_uri_reference(RelativeURI, AbsoluteBaseURI) -> AbsoluteDestURI when - RelativeURI :: uri_string(), - AbsoluteBaseURI :: uri_string(), - AbsoluteDestURI :: uri_string(). -resolve_uri_reference(_,_) -> - "". 
- -%%------------------------------------------------------------------------- -%% Create references -%%------------------------------------------------------------------------- --spec create_uri_reference(AbsoluteSourceURI, AbsoluteBaseURI) -> RelativeDestURI when - AbsoluteSourceURI :: uri_string(), - AbsoluteBaseURI :: uri_string(), - RelativeDestURI :: uri_string(). -create_uri_reference(_,_) -> - "". - -%%------------------------------------------------------------------------- -%% Normalize URIs -%%------------------------------------------------------------------------- --spec normalize(URIString) -> NormalizedURI when - URIString :: uri_string(), - NormalizedURI :: uri_string(). -normalize(_) -> - "". - -%%------------------------------------------------------------------------- %% Transcode URIs %%------------------------------------------------------------------------- --spec transcode(URIString, Options) -> URIString when +-spec transcode(URIString, Options) -> Result when URIString :: uri_string(), - Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}]. + Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}], + Result :: uri_string() + | {error, atom(), list() | binary()}. transcode(URIString, Options) when is_binary(URIString) -> try InEnc = proplists:get_value(in_encoding, Options, utf8), @@ -407,7 +382,8 @@ transcode(URIString, Options) when is_list(URIString) -> %%------------------------------------------------------------------------- -spec compose_query(QueryList) -> QueryString when QueryList :: [{uri_string(), uri_string()}], - QueryString :: string(). + QueryString :: string() + | {error, atom(), list() | binary()}. compose_query(List) -> compose_query(List, []). @@ -415,7 +391,8 @@ compose_query(List) -> -spec compose_query(QueryList, Options) -> QueryString when QueryList :: [{uri_string(), uri_string()}], Options :: [{separator, atom()}], - QueryString :: string(). 
+ QueryString :: string() + | {error, atom(), list() | binary()}. compose_query([],_Options) -> []; compose_query(List, Options) -> @@ -439,7 +416,8 @@ compose_query([], _Options, Acc) -> %%------------------------------------------------------------------------- -spec dissect_query(QueryString) -> QueryList when QueryString :: uri_string(), - QueryList :: [{string(), string()}]. + QueryList :: [{string(), string()}] + | {error, atom(), list() | binary()}. dissect_query([]) -> []; dissect_query(QueryString) when is_binary(QueryString) -> @@ -1940,7 +1918,7 @@ form_urldecode(Cs) -> form_urldecode(<<>>, Acc) -> convert_list(Acc, utf8); form_urldecode(<<$+,T/binary>>, Acc) -> - form_urlencode(T, [$ |Acc]); + form_urldecode(T, <<Acc/binary,$ >>); form_urldecode(<<$%,C0,C1,T/binary>>, Acc) -> case is_hex_digit(C0) andalso is_hex_digit(C1) of true -> |