From b439d19d38479d6264d906dd926a168c9c514da3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?= Date: Fri, 20 Oct 2017 16:32:42 +0200 Subject: stdlib: Update documentation (uri_string) --- lib/stdlib/doc/src/uri_string.xml | 114 +++++++++++++------------------------- lib/stdlib/src/uri_string.erl | 58 ++++++------------- 2 files changed, 56 insertions(+), 116 deletions(-) (limited to 'lib') diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml index 8283b8ca0e..496573ae2f 100644 --- a/lib/stdlib/doc/src/uri_string.xml +++ b/lib/stdlib/doc/src/uri_string.xml @@ -24,7 +24,7 @@ maps Péter Dimitrov 1 - 2017-08-23 + 2017-10-20 A uri_string @@ -34,7 +34,8 @@

A URI is an identifier consisting of a sequence of characters matching the syntax rule named URI in RFC 3986.

The generic URI syntax consists of a hierarchical sequence of components referred - to as the scheme, authority, path, query, and fragment:

+    to as the scheme, authority, path, query, and fragment:

+
     URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
     hier-part   = "//" authority path-abempty
                    / path-absolute
@@ -51,35 +52,26 @@
 
     unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
     


-

The interpretation of a URI depends only on the characters used and not on how those characters are represented in a network protocol.

-

The functions implemented by this module covers the following use cases: +

The functions implemented by this module cover the following use cases:

Parsing URIs

parse/1
Recomposing URIs

recompose/2
- Resolving URI references

- resolve_uri_reference/3
- Creating URI references

- create_uri_reference/3
- Normalizing URIs

- normalize/1
Transcoding URIs

transcode/2
- Working with urlencoded query strings

- compose_query/1, dissect_query/1
+ Working with form-urlencoded query strings

+ compose_query/[1,2], dissect_query/1
-

-

There are four different encodings present during the handling of URIs: +

There are four different encodings present during the handling of URIs:

Inbound binary encoding in binaries Inbound percent-encoding in lists and binaries Outbound binary encoding in binaries Outbound percent-encoding in lists and binaries -

Unless otherwise specified the return value type and encoding are the same as the input type and encoding. That is, binary input returns binary output, list input returns a list output but mixed input returns list output. Input and output encodings are the same except @@ -113,31 +105,34 @@ Compose urlencoded query string. -

Composes an urlencoded QueryString based on a +

Composes a form-urlencoded QueryString based on a QueryList, a list of unescaped key-value pairs. Media type application/x-www-form-urlencoded is defined in section - 8.2.1 of RFC 1866 (HTML 2.0). + 8.2.1 of RFC 1866 (HTML 2.0). Reserved and unsafe characters, as + defined by RFC 1738 (Uniform Resource Locators), are percent-encoded.

-

If an argument is invalid, a badarg exception is raised.

Example:

-1> uri_string:compose_query(...).
-
+1> uri_string:compose_query([{"foo bar","1"},{"city","örebro"}]). + +
- - Create references. + + Compose urlencoded query string. -

Creates an RFC 3986 compliant RelativeDestURI, - based AbsoluteSourceURI and AbsoluteSourceURI -

-

If an argument is invalid, a badarg exception is raised.

+

Same as compose_query/1 but with an additional + Options parameter, that controls the type of separator used + between key-value pairs. There are two supported separator types: amp (&) + and semicolon (;).

Example:

-1> uri_string:create_uri_reference(...,...).
-
+1> uri_string:compose_query([{"foo bar","1"},{"city","örebro"}], +2> [{separator, semicolon}]). +"foo+bar=1;city=%C3%B6rebro" +
@@ -148,31 +143,14 @@

Dissects an urlencoded QueryString and returns a QueryList, a list of unescaped key-value pairs. Media type application/x-www-form-urlencoded is defined in section - 8.2.1 of RFC 1866 (HTML 2.0). + 8.2.1 of RFC 1866 (HTML 2.0). Percent-encoded segments are decoded + as defined by RFC 1738 (Uniform Resource Locators).

-

If an argument is invalid, a badarg exception is raised.

Example:

-1> uri_string:dissect_query(...).
-
- - - - - - Normalize URI. - -

Normalizes an RFC 3986 compliant URIString and returns - a NormalizedURI. The algorithm used to shorten the input - URI is called Syntax-Based Normalization and described at - Section 6.2.2 of RFC 3986. -

-

If an argument is invalid, a badarg exception is raised.

-

Example:

-
-1> uri_string:normalize("http://example.org/one/two/../../one").
-"http://example.org/one"
-
+1> uri_string:dissect_query("foo+bar=1;city=%C3%B6rebro"). +[{"foo bar","1"},{"city","örebro"}] +
@@ -182,14 +160,14 @@

Returns a URIMap, that is a uri_map() with the parsed components of the URIString.

-

If parsing fails, a parse_error exception is raised.

+

If parsing fails, an error tuple is returned.

Example:

 1> uri_string:parse("foo://user@example.com:8042/over/there?name=ferret#nose").
 #{fragment => "nose",host => "example.com",
   path => "/over/there",port => 8042,query => "name=ferret",
   scheme => foo,userinfo => "user"}
-2> 
+
@@ -198,35 +176,20 @@ Recompose URI.

Returns an RFC 3986 compliant URIString (percent-encoded).

-

If the URIMap is invalid, a badarg exception is raised.

+

If the URIMap is invalid, an error tuple is returned.

Example:

 1> URIMap = #{fragment => "nose", host => "example.com", path => "/over/there",
-port => 8042, query => "name=ferret", scheme => foo, userinfo => "user"}.
+port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}.
 #{fragment => "nose",host => "example.com",
   path => "/over/there",port => 8042,query => "name=ferret",
   scheme => "foo",userinfo => "user"}
 
-2> uri_string:recompose(URIMap, []).
+2> uri_string:recompose(URIMap).
 "foo://example.com:8042/over/there?name=ferret#nose"
- - - Resolve URI reference. - -

Resolves an RFC 3986 compliant RelativeURI, - based AbsoluteBaseURI and returns a new absolute URI - (AbsoluteDestURI).

-

If an argument is invalid, a badarg exception is raised.

-

Example:

-
-1> uri_string:resolve_uri_reference(...,...).
-
-
-
- Transcode URI. @@ -234,14 +197,13 @@ port => 8042, query => "name=ferret", scheme => foo, userinfo => "user"}.

Transcodes an RFC 3986 compliant URIString, where Options is a list of tagged tuples, specifying the inbound (in_encoding) and outbound (out_encoding) encodings.

-

If an argument is invalid, a badarg exception is raised.

+

If an argument is invalid, an error tuple is returned.

Example:

-1> uri_string:transcode(<<"foo://f%20oo">>, [{in_encoding, utf8},
-{out_encoding, utf16}]).
-<<0,102,0,111,0,111,0,58,0,47,0,47,0,102,0,37,0,48,0,48,0,37,0,50,0,48,0,
-  111,0,111>>
-
+1> >,]]> +2> [{in_encoding, utf32},{out_encoding, utf8}]). +>]]> +
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index 51f7564934..8723d3f183 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -226,9 +226,9 @@ %%------------------------------------------------------------------------- %% External API %%------------------------------------------------------------------------- --export([compose_query/1, compose_query/2, create_uri_reference/2, - dissect_query/1, normalize/1, parse/1, - recompose/1, resolve_uri_reference/2, transcode/2]). +-export([compose_query/1, compose_query/2, + dissect_query/1, parse/1, + recompose/1, transcode/2]). -export_type([uri_map/0, uri_string/0]). @@ -291,7 +291,8 @@ %%------------------------------------------------------------------------- -spec parse(URIString) -> URIMap when URIString :: uri_string(), - URIMap :: uri_map(). + URIMap :: uri_map() + | {error, atom(), list() | binary()}. parse(URIString) when is_binary(URIString) -> try parse_uri_reference(URIString, #{}) of Result -> Result @@ -315,7 +316,8 @@ parse(URIString) when is_list(URIString) -> %%------------------------------------------------------------------------- -spec recompose(URIMap) -> URIString when URIMap :: uri_map(), - URIString :: uri_string(). + URIString :: uri_string() + | {error, atom(), list() | binary()}. recompose(Map) -> case is_valid_map(Map) of false -> @@ -337,41 +339,14 @@ recompose(Map) -> end. -%%------------------------------------------------------------------------- -%% Resolve references -%%------------------------------------------------------------------------- --spec resolve_uri_reference(RelativeURI, AbsoluteBaseURI) -> AbsoluteDestURI when - RelativeURI :: uri_string(), - AbsoluteBaseURI :: uri_string(), - AbsoluteDestURI :: uri_string(). -resolve_uri_reference(_,_) -> - "". 
- -%%------------------------------------------------------------------------- -%% Create references -%%------------------------------------------------------------------------- --spec create_uri_reference(AbsoluteSourceURI, AbsoluteBaseURI) -> RelativeDestURI when - AbsoluteSourceURI :: uri_string(), - AbsoluteBaseURI :: uri_string(), - RelativeDestURI :: uri_string(). -create_uri_reference(_,_) -> - "". - -%%------------------------------------------------------------------------- -%% Normalize URIs -%%------------------------------------------------------------------------- --spec normalize(URIString) -> NormalizedURI when - URIString :: uri_string(), - NormalizedURI :: uri_string(). -normalize(_) -> - "". - %%------------------------------------------------------------------------- %% Transcode URIs %%------------------------------------------------------------------------- --spec transcode(URIString, Options) -> URIString when +-spec transcode(URIString, Options) -> Result when URIString :: uri_string(), - Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}]. + Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}], + Result :: uri_string() + | {error, atom(), list() | binary()}. transcode(URIString, Options) when is_binary(URIString) -> try InEnc = proplists:get_value(in_encoding, Options, utf8), @@ -407,7 +382,8 @@ transcode(URIString, Options) when is_list(URIString) -> %%------------------------------------------------------------------------- -spec compose_query(QueryList) -> QueryString when QueryList :: [{uri_string(), uri_string()}], - QueryString :: string(). + QueryString :: string() + | {error, atom(), list() | binary()}. compose_query(List) -> compose_query(List, []). @@ -415,7 +391,8 @@ compose_query(List) -> -spec compose_query(QueryList, Options) -> QueryString when QueryList :: [{uri_string(), uri_string()}], Options :: [{separator, atom()}], - QueryString :: string(). 
+ QueryString :: string() + | {error, atom(), list() | binary()}. compose_query([],_Options) -> []; compose_query(List, Options) -> @@ -439,7 +416,8 @@ compose_query([], _Options, Acc) -> %%------------------------------------------------------------------------- -spec dissect_query(QueryString) -> QueryList when QueryString :: uri_string(), - QueryList :: [{string(), string()}]. + QueryList :: [{string(), string()}] + | {error, atom(), list() | binary()}. dissect_query([]) -> []; dissect_query(QueryString) when is_binary(QueryString) -> @@ -1940,7 +1918,7 @@ form_urldecode(Cs) -> form_urldecode(<<>>, Acc) -> convert_list(Acc, utf8); form_urldecode(<<$+,T/binary>>, Acc) -> - form_urlencode(T, [$ |Acc]); + form_urldecode(T, <>); form_urldecode(<<$%,C0,C1,T/binary>>, Acc) -> case is_hex_digit(C0) andalso is_hex_digit(C1) of true -> -- cgit v1.2.3