aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPéter Dimitrov <[email protected]>2017-11-07 15:13:36 +0100
committerGitHub <[email protected]>2017-11-07 15:13:36 +0100
commit6db8210068a55696cd5e444d40d3676737113d03 (patch)
treea6dbf506a3692a7c4ee9789d1cfeb9eda37da694
parentf642b74ff710ed1093d218689ac96566d4889fec (diff)
parent7e5d062973e7cb4f9ee949529e9dcdb5785c1304 (diff)
downloadotp-6db8210068a55696cd5e444d40d3676737113d03.tar.gz
otp-6db8210068a55696cd5e444d40d3676737113d03.tar.bz2
otp-6db8210068a55696cd5e444d40d3676737113d03.zip
Merge pull request #1551 from peterdmv/peterdmv/stdlib/add_uri_string/OTP-14496
Implement new uri_string module in stdlib
-rw-r--r--lib/stdlib/doc/src/Makefile1
-rw-r--r--lib/stdlib/doc/src/ref_man.xml1
-rw-r--r--lib/stdlib/doc/src/specs.xml1
-rw-r--r--lib/stdlib/doc/src/uri_string.xml230
-rw-r--r--lib/stdlib/src/Makefile1
-rw-r--r--lib/stdlib/src/stdlib.app.src1
-rw-r--r--lib/stdlib/src/uri_string.erl1842
-rw-r--r--lib/stdlib/test/Makefile1
-rw-r--r--lib/stdlib/test/property_test/README12
-rw-r--r--lib/stdlib/test/property_test/uri_string_recompose.erl361
-rw-r--r--lib/stdlib/test/uri_string_SUITE.erl844
-rw-r--r--lib/stdlib/test/uri_string_property_test_SUITE.erl39
12 files changed, 3334 insertions, 0 deletions
diff --git a/lib/stdlib/doc/src/Makefile b/lib/stdlib/doc/src/Makefile
index 93eac8220d..aeed79408b 100644
--- a/lib/stdlib/doc/src/Makefile
+++ b/lib/stdlib/doc/src/Makefile
@@ -98,6 +98,7 @@ XML_REF3_FILES = \
sys.xml \
timer.xml \
unicode.xml \
+ uri_string.xml \
win32reg.xml \
zip.xml
diff --git a/lib/stdlib/doc/src/ref_man.xml b/lib/stdlib/doc/src/ref_man.xml
index 878a3babc5..68bfddbc71 100644
--- a/lib/stdlib/doc/src/ref_man.xml
+++ b/lib/stdlib/doc/src/ref_man.xml
@@ -93,6 +93,7 @@
<xi:include href="sys.xml"/>
<xi:include href="timer.xml"/>
<xi:include href="unicode.xml"/>
+ <xi:include href="uri_string.xml"/>
<xi:include href="win32reg.xml"/>
<xi:include href="zip.xml"/>
</application>
diff --git a/lib/stdlib/doc/src/specs.xml b/lib/stdlib/doc/src/specs.xml
index 45b207b13d..d559adf9b6 100644
--- a/lib/stdlib/doc/src/specs.xml
+++ b/lib/stdlib/doc/src/specs.xml
@@ -60,6 +60,7 @@
<xi:include href="../specs/specs_sys.xml"/>
<xi:include href="../specs/specs_timer.xml"/>
<xi:include href="../specs/specs_unicode.xml"/>
+ <xi:include href="../specs/specs_uri_string.xml"/>
<xi:include href="../specs/specs_win32reg.xml"/>
<xi:include href="../specs/specs_zip.xml"/>
</specs>
diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml
new file mode 100644
index 0000000000..9ace2b0a05
--- /dev/null
+++ b/lib/stdlib/doc/src/uri_string.xml
@@ -0,0 +1,230 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!DOCTYPE erlref SYSTEM "erlref.dtd">
+
+<erlref>
+ <header>
+ <copyright>
+ <year>2017</year><year>2017</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ </legalnotice>
+
+ <title>uri_string</title>
+ <prepared>Péter Dimitrov</prepared>
+ <docno>1</docno>
+ <date>2017-10-24</date>
+ <rev>A</rev>
+ </header>
+ <module>uri_string</module>
+ <modulesummary>URI processing functions.</modulesummary>
+ <description>
+ <p>This module contains functions for parsing and handling URIs
+ (<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>).
+ </p>
+ <p>A URI is an identifier consisting of a sequence of characters matching the syntax
+ rule named <em>URI</em> in <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>.
+ </p>
+ <p> The generic URI syntax consists of a hierarchical sequence of components referred
+ to as the scheme, authority, path, query, and fragment:</p>
+ <pre>
+ URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ hier-part = "//" authority path-abempty
+ / path-absolute
+ / path-rootless
+ / path-empty
+ scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ authority = [ userinfo "@" ] host [ ":" port ]
+ userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+
+ reserved = gen-delims / sub-delims
+ gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")"
+ / "*" / "+" / "," / ";" / "="
+
+ unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ </pre><br></br>
+ <p>The interpretation of a URI depends only on the characters used and not on how those
+ characters are represented in a network protocol.</p>
+ <p>The functions implemented by this module cover the following use cases:</p>
+ <list type="bulleted">
+ <item>Parsing URIs into its components and returing a map<br></br>
+ <seealso marker="#parse/1"><c>parse/1</c></seealso>
+ </item>
+ <item>Recomposing a map of URI components into a URI string<br></br>
+ <seealso marker="#recompose/1"><c>recompose/1</c></seealso>
+ </item>
+ <item>Changing inbound binary and percent-encoding of URIs<br></br>
+ <seealso marker="#transcode/2"><c>transcode/2</c></seealso>
+ </item>
+ <item>Transforming URIs into a normalized form<br></br>
+ <seealso marker="#normalize/1"><c>normalize/1</c></seealso>
+ </item>
+ </list>
+ <p>There are four different encodings present during the handling of URIs:</p>
+ <list type="bulleted">
+ <item>Inbound binary encoding in binaries</item>
+ <item>Inbound percent-encoding in lists and binaries</item>
+ <item>Outbound binary encoding in binaries</item>
+ <item>Outbound percent-encoding in lists and binaries</item>
+ </list>
+ <p>Functions with <c>uri_string()</c> argument accept lists, binaries and
+ mixed lists (lists with binary elements) as input type. All of the functions but
+ <c>transcode/2</c> expects input as lists of unicode codepoints, UTF-8 encoded binaries
+ and UTF-8 percent-encoded URI parts ("%C3%B6" corresponds to the unicode character "ö").</p>
+ <p>Unless otherwise specified the return value type and encoding are the same as the input
+ type and encoding. That is, binary input returns binary output, list input returns a list
+ output but mixed input returns list output.</p>
+ <p>In case of lists there is only percent-encoding. In binaries, however, both binary encoding
+ and percent-encoding shall be considered. <c>transcode/2</c> provides the means to convert
+ between the supported encodings, it takes a <c>uri_string()</c> and a list of options
+ specifying inbound and outbound encodings.</p>
+ <p><url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> does not mandate any specific
+ character encoding and it is usually defined by the protocol or surrounding text. This library
+ takes the same assumption, binary and percent-encoding are handled as one configuration unit,
+ they cannot be set to different values.</p>
+ </description>
+
+ <datatypes>
+ <datatype>
+ <name name="error"/>
+ <desc>
+ <p>Error tuple indicating the type of error. Possible values of the second component:</p>
+ <list type="bulleted">
+ <item><c>invalid_input</c></item>
+ <item><c>invalid_map</c></item>
+ <item><c>invalid_percent_encoding</c></item>
+ <item><c>invalid_scheme</c></item>
+ <item><c>invalid_uri</c></item>
+ <item><c>invalid_utf8</c></item>
+ </list>
+ <p>The third component is a term providing additional information about the
+ cause of the error.</p>
+ </desc>
+ </datatype>
+ <datatype>
+ <name name="uri_map"/>
+ <desc>
+ <p>Map holding the main components of a URI.</p>
+ </desc>
+ </datatype>
+ <datatype>
+ <name name="uri_string"/>
+ <desc>
+ <p>List of unicode codepoints, a UTF-8 encoded binary, or a mix of the two,
+ representing an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>
+ compliant URI (<em>percent-encoded form</em>).
+ A URI is a sequence of characters from a very limited set: the letters of
+ the basic Latin alphabet, digits, and a few special characters.</p>
+ </desc>
+ </datatype>
+ </datatypes>
+
+ <funcs>
+
+ <func>
+ <name name="normalize" arity="1"/>
+ <fsummary>Syntax-based normalization.</fsummary>
+ <desc>
+ <p>Transforms <c><anno>URIString</anno></c> into a normalized form
+ using Syntax-Based Normalization as defined by
+ <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>.</p>
+ <p>This function implements case normalization, percent-encoding
+ normalization, path segment normalization and scheme based normalization
+ for HTTP(S) with basic support for FTP, SSH, SFTP and TFTP.</p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input>uri_string:normalize("/a/b/c/./../../g").</input>
+"/a/g"
+2> <![CDATA[uri_string:normalize(<<"mid/content=5/../6">>).]]>
+<![CDATA[<<"mid/6">>]]>
+3> uri_string:normalize("http://localhost:80").
+"https://localhost/"
+ </pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="parse" arity="1"/>
+ <fsummary>Parse URI into a map.</fsummary>
+ <desc>
+ <p>Parses an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>
+ compliant <c>uri_string()</c> into a <c>uri_map()</c>, that holds the parsed
+ components of the <c>URI</c>.
+ If parsing fails, an error tuple is returned.</p>
+ <p>See also the opposite operation <seealso marker="#recompose/1">
+ <c>recompose/1</c></seealso>.</p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input>uri_string:parse("foo://[email protected]:8042/over/there?name=ferret#nose").</input>
+#{fragment => "nose",host => "example.com",
+ path => "/over/there",port => 8042,query => "name=ferret",
+ scheme => foo,userinfo => "user"}
+2> <![CDATA[uri_string:parse(<<"foo://[email protected]:8042/over/there?name=ferret">>).]]>
+<![CDATA[#{host => <<"example.com">>,path => <<"/over/there">>,
+ port => 8042,query => <<"name=ferret">>,scheme => <<"foo">>,
+ userinfo => <<"user">>}]]>
+ </pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="recompose" arity="1"/>
+ <fsummary>Recompose URI.</fsummary>
+ <desc>
+ <p>Creates an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> compliant
+ <c><anno>URIString</anno></c> (percent-encoded), based on the components of
+ <c><anno>URIMap</anno></c>.
+ If the <c><anno>URIMap</anno></c> is invalid, an error tuple is returned.</p>
+ <p>See also the opposite operation <seealso marker="#parse/1">
+ <c>parse/1</c></seealso>.</p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input>URIMap = #{fragment => "nose", host => "example.com", path => "/over/there",</input>
+1> port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}.
+#{fragment => "top",host => "example.com",
+ path => "/over/there",port => 8042,query => "?name=ferret",
+ scheme => foo,userinfo => "user"}
+
+2> <input>uri_string:recompose(URIMap).</input>
+"foo://example.com:8042/over/there?name=ferret#nose"</pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="transcode" arity="2"/>
+ <fsummary>Transcode URI.</fsummary>
+ <desc>
+ <p>Transcodes an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>
+ compliant <c><anno>URIString</anno></c>,
+ where <c><anno>Options</anno></c> is a list of tagged tuples, specifying the inbound
+ (<c>in_encoding</c>) and outbound (<c>out_encoding</c>) encodings. <c>in_encoding</c>
+ and <c>out_encoding</c> specifies both binary encoding and percent-encoding for the
+ input and output data. Mixed encoding, where binary encoding is not the same as
+ percent-encoding, is not supported.
+ If an argument is invalid, an error tuple is returned.</p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input><![CDATA[uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>,]]></input>
+1> [{in_encoding, utf32},{out_encoding, utf8}]).
+<![CDATA[<<"foo%C3%B6bar"/utf8>>]]>
+2> uri_string:transcode("foo%F6bar", [{in_encoding, latin1},
+2> {out_encoding, utf8}]).
+"foo%C3%B6bar"
+ </pre>
+ </desc>
+ </func>
+
+ </funcs>
+</erlref>
diff --git a/lib/stdlib/src/Makefile b/lib/stdlib/src/Makefile
index bf836203ec..8b156929d7 100644
--- a/lib/stdlib/src/Makefile
+++ b/lib/stdlib/src/Makefile
@@ -121,6 +121,7 @@ MODULES= \
timer \
unicode \
unicode_util \
+ uri_string \
win32reg \
zip
diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src
index ab0824ca17..5fb48acfab 100644
--- a/lib/stdlib/src/stdlib.app.src
+++ b/lib/stdlib/src/stdlib.app.src
@@ -101,6 +101,7 @@
timer,
unicode,
unicode_util,
+ uri_string,
win32reg,
zip]},
{registered,[timer_server,rsh_starter,take_over_monitor,pool_master,
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
new file mode 100644
index 0000000000..22212da222
--- /dev/null
+++ b/lib/stdlib/src/uri_string.erl
@@ -0,0 +1,1842 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2017. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+%%
+%% [RFC 3986, Chapter 2.2. Reserved Characters]
+%%
+%% reserved = gen-delims / sub-delims
+%%
+%% gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+%%
+%% sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+%% / "*" / "+" / "," / ";" / "="
+%%
+%%
+%% [RFC 3986, Chapter 2.3. Unreserved Characters]
+%%
+%% unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+%%
+%%
+%% [RFC 3986, Chapter 3. Syntax Components]
+%%
+%% The generic URI syntax consists of a hierarchical sequence of
+%% components referred to as the scheme, authority, path, query, and
+%% fragment.
+%%
+%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+%%
+%% hier-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-rootless
+%% / path-empty
+%%
+%% The scheme and path components are required, though the path may be
+%% empty (no characters). When authority is present, the path must
+%% either be empty or begin with a slash ("/") character. When
+%% authority is not present, the path cannot begin with two slash
+%% characters ("//"). These restrictions result in five different ABNF
+%% rules for a path (Section 3.3), only one of which will match any
+%% given URI reference.
+%%
+%% The following are two example URIs and their component parts:
+%%
+%% foo://example.com:8042/over/there?name=ferret#nose
+%% \_/ \______________/\_________/ \_________/ \__/
+%% | | | | |
+%% scheme authority path query fragment
+%% | _____________________|__
+%% / \ / \
+%% urn:example:animal:ferret:nose
+%%
+%%
+%% [RFC 3986, Chapter 3.1. Scheme]
+%%
+%% Each URI begins with a scheme name that refers to a specification for
+%% assigning identifiers within that scheme.
+%%
+%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+%%
+%%
+%% [RFC 3986, Chapter 3.2. Authority]
+%%
+%% Many URI schemes include a hierarchical element for a naming
+%% authority so that governance of the name space defined by the
+%% remainder of the URI is delegated to that authority (which may, in
+%% turn, delegate it further).
+%%
+%% authority = [ userinfo "@" ] host [ ":" port ]
+%%
+%%
+%% [RFC 3986, Chapter 3.2.1. User Information]
+%%
+%% The userinfo subcomponent may consist of a user name and, optionally,
+%% scheme-specific information about how to gain authorization to access
+%% the resource. The user information, if present, is followed by a
+%% commercial at-sign ("@") that delimits it from the host.
+%%
+%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+%%
+%%
+%% [RFC 3986, Chapter 3.2.2. Host]
+%%
+%% The host subcomponent of authority is identified by an IP literal
+%% encapsulated within square brackets, an IPv4 address in dotted-
+%% decimal form, or a registered name.
+%%
+%% host = IP-literal / IPv4address / reg-name
+%%
+%% IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+%%
+%% IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
+%%
+%% IPv6address = 6( h16 ":" ) ls32
+%% / "::" 5( h16 ":" ) ls32
+%% / [ h16 ] "::" 4( h16 ":" ) ls32
+%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
+%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
+%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
+%% / [ *4( h16 ":" ) h16 ] "::" ls32
+%% / [ *5( h16 ":" ) h16 ] "::" h16
+%% / [ *6( h16 ":" ) h16 ] "::"
+%%
+%% ls32 = ( h16 ":" h16 ) / IPv4address
+%% ; least-significant 32 bits of address
+%%
+%% h16 = 1*4HEXDIG
+%% ; 16 bits of address represented in hexadecimal
+%%
+%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
+%%
+%% dec-octet = DIGIT ; 0-9
+%% / %x31-39 DIGIT ; 10-99
+%% / "1" 2DIGIT ; 100-199
+%% / "2" %x30-34 DIGIT ; 200-249
+%% / "25" %x30-35 ; 250-255
+%%
+%% reg-name = *( unreserved / pct-encoded / sub-delims )
+%%
+%%
+%% [RFC 3986, Chapter 3.2.2. Port]
+%%
+%% The port subcomponent of authority is designated by an optional port
+%% number in decimal following the host and delimited from it by a
+%% single colon (":") character.
+%%
+%% port = *DIGIT
+%%
+%%
+%% [RFC 3986, Chapter 3.3. Path]
+%%
+%% The path component contains data, usually organized in hierarchical
+%% form, that, along with data in the non-hierarchical query component
+%% (Section 3.4), serves to identify a resource within the scope of the
+%% URI's scheme and naming authority (if any). The path is terminated
+%% by the first question mark ("?") or number sign ("#") character, or
+%% by the end of the URI.
+%%
+%% path = path-abempty ; begins with "/" or is empty
+%% / path-absolute ; begins with "/" but not "//"
+%% / path-noscheme ; begins with a non-colon segment
+%% / path-rootless ; begins with a segment
+%% / path-empty ; zero characters
+%%
+%% path-abempty = *( "/" segment )
+%% path-absolute = "/" [ segment-nz *( "/" segment ) ]
+%% path-noscheme = segment-nz-nc *( "/" segment )
+%% path-rootless = segment-nz *( "/" segment )
+%% path-empty = 0<pchar>
+%% segment = *pchar
+%% segment-nz = 1*pchar
+%% segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+%% ; non-zero-length segment without any colon ":"
+%%
+%% pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+%%
+%%
+%% [RFC 3986, Chapter 3.4. Query]
+%%
+%% The query component contains non-hierarchical data that, along with
+%% data in the path component (Section 3.3), serves to identify a
+%% resource within the scope of the URI's scheme and naming authority
+%% (if any). The query component is indicated by the first question
+%% mark ("?") character and terminated by a number sign ("#") character
+%% or by the end of the URI.
+%%
+%% query = *( pchar / "/" / "?" )
+%%
+%%
+%% [RFC 3986, Chapter 3.5. Fragment]
+%%
+%% The fragment identifier component of a URI allows indirect
+%% identification of a secondary resource by reference to a primary
+%% resource and additional identifying information.
+%%
+%% fragment = *( pchar / "/" / "?" )
+%%
+%%
+%% [RFC 3986, Chapter 4.1. URI Reference]
+%%
+%% URI-reference is used to denote the most common usage of a resource
+%% identifier.
+%%
+%% URI-reference = URI / relative-ref
+%%
+%%
+%% [RFC 3986, Chapter 4.2. Relative Reference]
+%%
+%% A relative reference takes advantage of the hierarchical syntax
+%% (Section 1.2.3) to express a URI reference relative to the name space
+%% of another hierarchical URI.
+%%
+%% relative-ref = relative-part [ "?" query ] [ "#" fragment ]
+%%
+%% relative-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-noscheme
+%% / path-empty
+%%
+%%
+%% [RFC 3986, Chapter 4.3. Absolute URI]
+%%
+%% Some protocol elements allow only the absolute form of a URI without
+%% a fragment identifier. For example, defining a base URI for later
+%% use by relative references calls for an absolute-URI syntax rule that
+%% does not allow a fragment.
+%%
+%% absolute-URI = scheme ":" hier-part [ "?" query ]
+%%
+-module(uri_string).
+
+%%-------------------------------------------------------------------------
+%% External API
+%%-------------------------------------------------------------------------
+-export([normalize/1, parse/1,
+ recompose/1, transcode/2]).
+-export_type([error/0, uri_map/0, uri_string/0]).
+
+
+%%-------------------------------------------------------------------------
+%% Internal API
+%%-------------------------------------------------------------------------
+-export([is_host/1, is_path/1]). % suppress warnings
+
+
+%%-------------------------------------------------------------------------
+%% Macros
+%%-------------------------------------------------------------------------
+-define(CHAR(Char), <<Char/utf8>>).
+-define(STRING_EMPTY, <<>>).
+-define(STRING(MatchStr), <<MatchStr/binary>>).
+-define(STRING_REST(MatchStr, Rest), <<MatchStr/utf8, Rest/binary>>).
+
+-define(DEC2HEX(X),
+ if ((X) >= 0) andalso ((X) =< 9) -> (X) + $0;
+ ((X) >= 10) andalso ((X) =< 15) -> (X) + $A - 10
+ end).
+
+-define(HEX2DEC(X),
+ if ((X) >= $0) andalso ((X) =< $9) -> (X) - $0;
+ ((X) >= $A) andalso ((X) =< $F) -> (X) - $A + 10;
+ ((X) >= $a) andalso ((X) =< $f) -> (X) - $a + 10
+ end).
+
+
+%%%=========================================================================
+%%% API
+%%%=========================================================================
+
+%%-------------------------------------------------------------------------
+%% URI compliant with RFC 3986
+%% ASCII %x21 - %x7A ("!" - "z") except
+%% %x34 " double quote
+%% %x60 < less than
+%% %x62 > greater than
+%% %x92 \ backslash
+%% %x94 ^ caret / circumflex
+%% %x96 ` grave / accent
+%%-------------------------------------------------------------------------
+-type uri_string() :: iodata().
+-type error() :: {error, atom(), term()}.
+
+
+%%-------------------------------------------------------------------------
+%% RFC 3986, Chapter 3. Syntax Components
+%%-------------------------------------------------------------------------
+-type uri_map() ::
+ #{fragment => unicode:chardata(),
+ host => unicode:chardata(),
+ path => unicode:chardata(),
+ port => non_neg_integer() | undefined,
+ query => unicode:chardata(),
+ scheme => unicode:chardata(),
+ userinfo => unicode:chardata()} | #{}.
+
+
+%%-------------------------------------------------------------------------
+%% Normalize URIs
+%%-------------------------------------------------------------------------
+-spec normalize(URIString) -> NormalizedURI when
+ URIString :: uri_string(),
+ NormalizedURI :: uri_string().
+normalize(URIString) ->
+ %% Percent-encoding normalization and case normalization for
+ %% percent-encoded triplets are achieved by running parse and
+ %% recompose on the input URI string.
+ recompose(
+ normalize_path_segment(
+ normalize_scheme_based(
+ normalize_case(
+ parse(URIString))))).
+
+
+%%-------------------------------------------------------------------------
+%% Parse URIs
+%%-------------------------------------------------------------------------
+-spec parse(URIString) -> URIMap when
+ URIString :: uri_string(),
+ URIMap :: uri_map()
+ | error().
+parse(URIString) when is_binary(URIString) ->
+ try parse_uri_reference(URIString, #{})
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end;
+parse(URIString) when is_list(URIString) ->
+ try
+ Binary = unicode:characters_to_binary(URIString),
+ Map = parse_uri_reference(Binary, #{}),
+ convert_mapfields_to_list(Map)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Recompose URIs
+%%-------------------------------------------------------------------------
+-spec recompose(URIMap) -> URIString when
+ URIMap :: uri_map(),
+ URIString :: uri_string()
+ | error().
+recompose(Map) ->
+ case is_valid_map(Map) of
+ false ->
+ {error, invalid_map, Map};
+ true ->
+ try
+ T0 = update_scheme(Map, empty),
+ T1 = update_userinfo(Map, T0),
+ T2 = update_host(Map, T1),
+ T3 = update_port(Map, T2),
+ T4 = update_path(Map, T3),
+ T5 = update_query(Map, T4),
+ update_fragment(Map, T5)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Transcode URIs
+%%-------------------------------------------------------------------------
+-spec transcode(URIString, Options) -> Result when
+ URIString :: uri_string(),
+ Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}],
+ Result :: uri_string()
+ | error().
+transcode(URIString, Options) when is_binary(URIString) ->
+ try
+ InEnc = proplists:get_value(in_encoding, Options, utf8),
+ OutEnc = proplists:get_value(out_encoding, Options, utf8),
+ List = convert_to_list(URIString, InEnc),
+ Output = transcode(List, [], InEnc, OutEnc),
+ convert_to_binary(Output, utf8, OutEnc)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end;
+transcode(URIString, Options) when is_list(URIString) ->
+ InEnc = proplists:get_value(in_encoding, Options, utf8),
+ OutEnc = proplists:get_value(out_encoding, Options, utf8),
+ Flattened = flatten_list(URIString, InEnc),
+ try transcode(Flattened, [], InEnc, OutEnc)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end.
+
+
+%%%========================================================================
+%%% Internal functions
+%%%========================================================================
+
+%%-------------------------------------------------------------------------
+%% Converts Map fields to lists
+%%-------------------------------------------------------------------------
+convert_mapfields_to_list(Map) ->
+ Fun = fun (_, V) when is_binary(V) -> unicode:characters_to_list(V);
+ (_, V) -> V end,
+ maps:map(Fun, Map).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 4.1. URI Reference]
+%%
+%% URI-reference is used to denote the most common usage of a resource
+%% identifier.
+%%
+%% URI-reference = URI / relative-ref
+%%-------------------------------------------------------------------------
+-spec parse_uri_reference(binary(), uri_map()) -> uri_map().
+parse_uri_reference(<<>>, _) -> #{path => <<>>};
+parse_uri_reference(URIString, URI) ->
+ try parse_scheme_start(URIString, URI)
+ catch
+ throw:{_,_,_} ->
+ parse_relative_part(URIString, URI)
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 4.2. Relative Reference]
+%%
+%% A relative reference takes advantage of the hierarchical syntax
+%% (Section 1.2.3) to express a URI reference relative to the name space
+%% of another hierarchical URI.
+%%
+%% relative-ref = relative-part [ "?" query ] [ "#" fragment ]
+%%
+%% relative-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-noscheme
+%% / path-empty
+%%-------------------------------------------------------------------------
+-spec parse_relative_part(binary(), uri_map()) -> uri_map().
+parse_relative_part(?STRING_REST("//", Rest), URI) ->
+ %% Parse userinfo - "//" is NOT part of authority
+ try parse_userinfo(Rest, URI) of
+ {T, URI1} ->
+ Userinfo = calculate_parsed_userinfo(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{userinfo => decode_userinfo(Userinfo)}
+ catch
+ throw:{_,_,_} ->
+ {T, URI1} = parse_host(Rest, URI),
+ Host = calculate_parsed_host_port(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{host => decode_host(remove_brackets(Host))}
+ end;
+parse_relative_part(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-absolute
+ Path = calculate_parsed_part(Rest, T),
+ URI1#{path => decode_path(?STRING_REST($/, Path))};
+parse_relative_part(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{query => decode_query(Query)};
+parse_relative_part(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{fragment => decode_fragment(Fragment)};
+parse_relative_part(?STRING_REST(Char, Rest), URI) ->
+ case is_segment_nz_nc(Char) of
+ true ->
+ {T, URI1} = parse_segment_nz_nc(Rest, URI), % path-noscheme
+ Path = calculate_parsed_part(Rest, T),
+ URI1#{path => decode_path(?STRING_REST(Char, Path))};
+ false -> throw({error,invalid_uri,[Char]})
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.3. Path]
+%%
+%% The path component contains data, usually organized in hierarchical
+%% form, that, along with data in the non-hierarchical query component
+%% (Section 3.4), serves to identify a resource within the scope of the
+%% URI's scheme and naming authority (if any). The path is terminated
+%% by the first question mark ("?") or number sign ("#") character, or
+%% by the end of the URI.
+%%
+%% path = path-abempty ; begins with "/" or is empty
+%% / path-absolute ; begins with "/" but not "//"
+%% / path-noscheme ; begins with a non-colon segment
+%% / path-rootless ; begins with a segment
+%% / path-empty ; zero characters
+%%
+%% path-abempty = *( "/" segment )
+%% path-absolute = "/" [ segment-nz *( "/" segment ) ]
+%% path-noscheme = segment-nz-nc *( "/" segment )
+%% path-rootless = segment-nz *( "/" segment )
+%% path-empty = 0<pchar>
+%% segment = *pchar
+%% segment-nz = 1*pchar
+%% segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+%% ; non-zero-length segment without any colon ":"
+%%
+%% pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% path-abempty
+%%-------------------------------------------------------------------------
+-spec parse_segment(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_segment(?STRING_REST($/, Rest), URI) ->
+ parse_segment(Rest, URI); % segment
+parse_segment(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_segment(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI),
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_segment(?STRING_REST(Char, Rest), URI) ->
+ case is_pchar(Char) of
+ true -> parse_segment(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_segment(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%%-------------------------------------------------------------------------
+%% path-noscheme
+%%-------------------------------------------------------------------------
+-spec parse_segment_nz_nc(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_segment_nz_nc(?STRING_REST($/, Rest), URI) ->
+ parse_segment(Rest, URI); % segment
+parse_segment_nz_nc(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_segment_nz_nc(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI),
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) ->
+ case is_segment_nz_nc(Char) of
+ true -> parse_segment_nz_nc(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_segment_nz_nc(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%% Check if char is pchar.
+-spec is_pchar(char()) -> boolean().
+is_pchar($%) -> true; % pct-encoded
+is_pchar($:) -> true;
+is_pchar($@) -> true;
+is_pchar(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+%% Check if char is segment_nz_nc.
+-spec is_segment_nz_nc(char()) -> boolean().
+is_segment_nz_nc($%) -> true; % pct-encoded
+is_segment_nz_nc($@) -> true;
+is_segment_nz_nc(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.1. Scheme]
+%%
+%% Each URI begins with a scheme name that refers to a specification for
+%% assigning identifiers within that scheme.
+%%
+%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+%%-------------------------------------------------------------------------
+-spec parse_scheme_start(binary(), uri_map()) -> uri_map().
+parse_scheme_start(?STRING_REST(Char, Rest), URI) ->
+ case is_alpha(Char) of
+ true -> {T, URI1} = parse_scheme(Rest, URI),
+ Scheme = calculate_parsed_scheme(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{scheme => ?STRING_REST(Char, Scheme)};
+ false -> throw({error,invalid_uri,[Char]})
+ end.
+
+%% Add path component if it missing after parsing the URI.
+%% According to the URI specification there is always a
+%% path component in every URI-reference and it can be
+%% empty.
+maybe_add_path(Map) ->
+ case maps:is_key(path, Map) of
+ false ->
+ Map#{path => <<>>};
+ _Else ->
+ Map
+ end.
+
+
+-spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_scheme(?STRING_REST($:, Rest), URI) ->
+ {_, URI1} = parse_hier(Rest, URI),
+ {Rest, URI1};
+parse_scheme(?STRING_REST(Char, Rest), URI) ->
+ case is_scheme(Char) of
+ true -> parse_scheme(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_scheme(?STRING_EMPTY, _URI) ->
+ throw({error,invalid_uri,<<>>}).
+
+
+%% Check if char is allowed in scheme
+-spec is_scheme(char()) -> boolean().
+is_scheme($+) -> true;
+is_scheme($-) -> true;
+is_scheme($.) -> true;
+is_scheme(Char) -> is_alpha(Char) orelse is_digit(Char).
+
+
+%%-------------------------------------------------------------------------
+%% hier-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-rootless
+%% / path-empty
+%%-------------------------------------------------------------------------
+-spec parse_hier(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_hier(?STRING_REST("//", Rest), URI) ->
+ % Parse userinfo - "//" is NOT part of authority
+ try parse_userinfo(Rest, URI) of
+ {T, URI1} ->
+ Userinfo = calculate_parsed_userinfo(Rest, T),
+ {Rest, URI1#{userinfo => decode_userinfo(Userinfo)}}
+ catch
+ throw:{_,_,_} ->
+ {T, URI1} = parse_host(Rest, URI),
+ Host = calculate_parsed_host_port(Rest, T),
+ {Rest, URI1#{host => decode_host(remove_brackets(Host))}}
+ end;
+parse_hier(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-absolute
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_hier(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_hier(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless
+ case is_pchar(Char) of
+ true -> % segment_nz
+ {T, URI1} = parse_segment(Rest, URI),
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST(Char, Path))}};
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_hier(?STRING_EMPTY, URI) ->
+ {<<>>, URI}.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.2. Authority]
+%%
+%% Many URI schemes include a hierarchical element for a naming
+%% authority so that governance of the name space defined by the
+%% remainder of the URI is delegated to that authority (which may, in
+%% turn, delegate it further).
+%%
+%% The authority component is preceded by a double slash ("//") and is
+%% terminated by the next slash ("/"), question mark ("?"), or number
+%% sign ("#") character, or by the end of the URI.
+%%
+%% authority = [ userinfo "@" ] host [ ":" port ]
+%%
+%%
+%% [RFC 3986, Chapter 3.2.1. User Information]
+%%
+%% The userinfo subcomponent may consist of a user name and, optionally,
+%% scheme-specific information about how to gain authorization to access
+%% the resource. The user information, if present, is followed by a
+%% commercial at-sign ("@") that delimits it from the host.
+%%
+%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+%%-------------------------------------------------------------------------
+-spec parse_userinfo(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_userinfo(?CHAR($@), URI) ->
+ {?STRING_EMPTY, URI#{host => <<>>}};
+parse_userinfo(?STRING_REST($@, Rest), URI) ->
+ {T, URI1} = parse_host(Rest, URI),
+ Host = calculate_parsed_host_port(Rest, T),
+ {Rest, URI1#{host => decode_host(remove_brackets(Host))}};
+parse_userinfo(?STRING_REST(Char, Rest), URI) ->
+ case is_userinfo(Char) of
+ true -> parse_userinfo(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_userinfo(?STRING_EMPTY, _URI) ->
+ %% URI cannot end in userinfo state
+ throw({error,invalid_uri,<<>>}).
+
+
+%% Check if char is allowed in userinfo
+-spec is_userinfo(char()) -> boolean().
+is_userinfo($%) -> true; % pct-encoded
+is_userinfo($:) -> true;
+is_userinfo(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.2.2. Host]
+%%
+%% The host subcomponent of authority is identified by an IP literal
+%% encapsulated within square brackets, an IPv4 address in dotted-
+%% decimal form, or a registered name.
+%%
+%% host = IP-literal / IPv4address / reg-name
+%%
+%% IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+%%
+%% IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
+%%
+%% IPv6address = 6( h16 ":" ) ls32
+%% / "::" 5( h16 ":" ) ls32
+%% / [ h16 ] "::" 4( h16 ":" ) ls32
+%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
+%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
+%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
+%% / [ *4( h16 ":" ) h16 ] "::" ls32
+%% / [ *5( h16 ":" ) h16 ] "::" h16
+%% / [ *6( h16 ":" ) h16 ] "::"
+%%
+%% ls32 = ( h16 ":" h16 ) / IPv4address
+%% ; least-significant 32 bits of address
+%%
+%% h16 = 1*4HEXDIG
+%% ; 16 bits of address represented in hexadecimal
+%%
+%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
+%%
+%% dec-octet = DIGIT ; 0-9
+%% / %x31-39 DIGIT ; 10-99
+%% / "1" 2DIGIT ; 100-199
+%% / "2" %x30-34 DIGIT ; 200-249
+%% / "25" %x30-35 ; 250-255
+%%
+%% reg-name = *( unreserved / pct-encoded / sub-delims )
+%%-------------------------------------------------------------------------
+-spec parse_host(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_host(?STRING_REST($:, Rest), URI) ->
+ {T, URI1} = parse_port(Rest, URI),
+ H = calculate_parsed_host_port(Rest, T),
+ Port = get_port(H),
+ {Rest, URI1#{port => Port}};
+parse_host(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_host(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_host(?STRING_REST($[, Rest), URI) ->
+ parse_ipv6_bin(Rest, [], URI);
+parse_host(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_host(?STRING_REST(Char, Rest), URI) ->
+ case is_digit(Char) of
+ true -> parse_ipv4_bin(Rest, [Char], URI);
+ false -> parse_reg_name(?STRING_REST(Char, Rest), URI)
+ end;
+parse_host(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+-spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_reg_name(?STRING_REST($:, Rest), URI) ->
+ {T, URI1} = parse_port(Rest, URI),
+ H = calculate_parsed_host_port(Rest, T),
+ Port = get_port(H),
+ {Rest, URI1#{port => Port}};
+parse_reg_name(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_reg_name(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_reg_name(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_reg_name(?STRING_REST(Char, Rest), URI) ->
+ case is_reg_name(Char) of
+ true -> parse_reg_name(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_reg_name(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+%% Check if char is allowed in reg-name
+-spec is_reg_name(char()) -> boolean().
+is_reg_name($%) -> true;
+is_reg_name(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+
+-spec parse_ipv4_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}.
+parse_ipv4_bin(?STRING_REST($:, Rest), Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {T, URI1} = parse_port(Rest, URI),
+ H = calculate_parsed_host_port(Rest, T),
+ Port = get_port(H),
+ {Rest, URI1#{port => Port}};
+parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) ->
+ case is_ipv4(Char) of
+ true -> parse_ipv4_bin(Rest, [Char|Acc], URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_ipv4_bin(?STRING_EMPTY, Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {?STRING_EMPTY, URI}.
+
+
+%% Check if char is allowed in IPv4 addresses
+-spec is_ipv4(char()) -> boolean().
+is_ipv4($.) -> true;
+is_ipv4(Char) -> is_digit(Char).
+
+-spec validate_ipv4_address(list()) -> list().
+validate_ipv4_address(Addr) ->
+ case inet:parse_ipv4strict_address(Addr) of
+ {ok, _} -> Addr;
+ {error, _} -> throw({error,invalid_uri,Addr})
+ end.
+
+
+-spec parse_ipv6_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}.
+parse_ipv6_bin(?STRING_REST($], Rest), Acc, URI) ->
+ _ = validate_ipv6_address(lists:reverse(Acc)),
+ parse_ipv6_bin_end(Rest, URI);
+parse_ipv6_bin(?STRING_REST(Char, Rest), Acc, URI) ->
+ case is_ipv6(Char) of
+ true -> parse_ipv6_bin(Rest, [Char|Acc], URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_ipv6_bin(?STRING_EMPTY, _Acc, _URI) ->
+ throw({error,invalid_uri,<<>>}).
+
+%% Check if char is allowed in IPv6 addresses
+-spec is_ipv6(char()) -> boolean().
+is_ipv6($:) -> true;
+is_ipv6($.) -> true;
+is_ipv6(Char) -> is_hex_digit(Char).
+
+
+-spec parse_ipv6_bin_end(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) ->
+ {T, URI1} = parse_port(Rest, URI),
+ H = calculate_parsed_host_port(Rest, T),
+ Port = get_port(H),
+ {Rest, URI1#{port => Port}};
+parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) ->
+ case is_ipv6(Char) of
+ true -> parse_ipv6_bin_end(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_ipv6_bin_end(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+-spec validate_ipv6_address(list()) -> list().
+validate_ipv6_address(Addr) ->
+ case inet:parse_ipv6strict_address(Addr) of
+ {ok, _} -> Addr;
+ {error, _} -> throw({error,invalid_uri,Addr})
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.2.2. Port]
+%%
+%% The port subcomponent of authority is designated by an optional port
+%% number in decimal following the host and delimited from it by a
+%% single colon (":") character.
+%%
+%% port = *DIGIT
+%%-------------------------------------------------------------------------
+-spec parse_port(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_port(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_port(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_port(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_port(?STRING_REST(Char, Rest), URI) ->
+ case is_digit(Char) of
+ true -> parse_port(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_port(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.4. Query]
+%%
+%% The query component contains non-hierarchical data that, along with
+%% data in the path component (Section 3.3), serves to identify a
+%% resource within the scope of the URI's scheme and naming authority
+%% (if any). The query component is indicated by the first question
+%% mark ("?") character and terminated by a number sign ("#") character
+%% or by the end of the URI.
+%%
+%% query = *( pchar / "/" / "?" )
+%%-------------------------------------------------------------------------
+-spec parse_query(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_query(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI),
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_query(?STRING_REST(Char, Rest), URI) ->
+ case is_query(Char) of
+ true -> parse_query(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_query(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%% Check if char is allowed in query
+-spec is_query(char()) -> boolean().
+is_query($/) -> true;
+is_query($?) -> true;
+is_query(Char) -> is_pchar(Char).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.5. Fragment]
+%%
+%% The fragment identifier component of a URI allows indirect
+%% identification of a secondary resource by reference to a primary
+%% resource and additional identifying information.
+%%
+%% fragment = *( pchar / "/" / "?" )
+%%-------------------------------------------------------------------------
+-spec parse_fragment(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_fragment(?STRING_REST(Char, Rest), URI) ->
+ case is_fragment(Char) of
+ true -> parse_fragment(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_fragment(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%% Check if char is allowed in fragment
+-spec is_fragment(char()) -> boolean().
+is_fragment($/) -> true;
+is_fragment($?) -> true;
+is_fragment(Char) -> is_pchar(Char).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 2.2. Reserved Characters]
+%%
+%% reserved = gen-delims / sub-delims
+%%
+%% gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+%%
+%% sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+%% / "*" / "+" / "," / ";" / "="
+%%
+%%-------------------------------------------------------------------------
+
+%% Check if char is sub-delim.
+-spec is_sub_delim(char()) -> boolean().
+is_sub_delim($!) -> true;
+is_sub_delim($$) -> true;
+is_sub_delim($&) -> true;
+is_sub_delim($') -> true;
+is_sub_delim($() -> true;
+is_sub_delim($)) -> true;
+
+is_sub_delim($*) -> true;
+is_sub_delim($+) -> true;
+is_sub_delim($,) -> true;
+is_sub_delim($;) -> true;
+is_sub_delim($=) -> true;
+is_sub_delim(_) -> false.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 2.3. Unreserved Characters]
+%%
+%% unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+%%
+%%-------------------------------------------------------------------------
+-spec is_unreserved(char()) -> boolean().
+is_unreserved($-) -> true;
+is_unreserved($.) -> true;
+is_unreserved($_) -> true;
+is_unreserved($~) -> true;
+is_unreserved(Char) -> is_alpha(Char) orelse is_digit(Char).
+
+-spec is_alpha(char()) -> boolean().
+is_alpha(C)
+ when $A =< C, C =< $Z;
+ $a =< C, C =< $z -> true;
+is_alpha(_) -> false.
+
+-spec is_digit(char()) -> boolean().
+is_digit(C)
+ when $0 =< C, C =< $9 -> true;
+is_digit(_) -> false.
+
+-spec is_hex_digit(char()) -> boolean().
+is_hex_digit(C)
+ when $0 =< C, C =< $9;$a =< C, C =< $f;$A =< C, C =< $F -> true;
+is_hex_digit(_) -> false.
+
+
+%% Remove enclosing brackets from binary
+-spec remove_brackets(binary()) -> binary().
+remove_brackets(<<$[/utf8, Rest/binary>>) ->
+ {H,T} = split_binary(Rest, byte_size(Rest) - 1),
+ case T =:= <<$]/utf8>> of
+ true -> H;
+ false -> Rest
+ end;
+remove_brackets(Addr) -> Addr.
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for calculating the parsed binary.
+%%-------------------------------------------------------------------------
+-spec calculate_parsed_scheme(binary(), binary()) -> binary().
+calculate_parsed_scheme(Input, <<>>) ->
+ strip_last_char(Input, [$:]);
+calculate_parsed_scheme(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+-spec calculate_parsed_part(binary(), binary()) -> binary().
+calculate_parsed_part(Input, <<>>) ->
+ strip_last_char(Input, [$?,$#]);
+calculate_parsed_part(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+-spec calculate_parsed_userinfo(binary(), binary()) -> binary().
+calculate_parsed_userinfo(Input, <<>>) ->
+ strip_last_char(Input, [$?,$#,$@]);
+calculate_parsed_userinfo(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+-spec calculate_parsed_host_port(binary(), binary()) -> binary().
+calculate_parsed_host_port(Input, <<>>) ->
+ strip_last_char(Input, [$:,$?,$#,$/]);
+calculate_parsed_host_port(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+calculate_parsed_query_fragment(Input, <<>>) ->
+ strip_last_char(Input, [$#]);
+calculate_parsed_query_fragment(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+get_port(<<>>) ->
+ undefined;
+get_port(B) ->
+ try binary_to_integer(B)
+ catch
+ error:badarg ->
+ throw({error, invalid_uri, B})
+ end.
+
+
+%% Strip last char if it is in list
+%%
+%% This function is optimized for speed: parse/1 is about 10% faster than
+%% with an alternative implementation based on lists and sets.
+strip_last_char(<<>>, _) -> <<>>;
+strip_last_char(Input, [C0]) ->
+ case binary:last(Input) of
+ C0 ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
+strip_last_char(Input, [C0,C1]) ->
+ case binary:last(Input) of
+ C0 ->
+ init_binary(Input);
+ C1 ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
+strip_last_char(Input, [C0,C1,C2]) ->
+ case binary:last(Input) of
+ C0 ->
+ init_binary(Input);
+ C1 ->
+ init_binary(Input);
+ C2 ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
+strip_last_char(Input, [C0,C1,C2,C3]) ->
+ case binary:last(Input) of
+ C0 ->
+ init_binary(Input);
+ C1 ->
+ init_binary(Input);
+ C2 ->
+ init_binary(Input);
+ C3 ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end.
+
+
+%% Get parsed binary
+get_parsed_binary(Input, Unparsed) ->
+ {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)),
+ First.
+
+
+%% Return all bytes of the binary except the last one. The binary must be non-empty.
+init_binary(B) ->
+ {Init, _} =
+ split_binary(B, byte_size(B) - 1),
+ Init.
+
+
+%% Returns the size of a binary exluding the first element.
+%% Used in calls to split_binary().
+-spec byte_size_exl_head(binary()) -> number().
+byte_size_exl_head(<<>>) -> 0;
+byte_size_exl_head(Binary) -> byte_size(Binary) + 1.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 2.1. Percent-Encoding]
+%%
+%% A percent-encoding mechanism is used to represent a data octet in a
+%% component when that octet's corresponding character is outside the
+%% allowed set or is being used as a delimiter of, or within, the
+%% component. A percent-encoded octet is encoded as a character
+%% triplet, consisting of the percent character "%" followed by the two
+%% hexadecimal digits representing that octet's numeric value. For
+%% example, "%20" is the percent-encoding for the binary octet
+%% "00100000" (ABNF: %x20), which in US-ASCII corresponds to the space
+%% character (SP). Section 2.4 describes when percent-encoding and
+%% decoding is applied.
+%%
+%% pct-encoded = "%" HEXDIG HEXDIG
+%%-------------------------------------------------------------------------
+-spec decode_userinfo(binary()) -> binary().
+decode_userinfo(Cs) ->
+ check_utf8(decode(Cs, fun is_userinfo/1, <<>>)).
+
+-spec decode_host(binary()) -> binary().
+decode_host(Cs) ->
+ check_utf8(decode(Cs, fun is_host/1, <<>>)).
+
+-spec decode_path(binary()) -> binary().
+decode_path(Cs) ->
+ check_utf8(decode(Cs, fun is_path/1, <<>>)).
+
+-spec decode_query(binary()) -> binary().
+decode_query(Cs) ->
+ check_utf8(decode(Cs, fun is_query/1, <<>>)).
+
+-spec decode_fragment(binary()) -> binary().
+decode_fragment(Cs) ->
+ check_utf8(decode(Cs, fun is_fragment/1, <<>>)).
+
+
+%% Returns Cs if it is utf8 encoded.
+check_utf8(Cs) ->
+ case unicode:characters_to_list(Cs) of
+ {incomplete,_,_} ->
+ throw({error,invalid_utf8,Cs});
+ {error,_,_} ->
+ throw({error,invalid_utf8,Cs});
+ _ -> Cs
+ end.
+
+%%-------------------------------------------------------------------------
+%% Percent-encode
+%%-------------------------------------------------------------------------
+
+%% Only validates as scheme cannot have percent-encoded characters
+-spec encode_scheme(list()|binary()) -> list() | binary().
+encode_scheme([]) ->
+ throw({error,invalid_scheme,""});
+encode_scheme(<<>>) ->
+ throw({error,invalid_scheme,<<>>});
+encode_scheme(Scheme) ->
+ case validate_scheme(Scheme) of
+ true -> Scheme;
+ false -> throw({error,invalid_scheme,Scheme})
+ end.
+
+-spec encode_userinfo(list()|binary()) -> list() | binary().
+encode_userinfo(Cs) ->
+ encode(Cs, fun is_userinfo/1).
+
+-spec encode_host(list()|binary()) -> list() | binary().
+encode_host(Cs) ->
+ case classify_host(Cs) of
+ regname -> Cs;
+ ipv4 -> Cs;
+ ipv6 -> bracket_ipv6(Cs);
+ other -> encode(Cs, fun is_reg_name/1)
+ end.
+
+-spec encode_path(list()|binary()) -> list() | binary().
+encode_path(Cs) ->
+ encode(Cs, fun is_path/1).
+
+-spec encode_query(list()|binary()) -> list() | binary().
+encode_query(Cs) ->
+ encode(Cs, fun is_query/1).
+
+-spec encode_fragment(list()|binary()) -> list() | binary().
+encode_fragment(Cs) ->
+ encode(Cs, fun is_fragment/1).
+
+%%-------------------------------------------------------------------------
+%% Helper funtions for percent-decode
+%%-------------------------------------------------------------------------
+decode(<<$%,C0,C1,Cs/binary>>, Fun, Acc) ->
+ case is_hex_digit(C0) andalso is_hex_digit(C1) of
+ true ->
+ B = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
+ decode(Cs, Fun, <<Acc/binary, B>>);
+ false -> throw({error,invalid_percent_encoding,<<$%,C0,C1>>})
+ end;
+decode(<<C,Cs/binary>>, Fun, Acc) ->
+ case Fun(C) of
+ true -> decode(Cs, Fun, <<Acc/binary, C>>);
+ false -> throw({error,invalid_percent_encoding,<<C,Cs/binary>>})
+ end;
+decode(<<>>, _Fun, Acc) ->
+ Acc.
+
+%% Check if char is allowed in host
+-spec is_host(char()) -> boolean().
+is_host($:) -> true;
+is_host(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+%% Check if char is allowed in path
+-spec is_path(char()) -> boolean().
+is_path($/) -> true;
+is_path(Char) -> is_pchar(Char).
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for percent-encode
+%%-------------------------------------------------------------------------
+-spec encode(list()|binary(), fun()) -> list() | binary().
+encode(Component, Fun) when is_list(Component) ->
+ B = unicode:characters_to_binary(Component),
+ unicode:characters_to_list(encode(B, Fun, <<>>));
+encode(Component, Fun) when is_binary(Component) ->
+ encode(Component, Fun, <<>>).
+%%
+encode(<<Char/utf8, Rest/binary>>, Fun, Acc) ->
+ C = encode_codepoint_binary(Char, Fun),
+ encode(Rest, Fun, <<Acc/binary,C/binary>>);
+encode(<<Char, Rest/binary>>, _Fun, _Acc) ->
+ throw({error,invalid_input,<<Char,Rest/binary>>});
+encode(<<>>, _Fun, Acc) ->
+ Acc.
+
+
+-spec encode_codepoint_binary(integer(), fun()) -> binary().
+encode_codepoint_binary(C, Fun) ->
+ case Fun(C) of
+ false -> percent_encode_binary(C);
+ true -> <<C>>
+ end.
+
+
+-spec percent_encode_binary(integer()) -> binary().
+percent_encode_binary(Code) ->
+ percent_encode_binary(<<Code/utf8>>, <<>>).
+
+
+percent_encode_binary(<<A:4,B:4,Rest/binary>>, Acc) ->
+ percent_encode_binary(Rest, <<Acc/binary,$%,(?DEC2HEX(A)),(?DEC2HEX(B))>>);
+percent_encode_binary(<<>>, Acc) ->
+ Acc.
+
+
+%%-------------------------------------------------------------------------
+%%-------------------------------------------------------------------------
+validate_scheme([]) -> true;
+validate_scheme([H|T]) ->
+ case is_scheme(H) of
+ true -> validate_scheme(T);
+ false -> false
+ end;
+validate_scheme(<<>>) -> true;
+validate_scheme(<<H, Rest/binary>>) ->
+ case is_scheme(H) of
+ true -> validate_scheme(Rest);
+ false -> false
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Classifies hostname into the following categories:
+%% regname, ipv4 - address does not contain reserved characters to be
+%% percent-encoded
+%% ipv6 - address does not contain reserved characters but it shall be
+%% encolsed in brackets
+%% other - address shall be percent-encoded
+%%-------------------------------------------------------------------------
+classify_host([]) -> other;
+classify_host(Addr) when is_binary(Addr) ->
+ A = unicode:characters_to_list(Addr),
+ classify_host_ipv6(A);
+classify_host(Addr) ->
+ classify_host_ipv6(Addr).
+
+classify_host_ipv6(Addr) ->
+ case is_ipv6_address(Addr) of
+ true -> ipv6;
+ false -> classify_host_ipv4(Addr)
+ end.
+
+classify_host_ipv4(Addr) ->
+ case is_ipv4_address(Addr) of
+ true -> ipv4;
+ false -> classify_host_regname(Addr)
+ end.
+
+classify_host_regname([]) -> regname;
+classify_host_regname([H|T]) ->
+ case is_reg_name(H) of
+ true -> classify_host_regname(T);
+ false -> other
+ end.
+
+is_ipv4_address(Addr) ->
+ case inet:parse_ipv4strict_address(Addr) of
+ {ok, _} -> true;
+ {error, _} -> false
+ end.
+
+is_ipv6_address(Addr) ->
+ case inet:parse_ipv6strict_address(Addr) of
+ {ok, _} -> true;
+ {error, _} -> false
+ end.
+
+bracket_ipv6(Addr) when is_binary(Addr) ->
+ concat(<<$[,Addr/binary>>,<<$]>>);
+bracket_ipv6(Addr) when is_list(Addr) ->
+ [$[|Addr] ++ "]".
+
+
+%%-------------------------------------------------------------------------
+%% Helper funtions for recompose
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% Checks if input Map has valid combination of fields that can be
+%% recomposed into a URI.
+%%
+%% The implementation is based on a decision tree that fulfills the
+%% following rules:
+%% - 'path' shall always be present in the input map
+%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+%% hier-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-rootless
+%% / path-empty
+%% - 'host' shall be present in the input map when 'path' starts with
+%% two slashes ("//")
+%% path = path-abempty ; begins with "/" or is empty
+%% / path-absolute ; begins with "/" but not "//"
+%% / path-noscheme ; begins with a non-colon segment
+%% / path-rootless ; begins with a segment
+%% / path-empty ; zero characters
+%% path-abempty = *( "/" segment )
+%% segment = *pchar
+%% - 'host' shall be present if userinfo or port is present in input map
+%% authority = [ userinfo "@" ] host [ ":" port ]
+%% - All fields shall be valid (scheme, userinfo, host, port, path, query
+%% or fragment).
+%%-------------------------------------------------------------------------
+is_valid_map(#{path := Path} = Map) ->
+ ((starts_with_two_slash(Path) andalso is_valid_map_host(Map))
+ orelse
+ (maps:is_key(userinfo, Map) andalso is_valid_map_host(Map))
+ orelse
+ (maps:is_key(port, Map) andalso is_valid_map_host(Map))
+ orelse
+ all_fields_valid(Map));
+is_valid_map(#{}) ->
+ false.
+
+
+is_valid_map_host(Map) ->
+ maps:is_key(host, Map) andalso all_fields_valid(Map).
+
+
+all_fields_valid(Map) ->
+ Fun = fun(scheme, _, Acc) -> Acc;
+ (userinfo, _, Acc) -> Acc;
+ (host, _, Acc) -> Acc;
+ (port, _, Acc) -> Acc;
+ (path, _, Acc) -> Acc;
+ (query, _, Acc) -> Acc;
+ (fragment, _, Acc) -> Acc;
+ (_, _, _) -> false
+ end,
+ maps:fold(Fun, true, Map).
+
+
+starts_with_two_slash([$/,$/|_]) ->
+ true;
+starts_with_two_slash(?STRING_REST("//", _)) ->
+ true;
+starts_with_two_slash(_) -> false.
+
+
+update_scheme(#{scheme := Scheme}, _) ->
+ add_colon_postfix(encode_scheme(Scheme));
+update_scheme(#{}, _) ->
+ empty.
+
+
+update_userinfo(#{userinfo := Userinfo}, empty) ->
+ add_auth_prefix(encode_userinfo(Userinfo));
+update_userinfo(#{userinfo := Userinfo}, URI) ->
+ concat(URI,add_auth_prefix(encode_userinfo(Userinfo)));
+update_userinfo(#{}, empty) ->
+ empty;
+update_userinfo(#{}, URI) ->
+ URI.
+
+
+update_host(#{host := Host}, empty) ->
+ add_auth_prefix(encode_host(Host));
+update_host(#{host := Host} = Map, URI) ->
+ concat(URI,add_host_prefix(Map, encode_host(Host)));
+update_host(#{}, empty) ->
+ empty;
+update_host(#{}, URI) ->
+ URI.
+
+
+%% URI cannot be empty for ports. E.g. ":8080" is not a valid URI
+update_port(#{port := undefined}, URI) ->
+ concat(URI, <<":">>);
+update_port(#{port := Port}, URI) ->
+ concat(URI,add_colon(encode_port(Port)));
+update_port(#{}, URI) ->
+ URI.
+
+
+update_path(#{path := Path}, empty) ->
+ encode_path(Path);
+update_path(#{path := Path}, URI) ->
+ concat(URI,encode_path(Path));
+update_path(#{}, empty) ->
+ empty;
+update_path(#{}, URI) ->
+ URI.
+
+
+update_query(#{query := Query}, empty) ->
+ encode_query(Query);
+update_query(#{query := Query}, URI) ->
+ concat(URI,add_question_mark(encode_query(Query)));
+update_query(#{}, empty) ->
+ empty;
+update_query(#{}, URI) ->
+ URI.
+
+
+update_fragment(#{fragment := Fragment}, empty) ->
+ add_hashmark(encode_fragment(Fragment));
+update_fragment(#{fragment := Fragment}, URI) ->
+ concat(URI,add_hashmark(encode_fragment(Fragment)));
+update_fragment(#{}, empty) ->
+ "";
+update_fragment(#{}, URI) ->
+ URI.
+
+%%-------------------------------------------------------------------------
+%% Concatenates its arguments that can be lists and binaries.
+%% The result is a list if at least one of its argument is a list and
+%% binary otherwise.
+%%-------------------------------------------------------------------------
+concat(A, B) when is_binary(A), is_binary(B) ->
+ <<A/binary, B/binary>>;
+concat(A, B) when is_binary(A), is_list(B) ->
+ unicode:characters_to_list(A) ++ B;
+concat(A, B) when is_list(A) ->
+ A ++ maybe_to_list(B).
+
+add_hashmark(Comp) when is_binary(Comp) ->
+ <<$#, Comp/binary>>;
+add_hashmark(Comp) when is_list(Comp) ->
+ [$#|Comp].
+
+add_question_mark(Comp) when is_binary(Comp) ->
+ <<$?, Comp/binary>>;
+add_question_mark(Comp) when is_list(Comp) ->
+ [$?|Comp].
+
+add_colon(Comp) when is_binary(Comp) ->
+ <<$:, Comp/binary>>.
+
+add_colon_postfix(Comp) when is_binary(Comp) ->
+ <<Comp/binary,$:>>;
+add_colon_postfix(Comp) when is_list(Comp) ->
+ Comp ++ ":".
+
+add_auth_prefix(Comp) when is_binary(Comp) ->
+ <<"//", Comp/binary>>;
+add_auth_prefix(Comp) when is_list(Comp) ->
+ [$/,$/|Comp].
+
+add_host_prefix(#{userinfo := _}, Host) when is_binary(Host) ->
+ <<$@,Host/binary>>;
+add_host_prefix(#{}, Host) when is_binary(Host) ->
+ <<"//",Host/binary>>;
+add_host_prefix(#{userinfo := _}, Host) when is_list(Host) ->
+ [$@|Host];
+add_host_prefix(#{}, Host) when is_list(Host) ->
+ [$/,$/|Host].
+
+maybe_to_list(Comp) when is_binary(Comp) -> unicode:characters_to_list(Comp);
+maybe_to_list(Comp) -> Comp.
+
+encode_port(Port) ->
+ integer_to_binary(Port).
+
+%%-------------------------------------------------------------------------
+%% Helper functions for transcode
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% uri_string:transcode(<<"x%00%00%00%F6"/utf32>>).
+%% 1. Convert (transcode/2) input to list form (list of unicode codepoints)
+%% "x%00%00%00%F6"
+%% 2. Accumulate characters until percent-encoded segment (transcode/4).
+%% Acc = "x"
+%% 3. Convert percent-encoded triplets to binary form (transcode_pct/4)
+%% <<0,0,0,246>>
+%% 4. Transcode in-encoded binary to out-encoding (utf32 -> utf8):
+%% <<195,182>>
+%% 5. Percent-encode out-encoded binary:
+%% <<"%C3%B6"/utf8>> = <<37,67,51,37,66,54>>
+%% 6. Convert binary to list form, reverse it and append the accumulator
+%% "6B%3C%" + "x"
+%% 7. Reverse Acc and return it
+%%-------------------------------------------------------------------------
+transcode([$%,_C0,_C1|_Rest] = L, Acc, InEnc, OutEnc) ->
+ transcode_pct(L, Acc, <<>>, InEnc, OutEnc);
+transcode([_C|_Rest] = L, Acc, InEnc, OutEnc) ->
+ transcode(L, Acc, [], InEnc, OutEnc).
+%%
+transcode([$%,_C0,_C1|_Rest] = L, Acc, List, InEncoding, OutEncoding) ->
+ transcode_pct(L, List ++ Acc, <<>>, InEncoding, OutEncoding);
+transcode([C|Rest], Acc, List, InEncoding, OutEncoding) ->
+ transcode(Rest, Acc, [C|List], InEncoding, OutEncoding);
+transcode([], Acc, List, _InEncoding, _OutEncoding) ->
+ lists:reverse(List ++ Acc).
+
+
+%% Transcode percent-encoded segment
+transcode_pct([$%,C0,C1|Rest] = L, Acc, B, InEncoding, OutEncoding) ->
+ case is_hex_digit(C0) andalso is_hex_digit(C1) of
+ true ->
+ Int = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
+ transcode_pct(Rest, Acc, <<B/binary, Int>>, InEncoding, OutEncoding);
+ false -> throw({error, invalid_percent_encoding,L})
+ end;
+transcode_pct([_C|_Rest] = L, Acc, B, InEncoding, OutEncoding) ->
+ OutBinary = convert_to_binary(B, InEncoding, OutEncoding),
+ PctEncUtf8 = percent_encode_segment(OutBinary),
+ Out = lists:reverse(convert_to_list(PctEncUtf8, utf8)),
+ transcode(L, Out ++ Acc, [], InEncoding, OutEncoding);
+transcode_pct([], Acc, B, InEncoding, OutEncoding) ->
+ OutBinary = convert_to_binary(B, InEncoding, OutEncoding),
+ PctEncUtf8 = percent_encode_segment(OutBinary),
+ Out = convert_to_list(PctEncUtf8, utf8),
+ lists:reverse(Acc) ++ Out.
+
+
+%% Convert to binary
+convert_to_binary(Binary, InEncoding, OutEncoding) ->
+ case unicode:characters_to_binary(Binary, InEncoding, OutEncoding) of
+ {error, _List, RestData} ->
+ throw({error, invalid_input, RestData});
+ {incomplete, _List, RestData} ->
+ throw({error, invalid_input, RestData});
+ Result ->
+ Result
+ end.
+
+
+%% Convert to list
+convert_to_list(Binary, InEncoding) ->
+ case unicode:characters_to_list(Binary, InEncoding) of
+ {error, _List, RestData} ->
+ throw({error, invalid_input, RestData});
+ {incomplete, _List, RestData} ->
+ throw({error, invalid_input, RestData});
+ Result ->
+ Result
+ end.
+
+
+%% Flatten input list
+flatten_list([], _) ->
+ [];
+flatten_list(L, InEnc) ->
+ flatten_list(L, InEnc, []).
+%%
+flatten_list([H|T], InEnc, Acc) when is_binary(H) ->
+ L = convert_to_list(H, InEnc),
+ flatten_list(T, InEnc, lists:reverse(L) ++ Acc);
+flatten_list([H|T], InEnc, Acc) when is_list(H) ->
+ flatten_list(H ++ T, InEnc, Acc);
+flatten_list([H|T], InEnc, Acc) ->
+ flatten_list(T, InEnc, [H|Acc]);
+flatten_list([], _InEnc, Acc) ->
+ lists:reverse(Acc);
+flatten_list(Arg, _, _) ->
+ throw({error, invalid_input, Arg}).
+
+
+percent_encode_segment(Segment) ->
+ percent_encode_binary(Segment, <<>>).
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for normalize
+%%-------------------------------------------------------------------------
+
+%% 6.2.2.1. Case Normalization
+normalize_case(#{scheme := Scheme, host := Host} = Map) ->
+ Map#{scheme => to_lower(Scheme),
+ host => to_lower(Host)};
+normalize_case(#{host := Host} = Map) ->
+ Map#{host => to_lower(Host)};
+normalize_case(#{scheme := Scheme} = Map) ->
+ Map#{scheme => to_lower(Scheme)};
+normalize_case(#{} = Map) ->
+ Map.
+
+
+to_lower(Cs) when is_list(Cs) ->
+ B = convert_to_binary(Cs, utf8, utf8),
+ convert_to_list(to_lower(B), utf8);
+to_lower(Cs) when is_binary(Cs) ->
+ to_lower(Cs, <<>>).
+%%
+to_lower(<<C,Cs/binary>>, Acc) when $A =< C, C =< $Z ->
+ to_lower(Cs, <<Acc/binary,(C + 32)>>);
+to_lower(<<C,Cs/binary>>, Acc) ->
+ to_lower(Cs, <<Acc/binary,C>>);
+to_lower(<<>>, Acc) ->
+ Acc.
+
+
+%% 6.2.2.3. Path Segment Normalization
+%% 5.2.4. Remove Dot Segments
+normalize_path_segment(Map) ->
+ Path = maps:get(path, Map, undefined),
+ Map#{path => remove_dot_segments(Path)}.
+
+
+remove_dot_segments(Path) when is_binary(Path) ->
+ remove_dot_segments(Path, <<>>);
+remove_dot_segments(Path) when is_list(Path) ->
+ B = convert_to_binary(Path, utf8, utf8),
+ B1 = remove_dot_segments(B, <<>>),
+ convert_to_list(B1, utf8).
+%%
+remove_dot_segments(<<>>, Output) ->
+ Output;
+remove_dot_segments(<<"../",T/binary>>, Output) ->
+ remove_dot_segments(T, Output);
+remove_dot_segments(<<"./",T/binary>>, Output) ->
+ remove_dot_segments(T, Output);
+remove_dot_segments(<<"/./",T/binary>>, Output) ->
+ remove_dot_segments(<<$/,T/binary>>, Output);
+remove_dot_segments(<<"/.">>, Output) ->
+ remove_dot_segments(<<$/>>, Output);
+remove_dot_segments(<<"/../",T/binary>>, Output) ->
+ Out1 = remove_last_segment(Output),
+ remove_dot_segments(<<$/,T/binary>>, Out1);
+remove_dot_segments(<<"/..">>, Output) ->
+ Out1 = remove_last_segment(Output),
+ remove_dot_segments(<<$/>>, Out1);
+remove_dot_segments(<<$.>>, Output) ->
+ remove_dot_segments(<<>>, Output);
+remove_dot_segments(<<"..">>, Output) ->
+ remove_dot_segments(<<>>, Output);
+remove_dot_segments(Input, Output) ->
+ {First, Rest} = first_path_segment(Input),
+ remove_dot_segments(Rest, <<Output/binary,First/binary>>).
+
+
+first_path_segment(Input) ->
+ F = first_path_segment(Input, <<>>),
+ split_binary(Input, byte_size(F)).
+%%
+first_path_segment(<<$/,T/binary>>, Acc) ->
+ first_path_segment_end(<<T/binary>>, <<Acc/binary,$/>>);
+first_path_segment(<<C,T/binary>>, Acc) ->
+ first_path_segment_end(<<T/binary>>, <<Acc/binary,C>>).
+
+
+first_path_segment_end(<<>>, Acc) ->
+ Acc;
+first_path_segment_end(<<$/,_/binary>>, Acc) ->
+ Acc;
+first_path_segment_end(<<C,T/binary>>, Acc) ->
+ first_path_segment_end(<<T/binary>>, <<Acc/binary,C>>).
+
+
+remove_last_segment(<<>>) ->
+ <<>>;
+remove_last_segment(B) ->
+ {Init, Last} = split_binary(B, byte_size(B) - 1),
+ case Last of
+ <<$/>> ->
+ Init;
+ _Char ->
+ remove_last_segment(Init)
+ end.
+
+
+%% RFC 3986, 6.2.3. Scheme-Based Normalization
+normalize_scheme_based(Map) ->
+ Scheme = maps:get(scheme, Map, undefined),
+ Port = maps:get(port, Map, undefined),
+ Path= maps:get(path, Map, undefined),
+ normalize_scheme_based(Map, Scheme, Port, Path).
+%%
+normalize_scheme_based(Map, Scheme, Port, Path)
+ when Scheme =:= "http"; Scheme =:= <<"http">> ->
+ normalize_http(Map, Port, Path);
+normalize_scheme_based(Map, Scheme, Port, Path)
+ when Scheme =:= "https"; Scheme =:= <<"https">> ->
+ normalize_https(Map, Port, Path);
+normalize_scheme_based(Map, Scheme, Port, _Path)
+ when Scheme =:= "ftp"; Scheme =:= <<"ftp">> ->
+ normalize_ftp(Map, Port);
+normalize_scheme_based(Map, Scheme, Port, _Path)
+ when Scheme =:= "ssh"; Scheme =:= <<"ssh">> ->
+ normalize_ssh_sftp(Map, Port);
+normalize_scheme_based(Map, Scheme, Port, _Path)
+ when Scheme =:= "sftp"; Scheme =:= <<"sftp">> ->
+ normalize_ssh_sftp(Map, Port);
+normalize_scheme_based(Map, Scheme, Port, _Path)
+ when Scheme =:= "tftp"; Scheme =:= <<"tftp">> ->
+ normalize_tftp(Map, Port);
+normalize_scheme_based(Map, _, _, _) ->
+ Map.
+
+
+normalize_http(Map, Port, Path) ->
+ M1 = normalize_port(Map, Port, 80),
+ normalize_http_path(M1, Path).
+
+
+normalize_https(Map, Port, Path) ->
+ M1 = normalize_port(Map, Port, 443),
+ normalize_http_path(M1, Path).
+
+
+normalize_ftp(Map, Port) ->
+ normalize_port(Map, Port, 21).
+
+
+normalize_ssh_sftp(Map, Port) ->
+ normalize_port(Map, Port, 22).
+
+
+normalize_tftp(Map, Port) ->
+ normalize_port(Map, Port, 69).
+
+
+normalize_port(Map, Port, Default) ->
+ case Port of
+ Default ->
+ maps:remove(port, Map);
+ _Else ->
+ Map
+ end.
+
+
+normalize_http_path(Map, Path) ->
+ case Path of
+ "" ->
+ Map#{path => "/"};
+ <<>> ->
+ Map#{path => <<"/">>};
+ _Else ->
+ Map
+ end.
diff --git a/lib/stdlib/test/Makefile b/lib/stdlib/test/Makefile
index 523cb95065..8490770f3d 100644
--- a/lib/stdlib/test/Makefile
+++ b/lib/stdlib/test/Makefile
@@ -87,6 +87,7 @@ MODULES= \
timer_simple_SUITE \
unicode_SUITE \
unicode_util_SUITE \
+ uri_string_SUITE \
win32reg_SUITE \
y2k_SUITE \
select_SUITE \
diff --git a/lib/stdlib/test/property_test/README b/lib/stdlib/test/property_test/README
new file mode 100644
index 0000000000..57602bf719
--- /dev/null
+++ b/lib/stdlib/test/property_test/README
@@ -0,0 +1,12 @@
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% %%%
+%%% WARNING %%%
+%%% %%%
+%%% This is experimental code which may be changed or removed %%%
+%%% anytime without any warning. %%%
+%%% %%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+The test in this directory are written assuming that the user has a QuickCheck license. They are to be run manually. Some may be possible to be run with other tools, e.g. PropEr.
+
diff --git a/lib/stdlib/test/property_test/uri_string_recompose.erl b/lib/stdlib/test/property_test/uri_string_recompose.erl
new file mode 100644
index 0000000000..e51a671172
--- /dev/null
+++ b/lib/stdlib/test/property_test/uri_string_recompose.erl
@@ -0,0 +1,361 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+-module(uri_string_recompose).
+
+-compile(export_all).
+
+-proptest(eqc).
+-proptest([triq,proper]).
+
+-ifndef(EQC).
+-ifndef(PROPER).
+-ifndef(TRIQ).
+-define(EQC,true).
+-endif.
+-endif.
+-endif.
+
+-ifdef(EQC).
+-include_lib("eqc/include/eqc.hrl").
+-define(MOD_eqc,eqc).
+
+-else.
+-ifdef(PROPER).
+-include_lib("proper/include/proper.hrl").
+-define(MOD_eqc,proper).
+
+-else.
+-ifdef(TRIQ).
+-define(MOD_eqc,triq).
+-include_lib("triq/include/triq.hrl").
+
+-endif.
+-endif.
+-endif.
+
+
+-define(STRING_REST(MatchStr, Rest), <<MatchStr/utf8, Rest/binary>>).
+
+-define(SCHEME, {scheme, scheme()}).
+-define(USER, {userinfo, unicode()}).
+-define(HOST, {host, host_map()}).
+-define(PORT, {port, port()}).
+-define(PATH_ABE, {path, path_abempty_map()}).
+-define(PATH_ABS, {path, path_absolute_map()}).
+-define(PATH_NOS, {path, path_noscheme_map()}).
+-define(PATH_ROO, {path, path_rootless_map()}).
+-define(PATH_EMP, {path, path_empty_map()}).
+-define(QUERY, {query, query_map()}).
+-define(FRAGMENT, {fragment, fragment_map()}).
+
+
+%%%========================================================================
+%%% Properties
+%%%========================================================================
+
+prop_recompose() ->
+ ?FORALL(Map, map(),
+ Map =:= uri_string:parse(uri_string:recompose(Map))
+ ).
+
+%% Stats
+prop_map_key_length_collect() ->
+ ?FORALL(List, map(),
+ collect(length(maps:keys(List)), true)).
+
+prop_map_collect() ->
+ ?FORALL(List, map(),
+ collect(lists:sort(maps:keys(List)), true)).
+
+prop_scheme_collect() ->
+ ?FORALL(List, scheme(),
+ collect(length(List), true)).
+
+
+%%%========================================================================
+%%% Generators
+%%%========================================================================
+
+map() ->
+ ?LET(Gen, comp_proplist(), proplist_to_map(Gen)).
+
+comp_proplist() ->
+ frequency([
+ {2, [?SCHEME,?PATH_ABS]},
+ {2, [?SCHEME,?PATH_ROO]},
+ {2, [?SCHEME,?PATH_EMP]},
+ {2, [?SCHEME,?HOST,?PATH_ABE]},
+ {2, [?SCHEME,?USER,?HOST,?PATH_ABE]},
+ {2, [?SCHEME,?HOST,?PORT,?PATH_ABE]},
+ {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE]},
+
+ {2, [?PATH_ABS]},
+ {2, [?PATH_NOS]},
+ {2, [?PATH_EMP]},
+ {2, [?HOST,?PATH_ABE]},
+ {2, [?USER,?HOST,?PATH_ABE]},
+ {2, [?HOST,?PORT,?PATH_ABE]},
+ {2, [?USER,?HOST,?PORT,?PATH_ABE]},
+
+
+ {2, [?SCHEME,?PATH_ABS,?QUERY]},
+ {2, [?SCHEME,?PATH_ROO,?QUERY]},
+ {2, [?SCHEME,?PATH_EMP,?QUERY]},
+ {2, [?SCHEME,?HOST,?PATH_ABE,?QUERY]},
+ {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?QUERY]},
+ {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?QUERY]},
+ {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?QUERY]},
+
+ {2, [?PATH_ABS,?QUERY]},
+ {2, [?PATH_NOS,?QUERY]},
+ {2, [?PATH_EMP,?QUERY]},
+ {2, [?HOST,?PATH_ABE,?QUERY]},
+ {2, [?USER,?HOST,?PATH_ABE,?QUERY]},
+ {2, [?HOST,?PORT,?PATH_ABE,?QUERY]},
+ {2, [?USER,?HOST,?PORT,?PATH_ABE,?QUERY]},
+
+
+ {2, [?SCHEME,?PATH_ABS,?FRAGMENT]},
+ {2, [?SCHEME,?PATH_ROO,?FRAGMENT]},
+ {2, [?SCHEME,?PATH_EMP,?FRAGMENT]},
+ {2, [?SCHEME,?HOST,?PATH_ABE,?FRAGMENT]},
+ {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?FRAGMENT]},
+ {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?FRAGMENT]},
+ {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?FRAGMENT]},
+
+ {2, [?PATH_ABS,?FRAGMENT]},
+ {2, [?PATH_NOS,?FRAGMENT]},
+ {2, [?PATH_EMP,?FRAGMENT]},
+ {2, [?HOST,?PATH_ABE,?FRAGMENT]},
+ {2, [?USER,?HOST,?PATH_ABE,?FRAGMENT]},
+ {2, [?HOST,?PORT,?PATH_ABE,?FRAGMENT]},
+ {2, [?USER,?HOST,?PORT,?PATH_ABE,?FRAGMENT]},
+
+
+ {2, [?SCHEME,?PATH_ABS,?QUERY,?FRAGMENT]},
+ {2, [?SCHEME,?PATH_ROO,?QUERY,?FRAGMENT]},
+ {2, [?SCHEME,?PATH_EMP,?QUERY,?FRAGMENT]},
+ {2, [?SCHEME,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]},
+ {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]},
+ {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]},
+ {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]},
+
+ {2, [?PATH_ABS,?QUERY,?FRAGMENT]},
+ {2, [?PATH_NOS,?QUERY,?FRAGMENT]},
+ {2, [?PATH_EMP,?QUERY,?FRAGMENT]},
+ {2, [?HOST,?PATH_ABE,?QUERY,?FRAGMENT]},
+ {2, [?USER,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]},
+ {2, [?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]},
+ {2, [?USER,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]}
+ ]).
+
+
+%%-------------------------------------------------------------------------
+%% Path
+%%-------------------------------------------------------------------------
+path_abempty_map() ->
+ frequency([{90, path_abe_map()},
+ {10, path_empty_map()}]).
+
+path_abe_map() ->
+ ?SIZED(Length, path_abe_map(Length, [])).
+%%
+path_abe_map(0, Segments) ->
+ ?LET(Gen, Segments, lists:append(Gen));
+path_abe_map(N, Segments) ->
+ path_abe_map(N-1, [slash(),segment()|Segments]).
+
+
+path_absolute_map() ->
+ ?SIZED(Length, path_absolute_map(Length, [])).
+%%
+path_absolute_map(0, Segments) ->
+ ?LET(Gen, [slash(),segment_nz()|Segments], lists:append(Gen));
+path_absolute_map(N, Segments) ->
+ path_absolute_map(N-1, [slash(),segment()|Segments]).
+
+
+path_noscheme_map() ->
+ ?SIZED(Length, path_noscheme_map(Length, [])).
+%%
+path_noscheme_map(0, Segments) ->
+ ?LET(Gen, [segment_nz_nc()|Segments], lists:append(Gen));
+path_noscheme_map(N, Segments) ->
+ path_noscheme_map(N-1, [slash(),segment()|Segments]).
+
+path_rootless_map() ->
+ ?SIZED(Length, path_rootless_map(Length, [])).
+%%
+path_rootless_map(0, Segments) ->
+ ?LET(Gen, [segment_nz()|Segments], lists:append(Gen));
+path_rootless_map(N, Segments) ->
+ path_rootless_map(N-1, [slash(),segment()|Segments]).
+
+
+segment_nz() ->
+ non_empty(segment()).
+
+segment_nz_nc() ->
+ non_empty(list(frequency([{30, unreserved()},
+ {10, sub_delims()},
+ {10, unicode_char()},
+ {5, oneof([$@])}
+ ]))).
+
+
+segment() ->
+ list(frequency([{30, unreserved()},
+ {10, sub_delims()},
+ {10, unicode_char()},
+ {5, oneof([$:, $@])}
+ ])).
+
+slash() ->
+ "/".
+
+path_empty_map() ->
+ "".
+
+
+%%-------------------------------------------------------------------------
+%% Path
+%%-------------------------------------------------------------------------
+host_map() ->
+ frequency([{30, reg_name()},
+ {30, ip_address()}
+ ]).
+
+
+reg_name() ->
+ list(frequency([{30, alpha()},
+ {10, sub_delims()},
+ {10, unicode_char()}
+ ])).
+
+ip_address() ->
+ oneof(["127.0.0.1", "::127.0.0.1",
+ "2001:0db8:0000:0000:0000:0000:1428:07ab",
+ "2001:0db8:0000:0000:0000::1428:07ab",
+ "2001:0db8:0:0:0:0:1428:07ab",
+ "2001:0db8:0::0:1428:07ab"]).
+
+%% Generating only reg-names
+host_uri() ->
+ non_empty(list(frequency([{30, unreserved()},
+ {10, sub_delims()},
+ {10, pct_encoded()}
+ ]))).
+
+%%-------------------------------------------------------------------------
+%% Port, Query, Fragment
+%%-------------------------------------------------------------------------
+port() ->
+ frequency([{10, undefined},
+ {10, range(1,65535)}
+ ]).
+
+query_map() ->
+ unicode().
+
+
+query_uri() ->
+ [$?| non_empty(list(frequency([{20, pchar()},
+ {5, oneof([$/, $?])} % punctuation
+ ])))].
+
+fragment_map() ->
+ unicode().
+
+fragment_uri() ->
+ [$?| non_empty(list(frequency([{20, pchar()},
+ {5, oneof([$/, $?])} % punctuation
+ ])))].
+
+
+%%-------------------------------------------------------------------------
+%% Scheme
+%%-------------------------------------------------------------------------
+scheme() ->
+ ?SIZED(Length, scheme_start(Length, [])).
+%%
+scheme_start(0, L) ->
+ ?LET(Gen, L, lists:reverse(Gen));
+scheme_start(N, L) ->
+ scheme(N-1,[alpha()|L]).
+
+scheme(0, L) ->
+ ?LET(Gen, L, lists:reverse(Gen));
+scheme(N, L) ->
+ scheme(N-1, [scheme_char()|L]).
+
+
+%%-------------------------------------------------------------------------
+%% Misc
+%%-------------------------------------------------------------------------
+unicode() ->
+ list(frequency([{20, alpha()}, % alpha
+ {10, digit()}, % digit
+ {10, unicode_char()} % unicode
+ ])).
+
+scheme_char() ->
+ frequency([{20, alpha()}, % alpha
+ {20, digit()}, % digit
+ {5, oneof([$+, $-, $.])} % punctuation
+ ]).
+
+sub_delims() ->
+ oneof([$!, $$, $&, $', $(, $),
+ $*, $+, $,,$;, $=]).
+
+pchar() ->
+ frequency([{20, unreserved()},
+ {5, pct_encoded()},
+ {5, sub_delims()},
+ {1, oneof([$:, $@])} % punctuation
+ ]).
+
+unreserved() ->
+ frequency([{20, alpha()},
+ {5, digit()},
+ {1, oneof([$-, $., $_, $~])} % punctuation
+ ]).
+
+unicode_char() ->
+ range(913, 1023).
+
+alpha() ->
+ frequency([{20, range($a, $z)}, % letters
+ {20, range($A, $Z)}]). % letters
+
+digit() ->
+ range($0, $9). % numbers
+
+pct_encoded() ->
+ oneof(["%C3%A4", "%C3%A5", "%C3%B6"]).
+
+
+%%%========================================================================
+%%% Helpers
+%%%========================================================================
+proplist_to_map(L) ->
+ lists:foldl(fun({K,V},M) -> M#{K => V};
+ (_,M) -> M
+ end, #{}, L).
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
new file mode 100644
index 0000000000..c625da56c6
--- /dev/null
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -0,0 +1,844 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+-module(uri_string_SUITE).
+
+-include_lib("common_test/include/ct.hrl").
+
+-export([all/0, suite/0,groups/0,
+ normalize/1,
+ parse_binary_fragment/1, parse_binary_host/1, parse_binary_host_ipv4/1,
+ parse_binary_host_ipv6/1,
+ parse_binary_path/1, parse_binary_pct_encoded_fragment/1, parse_binary_pct_encoded_query/1,
+ parse_binary_pct_encoded_userinfo/1, parse_binary_port/1,
+ parse_binary_query/1, parse_binary_scheme/1, parse_binary_userinfo/1,
+ parse_fragment/1, parse_host/1, parse_host_ipv4/1, parse_host_ipv6/1,
+ parse_path/1, parse_pct_encoded_fragment/1, parse_pct_encoded_query/1,
+ parse_pct_encoded_userinfo/1, parse_port/1,
+ parse_query/1, parse_scheme/1, parse_userinfo/1,
+ parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1,
+ parse_special/1, parse_special2/1, parse_negative/1,
+ recompose_fragment/1, recompose_parse_fragment/1,
+ recompose_query/1, recompose_parse_query/1,
+ recompose_path/1, recompose_parse_path/1,
+ recompose_autogen/1, parse_recompose_autogen/1,
+ transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1
+ ]).
+
+
+-define(SCHEME, "foo").
+-define(USERINFO, "åsa").
+-define(USERINFO_ENC, "%C3%A5sa").
+-define(HOST, "älvsjö").
+-define(HOST_ENC, "%C3%A4lvsj%C3%B6").
+-define(IPV6, "::127.0.0.1").
+-define(IPV6_ENC, "[::127.0.0.1]").
+-define(PORT, 8042).
+-define(PORT_ENC, ":8042").
+-define(PATH, "/där").
+-define(PATH_ENC, "/d%C3%A4r").
+-define(QUERY, "name=örn").
+-define(QUERY_ENC, "?name=%C3%B6rn").
+-define(FRAGMENT, "näsa").
+-define(FRAGMENT_ENC, "#n%C3%A4sa").
+
+
+suite() ->
+ [{timetrap,{minutes,1}}].
+
+all() ->
+ [
+ normalize,
+ parse_binary_scheme,
+ parse_binary_userinfo,
+ parse_binary_pct_encoded_userinfo,
+ parse_binary_host,
+ parse_binary_host_ipv4,
+ parse_binary_host_ipv6,
+ parse_binary_port,
+ parse_binary_path,
+ parse_binary_query,
+ parse_binary_pct_encoded_query,
+ parse_binary_fragment,
+ parse_binary_pct_encoded_fragment,
+ parse_scheme,
+ parse_userinfo,
+ parse_pct_encoded_userinfo,
+ parse_host,
+ parse_host_ipv4,
+ parse_host_ipv6,
+ parse_port,
+ parse_path,
+ parse_query,
+ parse_pct_encoded_query,
+ parse_fragment,
+ parse_pct_encoded_fragment,
+ parse_list,
+ parse_binary,
+ parse_mixed,
+ parse_relative,
+ parse_special,
+ parse_special2,
+ parse_negative,
+ recompose_fragment,
+ recompose_parse_fragment,
+ recompose_query,
+ recompose_parse_query,
+ recompose_path,
+ recompose_parse_path,
+ recompose_autogen,
+ parse_recompose_autogen,
+ transcode_basic,
+ transcode_options,
+ transcode_mixed,
+ transcode_negative
+ ].
+
+groups() ->
+ [].
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions
+%%-------------------------------------------------------------------------
+uri_combinations() ->
+ [[Sch,Usr,Hst,Prt,Pat,Qry,Frg] ||
+ Sch <- [fun update_scheme/1, fun update_scheme_binary/1, none],
+ Usr <- [fun update_userinfo/1, fun update_userinfo_binary/1, none],
+ Hst <- [fun update_host/1, fun update_host_binary/1,
+ fun update_ipv6/1, fun update_ipv6_binary/1, none],
+ Prt <- [fun update_port/1, none],
+ Pat <- [fun update_path/1, fun update_path_binary/1],
+ Qry <- [fun update_query/1,fun update_query_binary/1, none],
+ Frg <- [fun update_fragment/1, fun update_fragment_binary/1, none],
+ not (Usr =:= none andalso Hst =:= none andalso Prt =/= none),
+ not (Usr =/= none andalso Hst =:= none andalso Prt =:= none),
+ not (Usr =/= none andalso Hst =:= none andalso Prt =/= none)].
+
+
+generate_test_vector(Comb) ->
+ Fun = fun (F, {Map, URI}) when is_function(F) -> F({Map, URI});
+ (_, Map) -> Map
+ end,
+ lists:foldl(Fun, {#{}, empty}, Comb).
+
+generate_test_vectors(L) ->
+ lists:map(fun generate_test_vector/1, L).
+
+update_fragment({In, empty}) ->
+ {In#{fragment => ?FRAGMENT}, ?FRAGMENT_ENC};
+update_fragment({In, Out}) when is_list(Out) ->
+ {In#{fragment => ?FRAGMENT}, Out ++ ?FRAGMENT_ENC};
+update_fragment({In, Out}) when is_binary(Out) ->
+ {In#{fragment => ?FRAGMENT}, binary_to_list(Out) ++ ?FRAGMENT_ENC}.
+
+update_fragment_binary({In, empty}) ->
+ {In#{fragment => <<?FRAGMENT/utf8>>}, <<?FRAGMENT_ENC>>};
+update_fragment_binary({In, Out}) when is_list(Out) ->
+ {In#{fragment => <<?FRAGMENT/utf8>>}, Out ++ ?FRAGMENT_ENC};
+update_fragment_binary({In, Out}) when is_binary(Out) ->
+ {In#{fragment => <<?FRAGMENT/utf8>>}, <<Out/binary,?FRAGMENT_ENC>>}.
+
+
+update_query({In, empty}) ->
+ {In#{query => ?QUERY}, ?QUERY_ENC};
+update_query({In, Out}) when is_list(Out) ->
+ {In#{query => ?QUERY}, Out ++ ?QUERY_ENC};
+update_query({In, Out}) when is_binary(Out) ->
+ {In#{query => ?QUERY}, binary_to_list(Out) ++ ?QUERY_ENC}.
+
+update_query_binary({In, empty}) ->
+ {In#{query => <<?QUERY/utf8>>}, <<?QUERY_ENC>>};
+update_query_binary({In, Out}) when is_list(Out) ->
+ {In#{query => <<?QUERY/utf8>>}, Out ++ ?QUERY_ENC};
+update_query_binary({In, Out}) when is_binary(Out) ->
+ {In#{query => <<?QUERY/utf8>>}, <<Out/binary,?QUERY_ENC>>}.
+
+update_path({In, empty}) ->
+ {In#{path => ?PATH}, ?PATH_ENC};
+update_path({In, Out}) when is_list(Out) ->
+ {In#{path => ?PATH}, Out ++ ?PATH_ENC};
+update_path({In, Out}) when is_binary(Out) ->
+ {In#{path => ?PATH}, binary_to_list(Out) ++ ?PATH_ENC}.
+
+update_path_binary({In, empty}) ->
+ {In#{path => <<?PATH/utf8>>}, <<?PATH_ENC>>};
+update_path_binary({In, Out}) when is_list(Out) ->
+ {In#{path => <<?PATH/utf8>>}, Out ++ ?PATH_ENC};
+update_path_binary({In, Out}) when is_binary(Out) ->
+ {In#{path => <<?PATH/utf8>>}, <<Out/binary,?PATH_ENC>>}.
+
+update_port({In, Out}) when is_list(Out) ->
+ {In#{port => ?PORT}, Out ++ ?PORT_ENC};
+update_port({In, Out}) when is_binary(Out) ->
+ {In#{port => ?PORT}, <<Out/binary,?PORT_ENC>>}.
+
+update_host({In, empty}) ->
+ {In#{host => ?HOST}, "//" ++ ?HOST_ENC};
+update_host({In, Out}) when is_list(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => ?HOST}, Out ++ [$@|?HOST_ENC]};
+ false -> {In#{host => ?HOST}, Out ++ [$/,$/|?HOST_ENC]}
+ end;
+update_host({In, Out}) when is_binary(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => ?HOST}, binary_to_list(Out) ++ [$@|?HOST_ENC]};
+ false -> {In#{host => ?HOST}, binary_to_list(Out) ++ [$/,$/|?HOST_ENC]}
+ end.
+
+update_host_binary({In, empty}) ->
+ {In#{host => <<?HOST/utf8>>}, <<"//",?HOST_ENC>>};
+update_host_binary({In, Out}) when is_list(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => <<?HOST/utf8>>}, Out ++ [$@|?HOST_ENC]};
+ false -> {In#{host => <<?HOST/utf8>>}, Out ++ [$/,$/|?HOST_ENC]}
+ end;
+update_host_binary({In, Out}) when is_binary(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => <<?HOST/utf8>>}, <<Out/binary,$@,?HOST_ENC>>};
+ false-> {In#{host => <<?HOST/utf8>>}, <<Out/binary,"//",?HOST_ENC>>}
+ end.
+
+update_ipv6({In, empty}) ->
+ {In#{host => ?IPV6}, "//" ++ ?IPV6_ENC};
+update_ipv6({In, Out}) when is_list(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => ?IPV6}, Out ++ [$@|?IPV6_ENC]};
+ false -> {In#{host => ?IPV6}, Out ++ [$/,$/|?IPV6_ENC]}
+ end;
+update_ipv6({In, Out}) when is_binary(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => ?IPV6}, binary_to_list(Out) ++ [$@|?IPV6_ENC]};
+ false -> {In#{host => ?IPV6}, binary_to_list(Out) ++ [$/,$/|?IPV6_ENC]}
+ end.
+
+update_ipv6_binary({In, empty}) ->
+ {In#{host => <<?IPV6/utf8>>}, <<"//",?IPV6_ENC>>};
+update_ipv6_binary({In, Out}) when is_list(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => <<?IPV6/utf8>>}, Out ++ [$@|?IPV6_ENC]};
+ false -> {In#{host => <<?IPV6/utf8>>}, Out ++ [$/,$/|?IPV6_ENC]}
+ end;
+update_ipv6_binary({In, Out}) when is_binary(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => <<?IPV6/utf8>>}, <<Out/binary,$@,?IPV6_ENC>>};
+ false-> {In#{host => <<?IPV6/utf8>>}, <<Out/binary,"//",?IPV6_ENC>>}
+ end.
+
+update_userinfo({In, empty}) ->
+ {In#{userinfo => ?USERINFO}, "//" ++ ?USERINFO_ENC};
+update_userinfo({In, Out}) when is_list(Out) ->
+ {In#{userinfo => ?USERINFO}, Out ++ "//" ++ ?USERINFO_ENC};
+update_userinfo({In, Out}) when is_binary(Out) ->
+ {In#{userinfo => ?USERINFO}, binary_to_list(Out) ++ "//" ++ ?USERINFO_ENC}.
+
+update_userinfo_binary({In, empty}) ->
+ {In#{userinfo => <<?USERINFO/utf8>>}, <<"//",?USERINFO_ENC>>};
+update_userinfo_binary({In, Out}) when is_list(Out) ->
+ {In#{userinfo => <<?USERINFO/utf8>>}, Out ++ "//" ++ ?USERINFO_ENC};
+update_userinfo_binary({In, Out}) when is_binary(Out) ->
+ {In#{userinfo => <<?USERINFO/utf8>>}, <<Out/binary,"//",?USERINFO_ENC>>}.
+
+update_scheme({In, empty}) ->
+ {In#{scheme => ?SCHEME}, ?SCHEME ++ ":"}.
+
+update_scheme_binary({In, empty}) ->
+ {In#{scheme => <<?SCHEME/utf8>>}, <<?SCHEME,$:>>}.
+
+
+%% Test recompose on a generated test vector
+run_test_recompose({#{}, empty}) ->
+ try "" = uri_string:recompose(#{}) of
+ _ -> ok
+ catch
+ _:_ -> error({test_failed, #{}, ""})
+ end;
+run_test_recompose({Map, URI}) ->
+ try URI = uri_string:recompose(Map) of
+ URI -> ok
+ catch
+ _:_ -> error({test_failed, Map, URI})
+ end.
+
+%% Test parse - recompose on a generated test vector
+run_test_parse_recompose({#{}, empty}) ->
+ try "" = uri_string:recompose(uri_string:parse("")) of
+ _ -> ok
+ catch
+ _:_ -> error({test_failed, #{}, ""})
+ end;
+run_test_parse_recompose({Map, URI}) ->
+ try URI = uri_string:recompose(uri_string:parse(URI)) of
+ URI -> ok
+ catch
+ _:_ -> error({test_failed, Map, URI})
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Parse tests
+%%-------------------------------------------------------------------------
+
+parse_binary_scheme(_Config) ->
+ #{} = uri_string:parse(<<>>),
+ #{path := <<"foo">>} = uri_string:parse(<<"foo">>),
+ #{scheme := <<"foo">>} = uri_string:parse(<<"foo:">>),
+ #{scheme := <<"foo">>, path := <<"bar:nisse">>} = uri_string:parse(<<"foo:bar:nisse">>),
+ #{scheme := <<"foo">>, host := <<"">>} = uri_string:parse(<<"foo://">>),
+ #{scheme := <<"foo">>, host := <<"">>, path := <<"/">>} = uri_string:parse(<<"foo:///">>),
+ #{scheme := <<"foo">>, host := <<"">>, path := <<"//">>} = uri_string:parse(<<"foo:////">>),
+
+ #{path := <<"/">>} = uri_string:parse(<<"/">>),
+ #{host := <<>>} = uri_string:parse(<<"//">>),
+ #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>).
+
+parse_binary_userinfo(_Config) ->
+ #{scheme := <<"user">>, path := <<"password@localhost">>} =
+ uri_string:parse(<<"user:password@localhost">>),
+ #{path := <<"user@">>} = uri_string:parse(<<"user@">>),
+ #{path := <<"/user@">>} = uri_string:parse(<<"/user@">>),
+ #{path := <<"user@localhost">>} = uri_string:parse(<<"user@localhost">>),
+ #{userinfo := <<"user">>, host := <<"localhost">>} = uri_string:parse(<<"//user@localhost">>),
+ #{userinfo := <<"user:password">>, host := <<"localhost">>} =
+ uri_string:parse(<<"//user:password@localhost">>),
+ #{scheme := <<"foo">>, path := <<"/user@">>} =
+ uri_string:parse(<<"foo:/user@">>),
+ #{scheme := <<"foo">>, userinfo := <<"user">>, host := <<"localhost">>} =
+ uri_string:parse(<<"foo://user@localhost">>),
+ #{scheme := <<"foo">>, userinfo := <<"user:password">>, host := <<"localhost">>} =
+ uri_string:parse(<<"foo://user:password@localhost">>).
+
+parse_binary_pct_encoded_userinfo(_Config) ->
+ #{scheme := <<"user">>, path := <<"合@気道"/utf8>>} =
+ uri_string:parse(<<"user:%E5%90%88@%E6%B0%97%E9%81%93">>),
+ #{path := <<"合気道@"/utf8>>} = uri_string:parse(<<"%E5%90%88%E6%B0%97%E9%81%93@">>),
+ #{path := <<"/合気道@"/utf8>>} = uri_string:parse(<<"/%E5%90%88%E6%B0%97%E9%81%93@">>),
+ #{path := <<"合@気道"/utf8>>} = uri_string:parse(<<"%E5%90%88@%E6%B0%97%E9%81%93">>),
+ #{userinfo := <<"合"/utf8>>, host := <<"気道"/utf8>>} =
+ uri_string:parse(<<"//%E5%90%88@%E6%B0%97%E9%81%93">>),
+ #{userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} =
+ uri_string:parse(<<"//%E5%90%88:%E6%B0%97@%E9%81%93">>),
+ #{scheme := <<"foo">>, path := <<"/合気道@"/utf8>>} =
+ uri_string:parse(<<"foo:/%E5%90%88%E6%B0%97%E9%81%93@">>),
+ #{scheme := <<"foo">>, userinfo := <<"合"/utf8>>, host := <<"気道"/utf8>>} =
+ uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93">>),
+ #{scheme := <<"foo">>, userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} =
+ uri_string:parse(<<"foo://%E5%90%88:%E6%B0%97@%E9%81%93">>),
+ {error,invalid_uri,"@"} = uri_string:parse(<<"//%E5%90%88@%E6%B0%97%E9%81%93@">>),
+ {error,invalid_uri,":"} = uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93@">>).
+
+parse_binary_host(_Config) ->
+ #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>),
+ #{host := <<"hostname">>,scheme := <<"foo">>} = uri_string:parse(<<"foo://hostname">>),
+ #{host := <<"hostname">>,scheme := <<"foo">>, userinfo := <<"user">>} =
+ uri_string:parse(<<"foo://user@hostname">>).
+
+parse_binary_host_ipv4(_Config) ->
+ #{host := <<"127.0.0.1">>} = uri_string:parse(<<"//127.0.0.1">>),
+ #{host := <<"127.0.0.1">>, path := <<"/over/there">>} =
+ uri_string:parse(<<"//127.0.0.1/over/there">>),
+ #{host := <<"127.0.0.1">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"//127.0.0.1?name=ferret">>),
+ #{host := <<"127.0.0.1">>, fragment := <<"nose">>} = uri_string:parse(<<"//127.0.0.1#nose">>),
+ {error,invalid_uri,"x"} = uri_string:parse(<<"//127.0.0.x">>),
+ {error,invalid_uri,"1227.0.0.1"} = uri_string:parse(<<"//1227.0.0.1">>).
+
+parse_binary_host_ipv6(_Config) ->
+ #{host := <<"::127.0.0.1">>} = uri_string:parse(<<"//[::127.0.0.1]">>),
+ #{host := <<"2001:0db8:0000:0000:0000:0000:1428:07ab">>} =
+ uri_string:parse(<<"//[2001:0db8:0000:0000:0000:0000:1428:07ab]">>),
+ #{host := <<"::127.0.0.1">>, path := <<"/over/there">>} =
+ uri_string:parse(<<"//[::127.0.0.1]/over/there">>),
+ #{host := <<"::127.0.0.1">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"//[::127.0.0.1]?name=ferret">>),
+ #{host := <<"::127.0.0.1">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"//[::127.0.0.1]#nose">>),
+ {error,invalid_uri,"x"} = uri_string:parse(<<"//[::127.0.0.x]">>),
+ {error,invalid_uri,"::1227.0.0.1"} = uri_string:parse(<<"//[::1227.0.0.1]">>),
+ {error,invalid_uri,"G"} = uri_string:parse(<<"//[2001:0db8:0000:0000:0000:0000:1428:G7ab]">>).
+
+parse_binary_port(_Config) ->
+ #{path:= <<"/:8042">>} =
+ uri_string:parse(<<"/:8042">>),
+ #{host:= <<>>, port := 8042} =
+ uri_string:parse(<<"//:8042">>),
+ #{host := <<"example.com">>, port:= 8042} =
+ uri_string:parse(<<"//example.com:8042">>),
+ #{scheme := <<"foo">>, path := <<"/:8042">>} =
+ uri_string:parse(<<"foo:/:8042">>),
+ #{scheme := <<"foo">>, host := <<>>, port := 8042} =
+ uri_string:parse(<<"foo://:8042">>),
+ #{scheme := <<"foo">>, host := <<"example.com">>, port := 8042} =
+ uri_string:parse(<<"foo://example.com:8042">>),
+ {error,invalid_uri,":"} = uri_string:parse(":600"),
+ {error,invalid_uri,"x"} = uri_string:parse("//:8042x").
+
+parse_binary_path(_Config) ->
+ #{path := <<"over/there">>} = uri_string:parse(<<"over/there">>),
+ #{path := <<"/over/there">>} = uri_string:parse(<<"/over/there">>),
+ #{scheme := <<"foo">>, path := <<"/over/there">>} =
+ uri_string:parse(<<"foo:/over/there">>),
+ #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/over/there">>} =
+ uri_string:parse(<<"foo://example.com/over/there">>),
+ #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/over/there">>, port := 8042} =
+ uri_string:parse(<<"foo://example.com:8042/over/there">>).
+
+parse_binary_query(_Config) ->
+ #{scheme := <<"foo">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"foo:?name=ferret">>),
+ #{scheme := <<"foo">>, path:= <<"over/there">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"foo:over/there?name=ferret">>),
+ #{scheme := <<"foo">>, path:= <<"/over/there">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"foo:/over/there?name=ferret">>),
+ #{scheme := <<"foo">>, host := <<"example.com">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"foo://example.com?name=ferret">>),
+ #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"foo://example.com/?name=ferret">>),
+
+ #{path := <<>>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"?name=ferret">>),
+ #{path := <<"over/there">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"over/there?name=ferret">>),
+ #{path := <<"/">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"/?name=ferret">>),
+ #{path := <<"/over/there">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"/over/there?name=ferret">>),
+ #{host := <<"example.com">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"//example.com?name=ferret">>),
+ #{host := <<"example.com">>, path := <<"/">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"//example.com/?name=ferret">>).
+
+parse_binary_pct_encoded_query(_Config) ->
+ #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>,
+ query := <<"name=合気道"/utf8>>} =
+ uri_string:parse(<<"foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>),
+ #{host := <<"example.com">>, path := <<"/">>, query := <<"name=合気道"/utf8>>} =
+ uri_string:parse(<<"//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>).
+
+parse_binary_fragment(_Config) ->
+ #{scheme := <<"foo">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"foo:#nose">>),
+ #{scheme := <<"foo">>, path:= <<"over/there">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"foo:over/there#nose">>),
+ #{scheme := <<"foo">>, path:= <<"/over/there">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"foo:/over/there#nose">>),
+ #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"foo://example.com#nose">>),
+ #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"foo://example.com/#nose">>),
+ #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"foo://example.com#nose">>),
+
+ #{fragment := <<"nose">>} =
+ uri_string:parse(<<"#nose">>),
+ #{path := <<"over/there">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"over/there#nose">>),
+ #{path := <<"/">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"/#nose">>),
+ #{path := <<"/over/there">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"/over/there#nose">>),
+ #{host := <<"example.com">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"//example.com#nose">>),
+ #{host := <<"example.com">>, path := <<"/">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"//example.com/#nose">>).
+
+parse_binary_pct_encoded_fragment(_Config) ->
+ #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"合気道"/utf8>>} =
+ uri_string:parse(<<"foo://example.com#%E5%90%88%E6%B0%97%E9%81%93">>),
+ #{host := <<"example.com">>, path := <<"/">>, fragment := <<"合気道"/utf8>>} =
+ uri_string:parse(<<"//example.com/#%E5%90%88%E6%B0%97%E9%81%93">>).
+
+parse_scheme(_Config) ->
+ #{} = uri_string:parse(""),
+ #{path := "foo"} = uri_string:parse("foo"),
+ #{scheme := "foo"} = uri_string:parse("foo:"),
+ #{scheme := "foo", path := "bar:nisse"} = uri_string:parse("foo:bar:nisse"),
+ #{scheme := "foo", host := ""} = uri_string:parse("foo://"),
+ #{scheme := "foo", host := "", path := "/"} = uri_string:parse("foo:///"),
+ #{scheme := "foo", host := "", path := "//"} = uri_string:parse("foo:////"),
+
+ #{path := "/"} = uri_string:parse("/"),
+ #{host := ""} = uri_string:parse("//"),
+ #{host := "", path := "/"} = uri_string:parse("///").
+
+parse_userinfo(_Config) ->
+ #{scheme := "user", path := "password@localhost"} = uri_string:parse("user:password@localhost"),
+ #{path := "user@"} = uri_string:parse("user@"),
+ #{path := "/user@"} = uri_string:parse("/user@"),
+ #{path := "user@localhost"} = uri_string:parse("user@localhost"),
+ #{userinfo := "user", host := "localhost"} = uri_string:parse("//user@localhost"),
+ #{userinfo := "user:password", host := "localhost"} =
+ uri_string:parse("//user:password@localhost"),
+ #{scheme := "foo", path := "/user@"} =
+ uri_string:parse("foo:/user@"),
+ #{scheme := "foo", userinfo := "user", host := "localhost"} =
+ uri_string:parse("foo://user@localhost"),
+ #{scheme := "foo", userinfo := "user:password", host := "localhost"} =
+ uri_string:parse("foo://user:password@localhost").
+
+parse_pct_encoded_userinfo(_Config) ->
+ #{scheme := "user", path := "合@気道"} =
+ uri_string:parse("user:%E5%90%88@%E6%B0%97%E9%81%93"),
+ #{path := "合気道@"} = uri_string:parse("%E5%90%88%E6%B0%97%E9%81%93@"),
+ #{path := "/合気道@"} = uri_string:parse("/%E5%90%88%E6%B0%97%E9%81%93@"),
+ #{path := "合@気道"} = uri_string:parse("%E5%90%88@%E6%B0%97%E9%81%93"),
+ #{userinfo := "合", host := "気道"} =
+ uri_string:parse("//%E5%90%88@%E6%B0%97%E9%81%93"),
+ #{userinfo := "合:気", host := "道"} =
+ uri_string:parse("//%E5%90%88:%E6%B0%97@%E9%81%93"),
+ #{scheme := "foo", path := "/合気道@"} =
+ uri_string:parse("foo:/%E5%90%88%E6%B0%97%E9%81%93@"),
+ #{scheme := "foo", userinfo := "合", host := "気道"} =
+ uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93"),
+ #{scheme := "foo", userinfo := "合:気", host := "道"} =
+ uri_string:parse("foo://%E5%90%88:%E6%B0%97@%E9%81%93"),
+ {error,invalid_uri,"@"} = uri_string:parse("//%E5%90%88@%E6%B0%97%E9%81%93@"),
+ {error,invalid_uri,":"} = uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93@").
+
+
+parse_host(_Config) ->
+ #{host := "hostname"} = uri_string:parse("//hostname"),
+ #{host := "hostname",scheme := "foo"} = uri_string:parse("foo://hostname"),
+ #{host := "hostname",scheme := "foo", userinfo := "user"} =
+ uri_string:parse("foo://user@hostname").
+
+parse_host_ipv4(_Config) ->
+ #{host := "127.0.0.1"} = uri_string:parse("//127.0.0.1"),
+ #{host := "2001:0db8:0000:0000:0000:0000:1428:07ab"} =
+ uri_string:parse("//[2001:0db8:0000:0000:0000:0000:1428:07ab]"),
+ #{host := "127.0.0.1", path := "/over/there"} = uri_string:parse("//127.0.0.1/over/there"),
+ #{host := "127.0.0.1", query := "name=ferret"} = uri_string:parse("//127.0.0.1?name=ferret"),
+ #{host := "127.0.0.1", fragment := "nose"} = uri_string:parse("//127.0.0.1#nose"),
+ {error,invalid_uri,"x"} = uri_string:parse("//127.0.0.x"),
+ {error,invalid_uri,"1227.0.0.1"} = uri_string:parse("//1227.0.0.1").
+
+parse_host_ipv6(_Config) ->
+ #{host := "::127.0.0.1"} = uri_string:parse("//[::127.0.0.1]"),
+ #{host := "::127.0.0.1", path := "/over/there"} = uri_string:parse("//[::127.0.0.1]/over/there"),
+ #{host := "::127.0.0.1", query := "name=ferret"} =
+ uri_string:parse("//[::127.0.0.1]?name=ferret"),
+ #{host := "::127.0.0.1", fragment := "nose"} = uri_string:parse("//[::127.0.0.1]#nose"),
+ {error,invalid_uri,"x"} = uri_string:parse("//[::127.0.0.x]"),
+ {error,invalid_uri,"::1227.0.0.1"} = uri_string:parse("//[::1227.0.0.1]"),
+ {error,invalid_uri,"G"} = uri_string:parse("//[2001:0db8:0000:0000:0000:0000:1428:G7ab]").
+
+parse_port(_Config) ->
+ #{path:= "/:8042"} =
+ uri_string:parse("/:8042"),
+ #{host:= "", port := 8042} =
+ uri_string:parse("//:8042"),
+ #{host := "example.com", port:= 8042} =
+ uri_string:parse("//example.com:8042"),
+ #{scheme := "foo", path := "/:8042"} =
+ uri_string:parse("foo:/:8042"),
+ #{scheme := "foo", host := "", port := 8042} =
+ uri_string:parse("foo://:8042"),
+ #{scheme := "foo", host := "example.com", port := 8042} =
+ uri_string:parse("foo://example.com:8042").
+
+parse_path(_Config) ->
+ #{path := "over/there"} = uri_string:parse("over/there"),
+ #{path := "/over/there"} = uri_string:parse("/over/there"),
+ #{scheme := "foo", path := "/over/there"} =
+ uri_string:parse("foo:/over/there"),
+ #{scheme := "foo", host := "example.com", path := "/over/there"} =
+ uri_string:parse("foo://example.com/over/there"),
+ #{scheme := "foo", host := "example.com", path := "/over/there", port := 8042} =
+ uri_string:parse("foo://example.com:8042/over/there").
+
+parse_query(_Config) ->
+ #{scheme := "foo", query := "name=ferret"} =
+ uri_string:parse("foo:?name=ferret"),
+ #{scheme := "foo", path:= "over/there", query := "name=ferret"} =
+ uri_string:parse("foo:over/there?name=ferret"),
+ #{scheme := "foo", path:= "/over/there", query := "name=ferret"} =
+ uri_string:parse("foo:/over/there?name=ferret"),
+ #{scheme := "foo", host := "example.com", query := "name=ferret"} =
+ uri_string:parse("foo://example.com?name=ferret"),
+ #{scheme := "foo", host := "example.com", path := "/", query := "name=ferret"} =
+ uri_string:parse("foo://example.com/?name=ferret"),
+
+ #{path := "", query := "name=ferret"} =
+ uri_string:parse("?name=ferret"),
+ #{path := "over/there", query := "name=ferret"} =
+ uri_string:parse("over/there?name=ferret"),
+ #{path := "/", query := "name=ferret"} =
+ uri_string:parse("/?name=ferret"),
+ #{path := "/over/there", query := "name=ferret"} =
+ uri_string:parse("/over/there?name=ferret"),
+ #{host := "example.com", query := "name=ferret"} =
+ uri_string:parse("//example.com?name=ferret"),
+ #{host := "example.com", path := "/", query := "name=ferret"} =
+ uri_string:parse("//example.com/?name=ferret").
+
+parse_pct_encoded_query(_Config) ->
+ #{scheme := "foo", host := "example.com", path := "/",
+ query := "name=合気道"} =
+ uri_string:parse("foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93"),
+ #{host := "example.com", path := "/", query := "name=合気道"} =
+ uri_string:parse("//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93").
+
+parse_fragment(_Config) ->
+ #{scheme := "foo", fragment := "nose"} =
+ uri_string:parse("foo:#nose"),
+ #{scheme := "foo", path:= "over/there", fragment := "nose"} =
+ uri_string:parse("foo:over/there#nose"),
+ #{scheme := "foo", path:= "/over/there", fragment := "nose"} =
+ uri_string:parse("foo:/over/there#nose"),
+ #{scheme := "foo", host := "example.com", fragment := "nose"} =
+ uri_string:parse("foo://example.com#nose"),
+ #{scheme := "foo", host := "example.com", path := "/", fragment := "nose"} =
+ uri_string:parse("foo://example.com/#nose"),
+ #{scheme := "foo", host := "example.com", fragment := "nose"} =
+ uri_string:parse("foo://example.com#nose"),
+
+ #{fragment := "nose"} =
+ uri_string:parse("#nose"),
+ #{path := "over/there", fragment := "nose"} =
+ uri_string:parse("over/there#nose"),
+ #{path := "/", fragment := "nose"} =
+ uri_string:parse("/#nose"),
+ #{path := "/over/there", fragment := "nose"} =
+ uri_string:parse("/over/there#nose"),
+ #{host := "example.com", fragment := "nose"} =
+ uri_string:parse("//example.com#nose"),
+ #{host := "example.com", path := "/", fragment := "nose"} =
+ uri_string:parse("//example.com/#nose").
+
+parse_pct_encoded_fragment(_Config) ->
+ #{scheme := "foo", host := "example.com", fragment := "合気道"} =
+ uri_string:parse("foo://example.com#%E5%90%88%E6%B0%97%E9%81%93"),
+ #{host := "example.com", path := "/", fragment := "合気道"} =
+ uri_string:parse("//example.com/#%E5%90%88%E6%B0%97%E9%81%93").
+
+parse_list(_Config) ->
+ #{scheme := "foo", path := "bar:nisse"} = uri_string:parse("foo:bar:nisse"),
+ #{scheme := "foo", host := "example.com", port := 8042,
+ path := "/over/there", query := "name=ferret", fragment := "nose"} =
+ uri_string:parse("foo://example.com:8042/over/there?name=ferret#nose"),
+ #{scheme := "foo", userinfo := "admin:admin", host := "example.com", port := 8042,
+ path := "/over/there", query := "name=ferret", fragment := "nose"} =
+ uri_string:parse("foo://admin:[email protected]:8042/over/there?name=ferret#nose").
+
+parse_binary(_Config) ->
+ #{scheme := <<"foo">>, path := <<"bar:nisse">>} = uri_string:parse(<<"foo:bar:nisse">>),
+ #{scheme := <<"foo">>, host := <<"example.com">>, port := 8042,
+ path := <<"/over/there">>, query := <<"name=ferret">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"foo://example.com:8042/over/there?name=ferret#nose">>),
+ #{scheme := <<"foo">>, userinfo := <<"admin:admin">>, host := <<"example.com">>, port := 8042,
+ path := <<"/over/there">>, query := <<"name=ferret">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"foo://admin:[email protected]:8042/over/there?name=ferret#nose">>).
+
+
+parse_mixed(_Config) ->
+ #{scheme := "foo", path := "bar"} =
+ uri_string:parse(lists:append("fo",<<"o:bar">>)),
+ #{scheme := "foo", path := "bar"} =
+ uri_string:parse(lists:append("foo:b",<<"ar">>)),
+ #{scheme := "foo", path := "bar:bar"} =
+ uri_string:parse([[102],[111,111],<<":bar">>,58,98,97,114]).
+
+parse_relative(_Config) ->
+ #{path := "/path"} =
+ uri_string:parse(lists:append("/pa",<<"th">>)),
+ #{path := "foo"} =
+ uri_string:parse(lists:append("fo",<<"o">>)).
+
+parse_special(_Config) ->
+ #{host := [],query := []} = uri_string:parse("//?"),
+ #{fragment := [],host := []} = uri_string:parse("//#"),
+ #{host := [],query := [],scheme := "foo"} = uri_string:parse("foo://?"),
+ #{fragment := [],host := [],scheme := "foo"} = uri_string:parse("foo://#"),
+ #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>),
+ #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>),
+ #{host := <<>>, path := <<"/hostname">>} = uri_string:parse(<<"///hostname">>),
+ #{host := [],path := "/",query := []} = uri_string:parse("///?"),
+ #{fragment := [],host := [],path := "/"} = uri_string:parse("///#"),
+ #{host := "foo",query := []} = uri_string:parse("//foo?"),
+ #{fragment := [],host := "foo"} = uri_string:parse("//foo#"),
+ #{host := "foo",path := "/"} = uri_string:parse("//foo/"),
+ #{host := "foo",query := [],scheme := "http"} = uri_string:parse("http://foo?"),
+ #{fragment := [],host := "foo",scheme := "http"} = uri_string:parse("http://foo#"),
+ #{host := "foo",path := "/",scheme := "http"} = uri_string:parse("http://foo/"),
+ #{fragment := [],host := "host",port := 80,scheme := "http"} = uri_string:parse("http://host:80#"),
+ #{host := "host",port := 80,query := [],scheme := "http"} = uri_string:parse("http://host:80?"),
+ #{path := [],query := []} = uri_string:parse("?"),
+ #{path := [],query := "?"} = uri_string:parse("??"),
+ #{path := [],query := "??"} = uri_string:parse("???").
+
+parse_special2(_Config) ->
+ #{host := [],path := "/",port := 1,scheme := "a"} = uri_string:parse("a://:1/"),
+ #{path := "/a/",scheme := "a"} = uri_string:parse("a:/a/"),
+ #{host := [],path := [],userinfo := []} = uri_string:parse("//@"),
+ #{host := [],path := [],scheme := "foo",userinfo := []} = uri_string:parse("foo://@"),
+ #{host := [],path := "/",userinfo := []} = uri_string:parse("//@/"),
+ #{host := [],path := "/",scheme := "foo",userinfo := []} = uri_string:parse("foo://@/"),
+ #{host := "localhost",path := "/",port := undefined} = uri_string:parse("//localhost:/"),
+ #{host := [],path := [],port := undefined} = uri_string:parse("//:").
+
+parse_negative(_Config) ->
+ {error,invalid_uri,"å"} = uri_string:parse("å"),
+ {error,invalid_uri,"å"} = uri_string:parse("aå:/foo"),
+ {error,invalid_uri,":"} = uri_string:parse("foo://usär@host"),
+ {error,invalid_uri,"ö"} = uri_string:parse("//host/path?foö=bar"),
+ {error,invalid_uri,"ö"} = uri_string:parse("//host/path#foö"),
+ {error,invalid_uri,"127.256.0.1"} = uri_string:parse("//127.256.0.1"),
+ {error,invalid_uri,":::127.0.0.1"} = uri_string:parse("//[:::127.0.0.1]"),
+ {error,invalid_utf8,<<0,0,0,246>>} = uri_string:parse("//%00%00%00%F6"),
+ {error,invalid_uri,"A"} = uri_string:parse("//localhost:A8").
+
+
+%%-------------------------------------------------------------------------
+%% Recompose tests
+%%-------------------------------------------------------------------------
+recompose_fragment(_Config) ->
+ <<?FRAGMENT_ENC>> = uri_string:recompose(#{fragment => <<?FRAGMENT/utf8>>, path => <<>>}),
+ ?FRAGMENT_ENC = uri_string:recompose(#{fragment => ?FRAGMENT, path => ""}).
+
+recompose_parse_fragment(_Config) ->
+ <<?FRAGMENT_ENC>> = uri_string:recompose(uri_string:parse(<<?FRAGMENT_ENC>>)),
+ ?FRAGMENT_ENC = uri_string:recompose(uri_string:parse(?FRAGMENT_ENC)).
+
+recompose_query(_Config) ->
+ <<?QUERY_ENC>> =
+ uri_string:recompose(#{query => <<?QUERY/utf8>>, path => <<>>}),
+ <<?QUERY_ENC?FRAGMENT_ENC>> =
+ uri_string:recompose(#{query => <<?QUERY/utf8>>,
+ fragment => <<?FRAGMENT/utf8>>,
+ path => <<>>}),
+ "?name=%C3%B6rn" =
+ uri_string:recompose(#{query => "name=örn", path => ""}),
+ "?name=%C3%B6rn#n%C3%A4sa" =
+ uri_string:recompose(#{query => "name=örn",
+ fragment => "näsa",
+ path => ""}).
+
+recompose_parse_query(_Config) ->
+ <<"?name=%C3%B6rn">> = uri_string:recompose(uri_string:parse(<<"?name=%C3%B6rn">>)),
+ <<"?name=%C3%B6rn#n%C3%A4sa">> =
+ uri_string:recompose(uri_string:parse(<<"?name=%C3%B6rn#n%C3%A4sa">>)),
+ "?name=%C3%B6rn" = uri_string:recompose(uri_string:parse("?name=%C3%B6rn")),
+ "?name=%C3%B6rn#n%C3%A4sa" = uri_string:recompose(uri_string:parse("?name=%C3%B6rn#n%C3%A4sa")).
+
+recompose_path(_Config) ->
+ <<"/d%C3%A4r">> =
+ uri_string:recompose(#{path => <<"/där"/utf8>>}),
+ <<"/d%C3%A4r#n%C3%A4sa">> =
+ uri_string:recompose(#{path => <<"/där"/utf8>>,
+ fragment => <<"näsa"/utf8>>}),
+ <<"/d%C3%A4r?name=%C3%B6rn">> =
+ uri_string:recompose(#{path => <<"/där"/utf8>>,
+ query => <<"name=örn"/utf8>>}),
+ <<"/d%C3%A4r?name=%C3%B6rn#n%C3%A4sa">> =
+ uri_string:recompose(#{path => <<"/där"/utf8>>,
+ query => <<"name=örn"/utf8>>,
+ fragment => <<"näsa"/utf8>>}),
+
+
+ "/d%C3%A4r" =
+ uri_string:recompose(#{path => "/där"}),
+ "/d%C3%A4r#n%C3%A4sa" =
+ uri_string:recompose(#{path => "/där",
+ fragment => "näsa"}),
+ "/d%C3%A4r?name=%C3%B6rn" =
+ uri_string:recompose(#{path => "/där",
+ query => "name=örn"}),
+ "/d%C3%A4r?name=%C3%B6rn#n%C3%A4sa" =
+ uri_string:recompose(#{path => "/där",
+ query => "name=örn",
+ fragment => "näsa"}).
+
+
+recompose_parse_path(_Config) ->
+ <<"/d%C3%A4r">> =
+ uri_string:recompose(uri_string:parse(<<"/d%C3%A4r">>)),
+ <<"/d%C3%A4r#n%C3%A4sa">> =
+ uri_string:recompose(uri_string:parse(<<"/d%C3%A4r#n%C3%A4sa">>)),
+ <<"/d%C3%A4r?name=%C3%B6rn">> =
+ uri_string:recompose(uri_string:parse(<<"/d%C3%A4r?name=%C3%B6rn">>)),
+
+ "/d%C3%A4r" =
+ uri_string:recompose(uri_string:parse("/d%C3%A4r")),
+ "/d%C3%A4r#n%C3%A4sa" =
+ uri_string:recompose(uri_string:parse("/d%C3%A4r#n%C3%A4sa")),
+ "/d%C3%A4r?name=%C3%B6rn" =
+ uri_string:recompose(uri_string:parse("/d%C3%A4r?name=%C3%B6rn")).
+
+
+recompose_autogen(_Config) ->
+ Tests = generate_test_vectors(uri_combinations()),
+ lists:map(fun run_test_recompose/1, Tests).
+
+parse_recompose_autogen(_Config) ->
+ Tests = generate_test_vectors(uri_combinations()),
+ lists:map(fun run_test_parse_recompose/1, Tests).
+
+transcode_basic(_Config) ->
+ <<"foo%C3%B6bar"/utf8>> =
+ uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>, [{in_encoding, utf32},{out_encoding, utf8}]),
+ "foo%C3%B6bar" =
+ uri_string:transcode("foo%00%00%00%F6bar", [{in_encoding, utf32},{out_encoding, utf8}]),
+ <<"foo%00%00%00%F6bar"/utf32>> =
+ uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]),
+ "foo%00%00%00%F6bar" =
+ uri_string:transcode("foo%C3%B6bar", [{in_encoding, utf8},{out_encoding, utf32}]),
+ "foo%C3%B6bar" =
+ uri_string:transcode("foo%F6bar", [{in_encoding, latin1},{out_encoding, utf8}]).
+
+transcode_options(_Config) ->
+ <<"foo%C3%B6bar"/utf8>> =
+ uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, []),
+ <<"foo%C3%B6bar"/utf8>> =
+ uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>, [{in_encoding, utf32}]),
+ <<"foo%00%00%00%F6bar"/utf32>> =
+ uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, [{out_encoding, utf32}]).
+
+transcode_mixed(_Config) ->
+ "foo%00%00%00%F6bar" =
+ uri_string:transcode(["foo",<<"%C3%B6"/utf8>>,<<"ba"/utf8>>,"r"], [{out_encoding, utf32}]),
+ "foo%00%00%00%F6bar" =
+ uri_string:transcode(["foo",<<"%C3%"/utf8>>,<<"B6ba"/utf8>>,"r"], [{out_encoding, utf32}]),
+ "foo%C3%B6bar" =
+ uri_string:transcode(["foo%00", <<"%00%0"/utf32>>,<<"0%F"/utf32>>,"6bar"], [{in_encoding, utf32},{out_encoding, utf8}]).
+
+transcode_negative(_Config) ->
+ {error,invalid_percent_encoding,"%BXbar"} =
+ uri_string:transcode(<<"foo%C3%BXbar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]),
+ {error,invalid_input,<<"ö">>} =
+ uri_string:transcode("foo%F6bar", [{in_encoding, utf8},{out_encoding, utf8}]).
+
+normalize(_Config) ->
+ "/a/g" = uri_string:normalize("/a/b/c/./../../g"),
+ <<"mid/6">> = uri_string:normalize(<<"mid/content=5/../6">>),
+ "http://localhost-%C3%B6rebro/a/g" =
+ uri_string:normalize("http://localhos%74-%c3%b6rebro:80/a/b/c/./../../g"),
+ <<"http://localhost-%C3%B6rebro/a/g">> =
+ uri_string:normalize(<<"http://localhos%74-%c3%b6rebro:80/a/b/c/./../../g">>),
+ <<"https://localhost/">> =
+ uri_string:normalize(<<"https://localhost:443">>),
+ <<"https://localhost:445/">> =
+ uri_string:normalize(<<"https://localhost:445">>),
+ <<"ftp://localhost">> =
+ uri_string:normalize(<<"ftp://localhost:21">>),
+ <<"ssh://localhost">> =
+ uri_string:normalize(<<"ssh://localhost:22">>),
+ <<"sftp://localhost">> =
+ uri_string:normalize(<<"sftp://localhost:22">>),
+ <<"tftp://localhost">> =
+ uri_string:normalize(<<"tftp://localhost:69">>).
diff --git a/lib/stdlib/test/uri_string_property_test_SUITE.erl b/lib/stdlib/test/uri_string_property_test_SUITE.erl
new file mode 100644
index 0000000000..ae2c61c7aa
--- /dev/null
+++ b/lib/stdlib/test/uri_string_property_test_SUITE.erl
@@ -0,0 +1,39 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+-module(uri_string_property_test_SUITE).
+
+-include_lib("common_test/include/ct.hrl").
+-compile(export_all).
+
+all() -> [recompose].
+
+init_per_suite(Config) ->
+ ct_property_test:init_per_suite(Config).
+
+end_per_suite(Config) ->
+ Config.
+
+%%%========================================================================
+%%% Test suites
+%%%========================================================================
+recompose(Config) ->
+ ct_property_test:quickcheck(
+ uri_string_recompose:prop_recompose(),
+ Config).