diff options
Diffstat (limited to 'lib/stdlib')
-rw-r--r-- | lib/stdlib/doc/src/Makefile | 1 | ||||
-rw-r--r-- | lib/stdlib/doc/src/ref_man.xml | 1 | ||||
-rw-r--r-- | lib/stdlib/doc/src/specs.xml | 1 | ||||
-rw-r--r-- | lib/stdlib/doc/src/uri_string.xml | 230 | ||||
-rw-r--r-- | lib/stdlib/src/Makefile | 1 | ||||
-rw-r--r-- | lib/stdlib/src/stdlib.app.src | 1 | ||||
-rw-r--r-- | lib/stdlib/src/uri_string.erl | 1842 | ||||
-rw-r--r-- | lib/stdlib/test/Makefile | 1 | ||||
-rw-r--r-- | lib/stdlib/test/property_test/README | 12 | ||||
-rw-r--r-- | lib/stdlib/test/property_test/uri_string_recompose.erl | 361 | ||||
-rw-r--r-- | lib/stdlib/test/uri_string_SUITE.erl | 844 | ||||
-rw-r--r-- | lib/stdlib/test/uri_string_property_test_SUITE.erl | 39 |
12 files changed, 3334 insertions, 0 deletions
diff --git a/lib/stdlib/doc/src/Makefile b/lib/stdlib/doc/src/Makefile index 93eac8220d..aeed79408b 100644 --- a/lib/stdlib/doc/src/Makefile +++ b/lib/stdlib/doc/src/Makefile @@ -98,6 +98,7 @@ XML_REF3_FILES = \ sys.xml \ timer.xml \ unicode.xml \ + uri_string.xml \ win32reg.xml \ zip.xml diff --git a/lib/stdlib/doc/src/ref_man.xml b/lib/stdlib/doc/src/ref_man.xml index 878a3babc5..68bfddbc71 100644 --- a/lib/stdlib/doc/src/ref_man.xml +++ b/lib/stdlib/doc/src/ref_man.xml @@ -93,6 +93,7 @@ <xi:include href="sys.xml"/> <xi:include href="timer.xml"/> <xi:include href="unicode.xml"/> + <xi:include href="uri_string.xml"/> <xi:include href="win32reg.xml"/> <xi:include href="zip.xml"/> </application> diff --git a/lib/stdlib/doc/src/specs.xml b/lib/stdlib/doc/src/specs.xml index 45b207b13d..d559adf9b6 100644 --- a/lib/stdlib/doc/src/specs.xml +++ b/lib/stdlib/doc/src/specs.xml @@ -60,6 +60,7 @@ <xi:include href="../specs/specs_sys.xml"/> <xi:include href="../specs/specs_timer.xml"/> <xi:include href="../specs/specs_unicode.xml"/> + <xi:include href="../specs/specs_uri_string.xml"/> <xi:include href="../specs/specs_win32reg.xml"/> <xi:include href="../specs/specs_zip.xml"/> </specs> diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml new file mode 100644 index 0000000000..9ace2b0a05 --- /dev/null +++ b/lib/stdlib/doc/src/uri_string.xml @@ -0,0 +1,230 @@ +<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE erlref SYSTEM "erlref.dtd"> + +<erlref> + <header> + <copyright> + <year>2017</year><year>2017</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + </legalnotice> + + <title>uri_string</title> + <prepared>Péter Dimitrov</prepared> + <docno>1</docno> + <date>2017-10-24</date> + <rev>A</rev> + </header> + <module>uri_string</module> + <modulesummary>URI processing functions.</modulesummary> + <description> + <p>This module contains functions for parsing and handling URIs + (<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>). + </p> + <p>A URI is an identifier consisting of a sequence of characters matching the syntax + rule named <em>URI</em> in <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>. + </p> + <p> The generic URI syntax consists of a hierarchical sequence of components referred + to as the scheme, authority, path, query, and fragment:</p> + <pre> + URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + hier-part = "//" authority path-abempty + / path-absolute + / path-rootless + / path-empty + scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + authority = [ userinfo "@" ] host [ ":" port ] + userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + + reserved = gen-delims / sub-delims + gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + </pre><br></br> + <p>The interpretation of a URI depends only on the characters used and not on how those + characters are represented in a network protocol.</p> + <p>The functions implemented by this module cover the following use cases:</p> + <list type="bulleted"> + <item>Parsing URIs into its components and returing a map<br></br> + <seealso marker="#parse/1"><c>parse/1</c></seealso> + </item> + <item>Recomposing a map of URI components into a URI string<br></br> + <seealso marker="#recompose/1"><c>recompose/1</c></seealso> + </item> + <item>Changing inbound binary and percent-encoding of URIs<br></br> + <seealso marker="#transcode/2"><c>transcode/2</c></seealso> + </item> + <item>Transforming URIs into a normalized form<br></br> + <seealso marker="#normalize/1"><c>normalize/1</c></seealso> + </item> + </list> + <p>There are four different encodings present during the handling of URIs:</p> + <list type="bulleted"> + <item>Inbound binary encoding in binaries</item> + <item>Inbound percent-encoding in lists and binaries</item> + <item>Outbound binary encoding in binaries</item> + <item>Outbound percent-encoding in lists and binaries</item> + </list> + <p>Functions with <c>uri_string()</c> argument accept lists, binaries and + mixed lists (lists with binary elements) as input type. All of the functions but + <c>transcode/2</c> expects input as lists of unicode codepoints, UTF-8 encoded binaries + and UTF-8 percent-encoded URI parts ("%C3%B6" corresponds to the unicode character "ö").</p> + <p>Unless otherwise specified the return value type and encoding are the same as the input + type and encoding. That is, binary input returns binary output, list input returns a list + output but mixed input returns list output.</p> + <p>In case of lists there is only percent-encoding. In binaries, however, both binary encoding + and percent-encoding shall be considered. <c>transcode/2</c> provides the means to convert + between the supported encodings, it takes a <c>uri_string()</c> and a list of options + specifying inbound and outbound encodings.</p> + <p><url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> does not mandate any specific + character encoding and it is usually defined by the protocol or surrounding text. This library + takes the same assumption, binary and percent-encoding are handled as one configuration unit, + they cannot be set to different values.</p> + </description> + + <datatypes> + <datatype> + <name name="error"/> + <desc> + <p>Error tuple indicating the type of error. Possible values of the second component:</p> + <list type="bulleted"> + <item><c>invalid_input</c></item> + <item><c>invalid_map</c></item> + <item><c>invalid_percent_encoding</c></item> + <item><c>invalid_scheme</c></item> + <item><c>invalid_uri</c></item> + <item><c>invalid_utf8</c></item> + </list> + <p>The third component is a term providing additional information about the + cause of the error.</p> + </desc> + </datatype> + <datatype> + <name name="uri_map"/> + <desc> + <p>Map holding the main components of a URI.</p> + </desc> + </datatype> + <datatype> + <name name="uri_string"/> + <desc> + <p>List of unicode codepoints, a UTF-8 encoded binary, or a mix of the two, + representing an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> + compliant URI (<em>percent-encoded form</em>). + A URI is a sequence of characters from a very limited set: the letters of + the basic Latin alphabet, digits, and a few special characters.</p> + </desc> + </datatype> + </datatypes> + + <funcs> + + <func> + <name name="normalize" arity="1"/> + <fsummary>Syntax-based normalization.</fsummary> + <desc> + <p>Transforms <c><anno>URIString</anno></c> into a normalized form + using Syntax-Based Normalization as defined by + <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>.</p> + <p>This function implements case normalization, percent-encoding + normalization, path segment normalization and scheme based normalization + for HTTP(S) with basic support for FTP, SSH, SFTP and TFTP.</p> + <p><em>Example:</em></p> + <pre> +1> <input>uri_string:normalize("/a/b/c/./../../g").</input> +"/a/g" +2> <![CDATA[uri_string:normalize(<<"mid/content=5/../6">>).]]> +<![CDATA[<<"mid/6">>]]> +3> uri_string:normalize("http://localhost:80"). +"https://localhost/" + </pre> + </desc> + </func> + + <func> + <name name="parse" arity="1"/> + <fsummary>Parse URI into a map.</fsummary> + <desc> + <p>Parses an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> + compliant <c>uri_string()</c> into a <c>uri_map()</c>, that holds the parsed + components of the <c>URI</c>. + If parsing fails, an error tuple is returned.</p> + <p>See also the opposite operation <seealso marker="#recompose/1"> + <c>recompose/1</c></seealso>.</p> + <p><em>Example:</em></p> + <pre> +1> <input>uri_string:parse("foo://[email protected]:8042/over/there?name=ferret#nose").</input> +#{fragment => "nose",host => "example.com", + path => "/over/there",port => 8042,query => "name=ferret", + scheme => foo,userinfo => "user"} +2> <![CDATA[uri_string:parse(<<"foo://[email protected]:8042/over/there?name=ferret">>).]]> +<![CDATA[#{host => <<"example.com">>,path => <<"/over/there">>, + port => 8042,query => <<"name=ferret">>,scheme => <<"foo">>, + userinfo => <<"user">>}]]> + </pre> + </desc> + </func> + + <func> + <name name="recompose" arity="1"/> + <fsummary>Recompose URI.</fsummary> + <desc> + <p>Creates an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> compliant + <c><anno>URIString</anno></c> (percent-encoded), based on the components of + <c><anno>URIMap</anno></c>. + If the <c><anno>URIMap</anno></c> is invalid, an error tuple is returned.</p> + <p>See also the opposite operation <seealso marker="#parse/1"> + <c>parse/1</c></seealso>.</p> + <p><em>Example:</em></p> + <pre> +1> <input>URIMap = #{fragment => "nose", host => "example.com", path => "/over/there",</input> +1> port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}. +#{fragment => "top",host => "example.com", + path => "/over/there",port => 8042,query => "?name=ferret", + scheme => foo,userinfo => "user"} + +2> <input>uri_string:recompose(URIMap).</input> +"foo://example.com:8042/over/there?name=ferret#nose"</pre> + </desc> + </func> + + <func> + <name name="transcode" arity="2"/> + <fsummary>Transcode URI.</fsummary> + <desc> + <p>Transcodes an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> + compliant <c><anno>URIString</anno></c>, + where <c><anno>Options</anno></c> is a list of tagged tuples, specifying the inbound + (<c>in_encoding</c>) and outbound (<c>out_encoding</c>) encodings. <c>in_encoding</c> + and <c>out_encoding</c> specifies both binary encoding and percent-encoding for the + input and output data. Mixed encoding, where binary encoding is not the same as + percent-encoding, is not supported. + If an argument is invalid, an error tuple is returned.</p> + <p><em>Example:</em></p> + <pre> +1> <input><![CDATA[uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>,]]></input> +1> [{in_encoding, utf32},{out_encoding, utf8}]). +<![CDATA[<<"foo%C3%B6bar"/utf8>>]]> +2> uri_string:transcode("foo%F6bar", [{in_encoding, latin1}, +2> {out_encoding, utf8}]). +"foo%C3%B6bar" + </pre> + </desc> + </func> + + </funcs> +</erlref> diff --git a/lib/stdlib/src/Makefile b/lib/stdlib/src/Makefile index bf836203ec..8b156929d7 100644 --- a/lib/stdlib/src/Makefile +++ b/lib/stdlib/src/Makefile @@ -121,6 +121,7 @@ MODULES= \ timer \ unicode \ unicode_util \ + uri_string \ win32reg \ zip diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src index ab0824ca17..5fb48acfab 100644 --- a/lib/stdlib/src/stdlib.app.src +++ b/lib/stdlib/src/stdlib.app.src @@ -101,6 +101,7 @@ timer, unicode, unicode_util, + uri_string, win32reg, zip]}, {registered,[timer_server,rsh_starter,take_over_monitor,pool_master, diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl new file mode 100644 index 0000000000..22212da222 --- /dev/null +++ b/lib/stdlib/src/uri_string.erl @@ -0,0 +1,1842 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +%% +%% [RFC 3986, Chapter 2.2. Reserved Characters] +%% +%% reserved = gen-delims / sub-delims +%% +%% gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" +%% +%% sub-delims = "!" / "$" / "&" / "'" / "(" / ")" +%% / "*" / "+" / "," / ";" / "=" +%% +%% +%% [RFC 3986, Chapter 2.3. Unreserved Characters] +%% +%% unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +%% +%% +%% [RFC 3986, Chapter 3. Syntax Components] +%% +%% The generic URI syntax consists of a hierarchical sequence of +%% components referred to as the scheme, authority, path, query, and +%% fragment. +%% +%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] +%% +%% hier-part = "//" authority path-abempty +%% / path-absolute +%% / path-rootless +%% / path-empty +%% +%% The scheme and path components are required, though the path may be +%% empty (no characters). When authority is present, the path must +%% either be empty or begin with a slash ("/") character. When +%% authority is not present, the path cannot begin with two slash +%% characters ("//"). These restrictions result in five different ABNF +%% rules for a path (Section 3.3), only one of which will match any +%% given URI reference. +%% +%% The following are two example URIs and their component parts: +%% +%% foo://example.com:8042/over/there?name=ferret#nose +%% \_/ \______________/\_________/ \_________/ \__/ +%% | | | | | +%% scheme authority path query fragment +%% | _____________________|__ +%% / \ / \ +%% urn:example:animal:ferret:nose +%% +%% +%% [RFC 3986, Chapter 3.1. Scheme] +%% +%% Each URI begins with a scheme name that refers to a specification for +%% assigning identifiers within that scheme. +%% +%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) +%% +%% +%% [RFC 3986, Chapter 3.2. Authority] +%% +%% Many URI schemes include a hierarchical element for a naming +%% authority so that governance of the name space defined by the +%% remainder of the URI is delegated to that authority (which may, in +%% turn, delegate it further). +%% +%% authority = [ userinfo "@" ] host [ ":" port ] +%% +%% +%% [RFC 3986, Chapter 3.2.1. User Information] +%% +%% The userinfo subcomponent may consist of a user name and, optionally, +%% scheme-specific information about how to gain authorization to access +%% the resource. The user information, if present, is followed by a +%% commercial at-sign ("@") that delimits it from the host. +%% +%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) +%% +%% +%% [RFC 3986, Chapter 3.2.2. Host] +%% +%% The host subcomponent of authority is identified by an IP literal +%% encapsulated within square brackets, an IPv4 address in dotted- +%% decimal form, or a registered name. +%% +%% host = IP-literal / IPv4address / reg-name +%% +%% IP-literal = "[" ( IPv6address / IPvFuture ) "]" +%% +%% IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) +%% +%% IPv6address = 6( h16 ":" ) ls32 +%% / "::" 5( h16 ":" ) ls32 +%% / [ h16 ] "::" 4( h16 ":" ) ls32 +%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 +%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 +%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 +%% / [ *4( h16 ":" ) h16 ] "::" ls32 +%% / [ *5( h16 ":" ) h16 ] "::" h16 +%% / [ *6( h16 ":" ) h16 ] "::" +%% +%% ls32 = ( h16 ":" h16 ) / IPv4address +%% ; least-significant 32 bits of address +%% +%% h16 = 1*4HEXDIG +%% ; 16 bits of address represented in hexadecimal +%% +%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet +%% +%% dec-octet = DIGIT ; 0-9 +%% / %x31-39 DIGIT ; 10-99 +%% / "1" 2DIGIT ; 100-199 +%% / "2" %x30-34 DIGIT ; 200-249 +%% / "25" %x30-35 ; 250-255 +%% +%% reg-name = *( unreserved / pct-encoded / sub-delims ) +%% +%% +%% [RFC 3986, Chapter 3.2.2. Port] +%% +%% The port subcomponent of authority is designated by an optional port +%% number in decimal following the host and delimited from it by a +%% single colon (":") character. +%% +%% port = *DIGIT +%% +%% +%% [RFC 3986, Chapter 3.3. Path] +%% +%% The path component contains data, usually organized in hierarchical +%% form, that, along with data in the non-hierarchical query component +%% (Section 3.4), serves to identify a resource within the scope of the +%% URI's scheme and naming authority (if any). The path is terminated +%% by the first question mark ("?") or number sign ("#") character, or +%% by the end of the URI. +%% +%% path = path-abempty ; begins with "/" or is empty +%% / path-absolute ; begins with "/" but not "//" +%% / path-noscheme ; begins with a non-colon segment +%% / path-rootless ; begins with a segment +%% / path-empty ; zero characters +%% +%% path-abempty = *( "/" segment ) +%% path-absolute = "/" [ segment-nz *( "/" segment ) ] +%% path-noscheme = segment-nz-nc *( "/" segment ) +%% path-rootless = segment-nz *( "/" segment ) +%% path-empty = 0<pchar> +%% segment = *pchar +%% segment-nz = 1*pchar +%% segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) +%% ; non-zero-length segment without any colon ":" +%% +%% pchar = unreserved / pct-encoded / sub-delims / ":" / "@" +%% +%% +%% [RFC 3986, Chapter 3.4. Query] +%% +%% The query component contains non-hierarchical data that, along with +%% data in the path component (Section 3.3), serves to identify a +%% resource within the scope of the URI's scheme and naming authority +%% (if any). The query component is indicated by the first question +%% mark ("?") character and terminated by a number sign ("#") character +%% or by the end of the URI. +%% +%% query = *( pchar / "/" / "?" ) +%% +%% +%% [RFC 3986, Chapter 3.5. Fragment] +%% +%% The fragment identifier component of a URI allows indirect +%% identification of a secondary resource by reference to a primary +%% resource and additional identifying information. +%% +%% fragment = *( pchar / "/" / "?" ) +%% +%% +%% [RFC 3986, Chapter 4.1. URI Reference] +%% +%% URI-reference is used to denote the most common usage of a resource +%% identifier. +%% +%% URI-reference = URI / relative-ref +%% +%% +%% [RFC 3986, Chapter 4.2. Relative Reference] +%% +%% A relative reference takes advantage of the hierarchical syntax +%% (Section 1.2.3) to express a URI reference relative to the name space +%% of another hierarchical URI. +%% +%% relative-ref = relative-part [ "?" query ] [ "#" fragment ] +%% +%% relative-part = "//" authority path-abempty +%% / path-absolute +%% / path-noscheme +%% / path-empty +%% +%% +%% [RFC 3986, Chapter 4.3. Absolute URI] +%% +%% Some protocol elements allow only the absolute form of a URI without +%% a fragment identifier. For example, defining a base URI for later +%% use by relative references calls for an absolute-URI syntax rule that +%% does not allow a fragment. +%% +%% absolute-URI = scheme ":" hier-part [ "?" query ] +%% +-module(uri_string). + +%%------------------------------------------------------------------------- +%% External API +%%------------------------------------------------------------------------- +-export([normalize/1, parse/1, + recompose/1, transcode/2]). +-export_type([error/0, uri_map/0, uri_string/0]). + + +%%------------------------------------------------------------------------- +%% Internal API +%%------------------------------------------------------------------------- +-export([is_host/1, is_path/1]). % suppress warnings + + +%%------------------------------------------------------------------------- +%% Macros +%%------------------------------------------------------------------------- +-define(CHAR(Char), <<Char/utf8>>). +-define(STRING_EMPTY, <<>>). +-define(STRING(MatchStr), <<MatchStr/binary>>). +-define(STRING_REST(MatchStr, Rest), <<MatchStr/utf8, Rest/binary>>). + +-define(DEC2HEX(X), + if ((X) >= 0) andalso ((X) =< 9) -> (X) + $0; + ((X) >= 10) andalso ((X) =< 15) -> (X) + $A - 10 + end). + +-define(HEX2DEC(X), + if ((X) >= $0) andalso ((X) =< $9) -> (X) - $0; + ((X) >= $A) andalso ((X) =< $F) -> (X) - $A + 10; + ((X) >= $a) andalso ((X) =< $f) -> (X) - $a + 10 + end). + + +%%%========================================================================= +%%% API +%%%========================================================================= + +%%------------------------------------------------------------------------- +%% URI compliant with RFC 3986 +%% ASCII %x21 - %x7A ("!" - "z") except +%% %x34 " double quote +%% %x60 < less than +%% %x62 > greater than +%% %x92 \ backslash +%% %x94 ^ caret / circumflex +%% %x96 ` grave / accent +%%------------------------------------------------------------------------- +-type uri_string() :: iodata(). +-type error() :: {error, atom(), term()}. + + +%%------------------------------------------------------------------------- +%% RFC 3986, Chapter 3. Syntax Components +%%------------------------------------------------------------------------- +-type uri_map() :: + #{fragment => unicode:chardata(), + host => unicode:chardata(), + path => unicode:chardata(), + port => non_neg_integer() | undefined, + query => unicode:chardata(), + scheme => unicode:chardata(), + userinfo => unicode:chardata()} | #{}. + + +%%------------------------------------------------------------------------- +%% Normalize URIs +%%------------------------------------------------------------------------- +-spec normalize(URIString) -> NormalizedURI when + URIString :: uri_string(), + NormalizedURI :: uri_string(). +normalize(URIString) -> + %% Percent-encoding normalization and case normalization for + %% percent-encoded triplets are achieved by running parse and + %% recompose on the input URI string. + recompose( + normalize_path_segment( + normalize_scheme_based( + normalize_case( + parse(URIString))))). + + +%%------------------------------------------------------------------------- +%% Parse URIs +%%------------------------------------------------------------------------- +-spec parse(URIString) -> URIMap when + URIString :: uri_string(), + URIMap :: uri_map() + | error(). +parse(URIString) when is_binary(URIString) -> + try parse_uri_reference(URIString, #{}) + catch + throw:{error, Atom, RestData} -> {error, Atom, RestData} + end; +parse(URIString) when is_list(URIString) -> + try + Binary = unicode:characters_to_binary(URIString), + Map = parse_uri_reference(Binary, #{}), + convert_mapfields_to_list(Map) + catch + throw:{error, Atom, RestData} -> {error, Atom, RestData} + end. + + +%%------------------------------------------------------------------------- +%% Recompose URIs +%%------------------------------------------------------------------------- +-spec recompose(URIMap) -> URIString when + URIMap :: uri_map(), + URIString :: uri_string() + | error(). +recompose(Map) -> + case is_valid_map(Map) of + false -> + {error, invalid_map, Map}; + true -> + try + T0 = update_scheme(Map, empty), + T1 = update_userinfo(Map, T0), + T2 = update_host(Map, T1), + T3 = update_port(Map, T2), + T4 = update_path(Map, T3), + T5 = update_query(Map, T4), + update_fragment(Map, T5) + catch + throw:{error, Atom, RestData} -> {error, Atom, RestData} + end + end. + + +%%------------------------------------------------------------------------- +%% Transcode URIs +%%------------------------------------------------------------------------- +-spec transcode(URIString, Options) -> Result when + URIString :: uri_string(), + Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}], + Result :: uri_string() + | error(). +transcode(URIString, Options) when is_binary(URIString) -> + try + InEnc = proplists:get_value(in_encoding, Options, utf8), + OutEnc = proplists:get_value(out_encoding, Options, utf8), + List = convert_to_list(URIString, InEnc), + Output = transcode(List, [], InEnc, OutEnc), + convert_to_binary(Output, utf8, OutEnc) + catch + throw:{error, Atom, RestData} -> {error, Atom, RestData} + end; +transcode(URIString, Options) when is_list(URIString) -> + InEnc = proplists:get_value(in_encoding, Options, utf8), + OutEnc = proplists:get_value(out_encoding, Options, utf8), + Flattened = flatten_list(URIString, InEnc), + try transcode(Flattened, [], InEnc, OutEnc) + catch + throw:{error, Atom, RestData} -> {error, Atom, RestData} + end. + + +%%%======================================================================== +%%% Internal functions +%%%======================================================================== + +%%------------------------------------------------------------------------- +%% Converts Map fields to lists +%%------------------------------------------------------------------------- +convert_mapfields_to_list(Map) -> + Fun = fun (_, V) when is_binary(V) -> unicode:characters_to_list(V); + (_, V) -> V end, + maps:map(Fun, Map). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 4.1. URI Reference] +%% +%% URI-reference is used to denote the most common usage of a resource +%% identifier. +%% +%% URI-reference = URI / relative-ref +%%------------------------------------------------------------------------- +-spec parse_uri_reference(binary(), uri_map()) -> uri_map(). +parse_uri_reference(<<>>, _) -> #{path => <<>>}; +parse_uri_reference(URIString, URI) -> + try parse_scheme_start(URIString, URI) + catch + throw:{_,_,_} -> + parse_relative_part(URIString, URI) + end. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 4.2. Relative Reference] +%% +%% A relative reference takes advantage of the hierarchical syntax +%% (Section 1.2.3) to express a URI reference relative to the name space +%% of another hierarchical URI. +%% +%% relative-ref = relative-part [ "?" query ] [ "#" fragment ] +%% +%% relative-part = "//" authority path-abempty +%% / path-absolute +%% / path-noscheme +%% / path-empty +%%------------------------------------------------------------------------- +-spec parse_relative_part(binary(), uri_map()) -> uri_map(). +parse_relative_part(?STRING_REST("//", Rest), URI) -> + %% Parse userinfo - "//" is NOT part of authority + try parse_userinfo(Rest, URI) of + {T, URI1} -> + Userinfo = calculate_parsed_userinfo(Rest, T), + URI2 = maybe_add_path(URI1), + URI2#{userinfo => decode_userinfo(Userinfo)} + catch + throw:{_,_,_} -> + {T, URI1} = parse_host(Rest, URI), + Host = calculate_parsed_host_port(Rest, T), + URI2 = maybe_add_path(URI1), + URI2#{host => decode_host(remove_brackets(Host))} + end; +parse_relative_part(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-absolute + Path = calculate_parsed_part(Rest, T), + URI1#{path => decode_path(?STRING_REST($/, Path))}; +parse_relative_part(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + URI2 = maybe_add_path(URI1), + URI2#{query => decode_query(Query)}; +parse_relative_part(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + URI2 = maybe_add_path(URI1), + URI2#{fragment => decode_fragment(Fragment)}; +parse_relative_part(?STRING_REST(Char, Rest), URI) -> + case is_segment_nz_nc(Char) of + true -> + {T, URI1} = parse_segment_nz_nc(Rest, URI), % path-noscheme + Path = calculate_parsed_part(Rest, T), + URI1#{path => decode_path(?STRING_REST(Char, Path))}; + false -> throw({error,invalid_uri,[Char]}) + end. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.3. Path] +%% +%% The path component contains data, usually organized in hierarchical +%% form, that, along with data in the non-hierarchical query component +%% (Section 3.4), serves to identify a resource within the scope of the +%% URI's scheme and naming authority (if any). The path is terminated +%% by the first question mark ("?") or number sign ("#") character, or +%% by the end of the URI. +%% +%% path = path-abempty ; begins with "/" or is empty +%% / path-absolute ; begins with "/" but not "//" +%% / path-noscheme ; begins with a non-colon segment +%% / path-rootless ; begins with a segment +%% / path-empty ; zero characters +%% +%% path-abempty = *( "/" segment ) +%% path-absolute = "/" [ segment-nz *( "/" segment ) ] +%% path-noscheme = segment-nz-nc *( "/" segment ) +%% path-rootless = segment-nz *( "/" segment ) +%% path-empty = 0<pchar> +%% segment = *pchar +%% segment-nz = 1*pchar +%% segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) +%% ; non-zero-length segment without any colon ":" +%% +%% pchar = unreserved / pct-encoded / sub-delims / ":" / "@" +%%------------------------------------------------------------------------- + +%%------------------------------------------------------------------------- +%% path-abempty +%%------------------------------------------------------------------------- +-spec parse_segment(binary(), uri_map()) -> {binary(), uri_map()}. +parse_segment(?STRING_REST($/, Rest), URI) -> + parse_segment(Rest, URI); % segment +parse_segment(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_segment(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_segment(?STRING_REST(Char, Rest), URI) -> + case is_pchar(Char) of + true -> parse_segment(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_segment(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + + +%%------------------------------------------------------------------------- +%% path-noscheme +%%------------------------------------------------------------------------- +-spec parse_segment_nz_nc(binary(), uri_map()) -> {binary(), uri_map()}. +parse_segment_nz_nc(?STRING_REST($/, Rest), URI) -> + parse_segment(Rest, URI); % segment +parse_segment_nz_nc(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_segment_nz_nc(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) -> + case is_segment_nz_nc(Char) of + true -> parse_segment_nz_nc(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_segment_nz_nc(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + + +%% Check if char is pchar. +-spec is_pchar(char()) -> boolean(). +is_pchar($%) -> true; % pct-encoded +is_pchar($:) -> true; +is_pchar($@) -> true; +is_pchar(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + +%% Check if char is segment_nz_nc. +-spec is_segment_nz_nc(char()) -> boolean(). +is_segment_nz_nc($%) -> true; % pct-encoded +is_segment_nz_nc($@) -> true; +is_segment_nz_nc(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.1. Scheme] +%% +%% Each URI begins with a scheme name that refers to a specification for +%% assigning identifiers within that scheme. +%% +%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) +%%------------------------------------------------------------------------- +-spec parse_scheme_start(binary(), uri_map()) -> uri_map(). +parse_scheme_start(?STRING_REST(Char, Rest), URI) -> + case is_alpha(Char) of + true -> {T, URI1} = parse_scheme(Rest, URI), + Scheme = calculate_parsed_scheme(Rest, T), + URI2 = maybe_add_path(URI1), + URI2#{scheme => ?STRING_REST(Char, Scheme)}; + false -> throw({error,invalid_uri,[Char]}) + end. + +%% Add path component if it missing after parsing the URI. +%% According to the URI specification there is always a +%% path component in every URI-reference and it can be +%% empty. +maybe_add_path(Map) -> + case maps:is_key(path, Map) of + false -> + Map#{path => <<>>}; + _Else -> + Map + end. + + +-spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}. +parse_scheme(?STRING_REST($:, Rest), URI) -> + {_, URI1} = parse_hier(Rest, URI), + {Rest, URI1}; +parse_scheme(?STRING_REST(Char, Rest), URI) -> + case is_scheme(Char) of + true -> parse_scheme(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_scheme(?STRING_EMPTY, _URI) -> + throw({error,invalid_uri,<<>>}). + + +%% Check if char is allowed in scheme +-spec is_scheme(char()) -> boolean(). +is_scheme($+) -> true; +is_scheme($-) -> true; +is_scheme($.) -> true; +is_scheme(Char) -> is_alpha(Char) orelse is_digit(Char). + + +%%------------------------------------------------------------------------- +%% hier-part = "//" authority path-abempty +%% / path-absolute +%% / path-rootless +%% / path-empty +%%------------------------------------------------------------------------- +-spec parse_hier(binary(), uri_map()) -> {binary(), uri_map()}. +parse_hier(?STRING_REST("//", Rest), URI) -> + % Parse userinfo - "//" is NOT part of authority + try parse_userinfo(Rest, URI) of + {T, URI1} -> + Userinfo = calculate_parsed_userinfo(Rest, T), + {Rest, URI1#{userinfo => decode_userinfo(Userinfo)}} + catch + throw:{_,_,_} -> + {T, URI1} = parse_host(Rest, URI), + Host = calculate_parsed_host_port(Rest, T), + {Rest, URI1#{host => decode_host(remove_brackets(Host))}} + end; +parse_hier(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-absolute + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; +parse_hier(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_hier(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless + case is_pchar(Char) of + true -> % segment_nz + {T, URI1} = parse_segment(Rest, URI), + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST(Char, Path))}}; + false -> throw({error,invalid_uri,[Char]}) + end; +parse_hier(?STRING_EMPTY, URI) -> + {<<>>, URI}. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.2. Authority] +%% +%% Many URI schemes include a hierarchical element for a naming +%% authority so that governance of the name space defined by the +%% remainder of the URI is delegated to that authority (which may, in +%% turn, delegate it further). +%% +%% The authority component is preceded by a double slash ("//") and is +%% terminated by the next slash ("/"), question mark ("?"), or number +%% sign ("#") character, or by the end of the URI. +%% +%% authority = [ userinfo "@" ] host [ ":" port ] +%% +%% +%% [RFC 3986, Chapter 3.2.1. User Information] +%% +%% The userinfo subcomponent may consist of a user name and, optionally, +%% scheme-specific information about how to gain authorization to access +%% the resource. The user information, if present, is followed by a +%% commercial at-sign ("@") that delimits it from the host. +%% +%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) +%%------------------------------------------------------------------------- +-spec parse_userinfo(binary(), uri_map()) -> {binary(), uri_map()}. +parse_userinfo(?CHAR($@), URI) -> + {?STRING_EMPTY, URI#{host => <<>>}}; +parse_userinfo(?STRING_REST($@, Rest), URI) -> + {T, URI1} = parse_host(Rest, URI), + Host = calculate_parsed_host_port(Rest, T), + {Rest, URI1#{host => decode_host(remove_brackets(Host))}}; +parse_userinfo(?STRING_REST(Char, Rest), URI) -> + case is_userinfo(Char) of + true -> parse_userinfo(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_userinfo(?STRING_EMPTY, _URI) -> + %% URI cannot end in userinfo state + throw({error,invalid_uri,<<>>}). + + +%% Check if char is allowed in userinfo +-spec is_userinfo(char()) -> boolean(). +is_userinfo($%) -> true; % pct-encoded +is_userinfo($:) -> true; +is_userinfo(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.2.2. Host] +%% +%% The host subcomponent of authority is identified by an IP literal +%% encapsulated within square brackets, an IPv4 address in dotted- +%% decimal form, or a registered name. +%% +%% host = IP-literal / IPv4address / reg-name +%% +%% IP-literal = "[" ( IPv6address / IPvFuture ) "]" +%% +%% IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) +%% +%% IPv6address = 6( h16 ":" ) ls32 +%% / "::" 5( h16 ":" ) ls32 +%% / [ h16 ] "::" 4( h16 ":" ) ls32 +%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 +%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 +%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 +%% / [ *4( h16 ":" ) h16 ] "::" ls32 +%% / [ *5( h16 ":" ) h16 ] "::" h16 +%% / [ *6( h16 ":" ) h16 ] "::" +%% +%% ls32 = ( h16 ":" h16 ) / IPv4address +%% ; least-significant 32 bits of address +%% +%% h16 = 1*4HEXDIG +%% ; 16 bits of address represented in hexadecimal +%% +%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet +%% +%% dec-octet = DIGIT ; 0-9 +%% / %x31-39 DIGIT ; 10-99 +%% / "1" 2DIGIT ; 100-199 +%% / "2" %x30-34 DIGIT ; 200-249 +%% / "25" %x30-35 ; 250-255 +%% +%% reg-name = *( unreserved / pct-encoded / sub-delims ) +%%------------------------------------------------------------------------- +-spec parse_host(binary(), uri_map()) -> {binary(), uri_map()}. +parse_host(?STRING_REST($:, Rest), URI) -> + {T, URI1} = parse_port(Rest, URI), + H = calculate_parsed_host_port(Rest, T), + Port = get_port(H), + {Rest, URI1#{port => Port}}; +parse_host(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-abempty + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; +parse_host(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_host(?STRING_REST($[, Rest), URI) -> + parse_ipv6_bin(Rest, [], URI); +parse_host(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_host(?STRING_REST(Char, Rest), URI) -> + case is_digit(Char) of + true -> parse_ipv4_bin(Rest, [Char], URI); + false -> parse_reg_name(?STRING_REST(Char, Rest), URI) + end; +parse_host(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + + +-spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}. +parse_reg_name(?STRING_REST($:, Rest), URI) -> + {T, URI1} = parse_port(Rest, URI), + H = calculate_parsed_host_port(Rest, T), + Port = get_port(H), + {Rest, URI1#{port => Port}}; +parse_reg_name(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-abempty + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; +parse_reg_name(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_reg_name(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_reg_name(?STRING_REST(Char, Rest), URI) -> + case is_reg_name(Char) of + true -> parse_reg_name(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_reg_name(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + +%% Check if char is allowed in reg-name +-spec is_reg_name(char()) -> boolean(). +is_reg_name($%) -> true; +is_reg_name(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + + +-spec parse_ipv4_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}. +parse_ipv4_bin(?STRING_REST($:, Rest), Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {T, URI1} = parse_port(Rest, URI), + H = calculate_parsed_host_port(Rest, T), + Port = get_port(H), + {Rest, URI1#{port => Port}}; +parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {T, URI1} = parse_segment(Rest, URI), % path-abempty + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; +parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) -> + case is_ipv4(Char) of + true -> parse_ipv4_bin(Rest, [Char|Acc], URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_ipv4_bin(?STRING_EMPTY, Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {?STRING_EMPTY, URI}. + + +%% Check if char is allowed in IPv4 addresses +-spec is_ipv4(char()) -> boolean(). +is_ipv4($.) -> true; +is_ipv4(Char) -> is_digit(Char). + +-spec validate_ipv4_address(list()) -> list(). +validate_ipv4_address(Addr) -> + case inet:parse_ipv4strict_address(Addr) of + {ok, _} -> Addr; + {error, _} -> throw({error,invalid_uri,Addr}) + end. + + +-spec parse_ipv6_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}. +parse_ipv6_bin(?STRING_REST($], Rest), Acc, URI) -> + _ = validate_ipv6_address(lists:reverse(Acc)), + parse_ipv6_bin_end(Rest, URI); +parse_ipv6_bin(?STRING_REST(Char, Rest), Acc, URI) -> + case is_ipv6(Char) of + true -> parse_ipv6_bin(Rest, [Char|Acc], URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_ipv6_bin(?STRING_EMPTY, _Acc, _URI) -> + throw({error,invalid_uri,<<>>}). + +%% Check if char is allowed in IPv6 addresses +-spec is_ipv6(char()) -> boolean(). +is_ipv6($:) -> true; +is_ipv6($.) -> true; +is_ipv6(Char) -> is_hex_digit(Char). + + +-spec parse_ipv6_bin_end(binary(), uri_map()) -> {binary(), uri_map()}. +parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) -> + {T, URI1} = parse_port(Rest, URI), + H = calculate_parsed_host_port(Rest, T), + Port = get_port(H), + {Rest, URI1#{port => Port}}; +parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-abempty + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; +parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) -> + case is_ipv6(Char) of + true -> parse_ipv6_bin_end(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_ipv6_bin_end(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + +-spec validate_ipv6_address(list()) -> list(). +validate_ipv6_address(Addr) -> + case inet:parse_ipv6strict_address(Addr) of + {ok, _} -> Addr; + {error, _} -> throw({error,invalid_uri,Addr}) + end. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.2.2. Port] +%% +%% The port subcomponent of authority is designated by an optional port +%% number in decimal following the host and delimited from it by a +%% single colon (":") character. +%% +%% port = *DIGIT +%%------------------------------------------------------------------------- +-spec parse_port(binary(), uri_map()) -> {binary(), uri_map()}. +parse_port(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-abempty + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; +parse_port(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_port(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_port(?STRING_REST(Char, Rest), URI) -> + case is_digit(Char) of + true -> parse_port(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_port(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.4. Query] +%% +%% The query component contains non-hierarchical data that, along with +%% data in the path component (Section 3.3), serves to identify a +%% resource within the scope of the URI's scheme and naming authority +%% (if any). The query component is indicated by the first question +%% mark ("?") character and terminated by a number sign ("#") character +%% or by the end of the URI. +%% +%% query = *( pchar / "/" / "?" ) +%%------------------------------------------------------------------------- +-spec parse_query(binary(), uri_map()) -> {binary(), uri_map()}. +parse_query(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_query(?STRING_REST(Char, Rest), URI) -> + case is_query(Char) of + true -> parse_query(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_query(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + + +%% Check if char is allowed in query +-spec is_query(char()) -> boolean(). +is_query($/) -> true; +is_query($?) -> true; +is_query(Char) -> is_pchar(Char). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.5. Fragment] +%% +%% The fragment identifier component of a URI allows indirect +%% identification of a secondary resource by reference to a primary +%% resource and additional identifying information. +%% +%% fragment = *( pchar / "/" / "?" ) +%%------------------------------------------------------------------------- +-spec parse_fragment(binary(), uri_map()) -> {binary(), uri_map()}. +parse_fragment(?STRING_REST(Char, Rest), URI) -> + case is_fragment(Char) of + true -> parse_fragment(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_fragment(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + + +%% Check if char is allowed in fragment +-spec is_fragment(char()) -> boolean(). +is_fragment($/) -> true; +is_fragment($?) -> true; +is_fragment(Char) -> is_pchar(Char). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 2.2. Reserved Characters] +%% +%% reserved = gen-delims / sub-delims +%% +%% gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" +%% +%% sub-delims = "!" / "$" / "&" / "'" / "(" / ")" +%% / "*" / "+" / "," / ";" / "=" +%% +%%------------------------------------------------------------------------- + +%% Check if char is sub-delim. +-spec is_sub_delim(char()) -> boolean(). +is_sub_delim($!) -> true; +is_sub_delim($$) -> true; +is_sub_delim($&) -> true; +is_sub_delim($') -> true; +is_sub_delim($() -> true; +is_sub_delim($)) -> true; + +is_sub_delim($*) -> true; +is_sub_delim($+) -> true; +is_sub_delim($,) -> true; +is_sub_delim($;) -> true; +is_sub_delim($=) -> true; +is_sub_delim(_) -> false. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 2.3. Unreserved Characters] +%% +%% unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +%% +%%------------------------------------------------------------------------- +-spec is_unreserved(char()) -> boolean(). +is_unreserved($-) -> true; +is_unreserved($.) -> true; +is_unreserved($_) -> true; +is_unreserved($~) -> true; +is_unreserved(Char) -> is_alpha(Char) orelse is_digit(Char). + +-spec is_alpha(char()) -> boolean(). +is_alpha(C) + when $A =< C, C =< $Z; + $a =< C, C =< $z -> true; +is_alpha(_) -> false. + +-spec is_digit(char()) -> boolean(). +is_digit(C) + when $0 =< C, C =< $9 -> true; +is_digit(_) -> false. + +-spec is_hex_digit(char()) -> boolean(). +is_hex_digit(C) + when $0 =< C, C =< $9;$a =< C, C =< $f;$A =< C, C =< $F -> true; +is_hex_digit(_) -> false. + + +%% Remove enclosing brackets from binary +-spec remove_brackets(binary()) -> binary(). +remove_brackets(<<$[/utf8, Rest/binary>>) -> + {H,T} = split_binary(Rest, byte_size(Rest) - 1), + case T =:= <<$]/utf8>> of + true -> H; + false -> Rest + end; +remove_brackets(Addr) -> Addr. + + +%%------------------------------------------------------------------------- +%% Helper functions for calculating the parsed binary. +%%------------------------------------------------------------------------- +-spec calculate_parsed_scheme(binary(), binary()) -> binary(). +calculate_parsed_scheme(Input, <<>>) -> + strip_last_char(Input, [$:]); +calculate_parsed_scheme(Input, Unparsed) -> + get_parsed_binary(Input, Unparsed). + + +-spec calculate_parsed_part(binary(), binary()) -> binary(). +calculate_parsed_part(Input, <<>>) -> + strip_last_char(Input, [$?,$#]); +calculate_parsed_part(Input, Unparsed) -> + get_parsed_binary(Input, Unparsed). + + +-spec calculate_parsed_userinfo(binary(), binary()) -> binary(). +calculate_parsed_userinfo(Input, <<>>) -> + strip_last_char(Input, [$?,$#,$@]); +calculate_parsed_userinfo(Input, Unparsed) -> + get_parsed_binary(Input, Unparsed). + + +-spec calculate_parsed_host_port(binary(), binary()) -> binary(). +calculate_parsed_host_port(Input, <<>>) -> + strip_last_char(Input, [$:,$?,$#,$/]); +calculate_parsed_host_port(Input, Unparsed) -> + get_parsed_binary(Input, Unparsed). + + +calculate_parsed_query_fragment(Input, <<>>) -> + strip_last_char(Input, [$#]); +calculate_parsed_query_fragment(Input, Unparsed) -> + get_parsed_binary(Input, Unparsed). + + +get_port(<<>>) -> + undefined; +get_port(B) -> + try binary_to_integer(B) + catch + error:badarg -> + throw({error, invalid_uri, B}) + end. + + +%% Strip last char if it is in list +%% +%% This function is optimized for speed: parse/1 is about 10% faster than +%% with an alternative implementation based on lists and sets. +strip_last_char(<<>>, _) -> <<>>; +strip_last_char(Input, [C0]) -> + case binary:last(Input) of + C0 -> + init_binary(Input); + _Else -> + Input + end; +strip_last_char(Input, [C0,C1]) -> + case binary:last(Input) of + C0 -> + init_binary(Input); + C1 -> + init_binary(Input); + _Else -> + Input + end; +strip_last_char(Input, [C0,C1,C2]) -> + case binary:last(Input) of + C0 -> + init_binary(Input); + C1 -> + init_binary(Input); + C2 -> + init_binary(Input); + _Else -> + Input + end; +strip_last_char(Input, [C0,C1,C2,C3]) -> + case binary:last(Input) of + C0 -> + init_binary(Input); + C1 -> + init_binary(Input); + C2 -> + init_binary(Input); + C3 -> + init_binary(Input); + _Else -> + Input + end. + + +%% Get parsed binary +get_parsed_binary(Input, Unparsed) -> + {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)), + First. + + +%% Return all bytes of the binary except the last one. The binary must be non-empty. +init_binary(B) -> + {Init, _} = + split_binary(B, byte_size(B) - 1), + Init. + + +%% Returns the size of a binary exluding the first element. +%% Used in calls to split_binary(). +-spec byte_size_exl_head(binary()) -> number(). +byte_size_exl_head(<<>>) -> 0; +byte_size_exl_head(Binary) -> byte_size(Binary) + 1. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 2.1. Percent-Encoding] +%% +%% A percent-encoding mechanism is used to represent a data octet in a +%% component when that octet's corresponding character is outside the +%% allowed set or is being used as a delimiter of, or within, the +%% component. A percent-encoded octet is encoded as a character +%% triplet, consisting of the percent character "%" followed by the two +%% hexadecimal digits representing that octet's numeric value. For +%% example, "%20" is the percent-encoding for the binary octet +%% "00100000" (ABNF: %x20), which in US-ASCII corresponds to the space +%% character (SP). Section 2.4 describes when percent-encoding and +%% decoding is applied. +%% +%% pct-encoded = "%" HEXDIG HEXDIG +%%------------------------------------------------------------------------- +-spec decode_userinfo(binary()) -> binary(). +decode_userinfo(Cs) -> + check_utf8(decode(Cs, fun is_userinfo/1, <<>>)). + +-spec decode_host(binary()) -> binary(). +decode_host(Cs) -> + check_utf8(decode(Cs, fun is_host/1, <<>>)). + +-spec decode_path(binary()) -> binary(). +decode_path(Cs) -> + check_utf8(decode(Cs, fun is_path/1, <<>>)). + +-spec decode_query(binary()) -> binary(). +decode_query(Cs) -> + check_utf8(decode(Cs, fun is_query/1, <<>>)). + +-spec decode_fragment(binary()) -> binary(). +decode_fragment(Cs) -> + check_utf8(decode(Cs, fun is_fragment/1, <<>>)). + + +%% Returns Cs if it is utf8 encoded. +check_utf8(Cs) -> + case unicode:characters_to_list(Cs) of + {incomplete,_,_} -> + throw({error,invalid_utf8,Cs}); + {error,_,_} -> + throw({error,invalid_utf8,Cs}); + _ -> Cs + end. + +%%------------------------------------------------------------------------- +%% Percent-encode +%%------------------------------------------------------------------------- + +%% Only validates as scheme cannot have percent-encoded characters +-spec encode_scheme(list()|binary()) -> list() | binary(). +encode_scheme([]) -> + throw({error,invalid_scheme,""}); +encode_scheme(<<>>) -> + throw({error,invalid_scheme,<<>>}); +encode_scheme(Scheme) -> + case validate_scheme(Scheme) of + true -> Scheme; + false -> throw({error,invalid_scheme,Scheme}) + end. + +-spec encode_userinfo(list()|binary()) -> list() | binary(). +encode_userinfo(Cs) -> + encode(Cs, fun is_userinfo/1). + +-spec encode_host(list()|binary()) -> list() | binary(). +encode_host(Cs) -> + case classify_host(Cs) of + regname -> Cs; + ipv4 -> Cs; + ipv6 -> bracket_ipv6(Cs); + other -> encode(Cs, fun is_reg_name/1) + end. + +-spec encode_path(list()|binary()) -> list() | binary(). +encode_path(Cs) -> + encode(Cs, fun is_path/1). + +-spec encode_query(list()|binary()) -> list() | binary(). +encode_query(Cs) -> + encode(Cs, fun is_query/1). + +-spec encode_fragment(list()|binary()) -> list() | binary(). +encode_fragment(Cs) -> + encode(Cs, fun is_fragment/1). + +%%------------------------------------------------------------------------- +%% Helper funtions for percent-decode +%%------------------------------------------------------------------------- +decode(<<$%,C0,C1,Cs/binary>>, Fun, Acc) -> + case is_hex_digit(C0) andalso is_hex_digit(C1) of + true -> + B = ?HEX2DEC(C0)*16+?HEX2DEC(C1), + decode(Cs, Fun, <<Acc/binary, B>>); + false -> throw({error,invalid_percent_encoding,<<$%,C0,C1>>}) + end; +decode(<<C,Cs/binary>>, Fun, Acc) -> + case Fun(C) of + true -> decode(Cs, Fun, <<Acc/binary, C>>); + false -> throw({error,invalid_percent_encoding,<<C,Cs/binary>>}) + end; +decode(<<>>, _Fun, Acc) -> + Acc. + +%% Check if char is allowed in host +-spec is_host(char()) -> boolean(). +is_host($:) -> true; +is_host(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + +%% Check if char is allowed in path +-spec is_path(char()) -> boolean(). +is_path($/) -> true; +is_path(Char) -> is_pchar(Char). + + +%%------------------------------------------------------------------------- +%% Helper functions for percent-encode +%%------------------------------------------------------------------------- +-spec encode(list()|binary(), fun()) -> list() | binary(). +encode(Component, Fun) when is_list(Component) -> + B = unicode:characters_to_binary(Component), + unicode:characters_to_list(encode(B, Fun, <<>>)); +encode(Component, Fun) when is_binary(Component) -> + encode(Component, Fun, <<>>). +%% +encode(<<Char/utf8, Rest/binary>>, Fun, Acc) -> + C = encode_codepoint_binary(Char, Fun), + encode(Rest, Fun, <<Acc/binary,C/binary>>); +encode(<<Char, Rest/binary>>, _Fun, _Acc) -> + throw({error,invalid_input,<<Char,Rest/binary>>}); +encode(<<>>, _Fun, Acc) -> + Acc. + + +-spec encode_codepoint_binary(integer(), fun()) -> binary(). +encode_codepoint_binary(C, Fun) -> + case Fun(C) of + false -> percent_encode_binary(C); + true -> <<C>> + end. + + +-spec percent_encode_binary(integer()) -> binary(). +percent_encode_binary(Code) -> + percent_encode_binary(<<Code/utf8>>, <<>>). + + +percent_encode_binary(<<A:4,B:4,Rest/binary>>, Acc) -> + percent_encode_binary(Rest, <<Acc/binary,$%,(?DEC2HEX(A)),(?DEC2HEX(B))>>); +percent_encode_binary(<<>>, Acc) -> + Acc. + + +%%------------------------------------------------------------------------- +%%------------------------------------------------------------------------- +validate_scheme([]) -> true; +validate_scheme([H|T]) -> + case is_scheme(H) of + true -> validate_scheme(T); + false -> false + end; +validate_scheme(<<>>) -> true; +validate_scheme(<<H, Rest/binary>>) -> + case is_scheme(H) of + true -> validate_scheme(Rest); + false -> false + end. + + +%%------------------------------------------------------------------------- +%% Classifies hostname into the following categories: +%% regname, ipv4 - address does not contain reserved characters to be +%% percent-encoded +%% ipv6 - address does not contain reserved characters but it shall be +%% encolsed in brackets +%% other - address shall be percent-encoded +%%------------------------------------------------------------------------- +classify_host([]) -> other; +classify_host(Addr) when is_binary(Addr) -> + A = unicode:characters_to_list(Addr), + classify_host_ipv6(A); +classify_host(Addr) -> + classify_host_ipv6(Addr). + +classify_host_ipv6(Addr) -> + case is_ipv6_address(Addr) of + true -> ipv6; + false -> classify_host_ipv4(Addr) + end. + +classify_host_ipv4(Addr) -> + case is_ipv4_address(Addr) of + true -> ipv4; + false -> classify_host_regname(Addr) + end. + +classify_host_regname([]) -> regname; +classify_host_regname([H|T]) -> + case is_reg_name(H) of + true -> classify_host_regname(T); + false -> other + end. + +is_ipv4_address(Addr) -> + case inet:parse_ipv4strict_address(Addr) of + {ok, _} -> true; + {error, _} -> false + end. + +is_ipv6_address(Addr) -> + case inet:parse_ipv6strict_address(Addr) of + {ok, _} -> true; + {error, _} -> false + end. + +bracket_ipv6(Addr) when is_binary(Addr) -> + concat(<<$[,Addr/binary>>,<<$]>>); +bracket_ipv6(Addr) when is_list(Addr) -> + [$[|Addr] ++ "]". + + +%%------------------------------------------------------------------------- +%% Helper funtions for recompose +%%------------------------------------------------------------------------- + +%%------------------------------------------------------------------------- +%% Checks if input Map has valid combination of fields that can be +%% recomposed into a URI. +%% +%% The implementation is based on a decision tree that fulfills the +%% following rules: +%% - 'path' shall always be present in the input map +%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] +%% hier-part = "//" authority path-abempty +%% / path-absolute +%% / path-rootless +%% / path-empty +%% - 'host' shall be present in the input map when 'path' starts with +%% two slashes ("//") +%% path = path-abempty ; begins with "/" or is empty +%% / path-absolute ; begins with "/" but not "//" +%% / path-noscheme ; begins with a non-colon segment +%% / path-rootless ; begins with a segment +%% / path-empty ; zero characters +%% path-abempty = *( "/" segment ) +%% segment = *pchar +%% - 'host' shall be present if userinfo or port is present in input map +%% authority = [ userinfo "@" ] host [ ":" port ] +%% - All fields shall be valid (scheme, userinfo, host, port, path, query +%% or fragment). +%%------------------------------------------------------------------------- +is_valid_map(#{path := Path} = Map) -> + ((starts_with_two_slash(Path) andalso is_valid_map_host(Map)) + orelse + (maps:is_key(userinfo, Map) andalso is_valid_map_host(Map)) + orelse + (maps:is_key(port, Map) andalso is_valid_map_host(Map)) + orelse + all_fields_valid(Map)); +is_valid_map(#{}) -> + false. + + +is_valid_map_host(Map) -> + maps:is_key(host, Map) andalso all_fields_valid(Map). + + +all_fields_valid(Map) -> + Fun = fun(scheme, _, Acc) -> Acc; + (userinfo, _, Acc) -> Acc; + (host, _, Acc) -> Acc; + (port, _, Acc) -> Acc; + (path, _, Acc) -> Acc; + (query, _, Acc) -> Acc; + (fragment, _, Acc) -> Acc; + (_, _, _) -> false + end, + maps:fold(Fun, true, Map). + + +starts_with_two_slash([$/,$/|_]) -> + true; +starts_with_two_slash(?STRING_REST("//", _)) -> + true; +starts_with_two_slash(_) -> false. + + +update_scheme(#{scheme := Scheme}, _) -> + add_colon_postfix(encode_scheme(Scheme)); +update_scheme(#{}, _) -> + empty. + + +update_userinfo(#{userinfo := Userinfo}, empty) -> + add_auth_prefix(encode_userinfo(Userinfo)); +update_userinfo(#{userinfo := Userinfo}, URI) -> + concat(URI,add_auth_prefix(encode_userinfo(Userinfo))); +update_userinfo(#{}, empty) -> + empty; +update_userinfo(#{}, URI) -> + URI. + + +update_host(#{host := Host}, empty) -> + add_auth_prefix(encode_host(Host)); +update_host(#{host := Host} = Map, URI) -> + concat(URI,add_host_prefix(Map, encode_host(Host))); +update_host(#{}, empty) -> + empty; +update_host(#{}, URI) -> + URI. + + +%% URI cannot be empty for ports. E.g. ":8080" is not a valid URI +update_port(#{port := undefined}, URI) -> + concat(URI, <<":">>); +update_port(#{port := Port}, URI) -> + concat(URI,add_colon(encode_port(Port))); +update_port(#{}, URI) -> + URI. + + +update_path(#{path := Path}, empty) -> + encode_path(Path); +update_path(#{path := Path}, URI) -> + concat(URI,encode_path(Path)); +update_path(#{}, empty) -> + empty; +update_path(#{}, URI) -> + URI. + + +update_query(#{query := Query}, empty) -> + encode_query(Query); +update_query(#{query := Query}, URI) -> + concat(URI,add_question_mark(encode_query(Query))); +update_query(#{}, empty) -> + empty; +update_query(#{}, URI) -> + URI. + + +update_fragment(#{fragment := Fragment}, empty) -> + add_hashmark(encode_fragment(Fragment)); +update_fragment(#{fragment := Fragment}, URI) -> + concat(URI,add_hashmark(encode_fragment(Fragment))); +update_fragment(#{}, empty) -> + ""; +update_fragment(#{}, URI) -> + URI. + +%%------------------------------------------------------------------------- +%% Concatenates its arguments that can be lists and binaries. +%% The result is a list if at least one of its argument is a list and +%% binary otherwise. +%%------------------------------------------------------------------------- +concat(A, B) when is_binary(A), is_binary(B) -> + <<A/binary, B/binary>>; +concat(A, B) when is_binary(A), is_list(B) -> + unicode:characters_to_list(A) ++ B; +concat(A, B) when is_list(A) -> + A ++ maybe_to_list(B). + +add_hashmark(Comp) when is_binary(Comp) -> + <<$#, Comp/binary>>; +add_hashmark(Comp) when is_list(Comp) -> + [$#|Comp]. + +add_question_mark(Comp) when is_binary(Comp) -> + <<$?, Comp/binary>>; +add_question_mark(Comp) when is_list(Comp) -> + [$?|Comp]. + +add_colon(Comp) when is_binary(Comp) -> + <<$:, Comp/binary>>. + +add_colon_postfix(Comp) when is_binary(Comp) -> + <<Comp/binary,$:>>; +add_colon_postfix(Comp) when is_list(Comp) -> + Comp ++ ":". + +add_auth_prefix(Comp) when is_binary(Comp) -> + <<"//", Comp/binary>>; +add_auth_prefix(Comp) when is_list(Comp) -> + [$/,$/|Comp]. + +add_host_prefix(#{userinfo := _}, Host) when is_binary(Host) -> + <<$@,Host/binary>>; +add_host_prefix(#{}, Host) when is_binary(Host) -> + <<"//",Host/binary>>; +add_host_prefix(#{userinfo := _}, Host) when is_list(Host) -> + [$@|Host]; +add_host_prefix(#{}, Host) when is_list(Host) -> + [$/,$/|Host]. + +maybe_to_list(Comp) when is_binary(Comp) -> unicode:characters_to_list(Comp); +maybe_to_list(Comp) -> Comp. + +encode_port(Port) -> + integer_to_binary(Port). + +%%------------------------------------------------------------------------- +%% Helper functions for transcode +%%------------------------------------------------------------------------- + +%%------------------------------------------------------------------------- +%% uri_string:transcode(<<"x%00%00%00%F6"/utf32>>). +%% 1. Convert (transcode/2) input to list form (list of unicode codepoints) +%% "x%00%00%00%F6" +%% 2. Accumulate characters until percent-encoded segment (transcode/4). +%% Acc = "x" +%% 3. Convert percent-encoded triplets to binary form (transcode_pct/4) +%% <<0,0,0,246>> +%% 4. Transcode in-encoded binary to out-encoding (utf32 -> utf8): +%% <<195,182>> +%% 5. Percent-encode out-encoded binary: +%% <<"%C3%B6"/utf8>> = <<37,67,51,37,66,54>> +%% 6. Convert binary to list form, reverse it and append the accumulator +%% "6B%3C%" + "x" +%% 7. Reverse Acc and return it +%%------------------------------------------------------------------------- +transcode([$%,_C0,_C1|_Rest] = L, Acc, InEnc, OutEnc) -> + transcode_pct(L, Acc, <<>>, InEnc, OutEnc); +transcode([_C|_Rest] = L, Acc, InEnc, OutEnc) -> + transcode(L, Acc, [], InEnc, OutEnc). +%% +transcode([$%,_C0,_C1|_Rest] = L, Acc, List, InEncoding, OutEncoding) -> + transcode_pct(L, List ++ Acc, <<>>, InEncoding, OutEncoding); +transcode([C|Rest], Acc, List, InEncoding, OutEncoding) -> + transcode(Rest, Acc, [C|List], InEncoding, OutEncoding); +transcode([], Acc, List, _InEncoding, _OutEncoding) -> + lists:reverse(List ++ Acc). + + +%% Transcode percent-encoded segment +transcode_pct([$%,C0,C1|Rest] = L, Acc, B, InEncoding, OutEncoding) -> + case is_hex_digit(C0) andalso is_hex_digit(C1) of + true -> + Int = ?HEX2DEC(C0)*16+?HEX2DEC(C1), + transcode_pct(Rest, Acc, <<B/binary, Int>>, InEncoding, OutEncoding); + false -> throw({error, invalid_percent_encoding,L}) + end; +transcode_pct([_C|_Rest] = L, Acc, B, InEncoding, OutEncoding) -> + OutBinary = convert_to_binary(B, InEncoding, OutEncoding), + PctEncUtf8 = percent_encode_segment(OutBinary), + Out = lists:reverse(convert_to_list(PctEncUtf8, utf8)), + transcode(L, Out ++ Acc, [], InEncoding, OutEncoding); +transcode_pct([], Acc, B, InEncoding, OutEncoding) -> + OutBinary = convert_to_binary(B, InEncoding, OutEncoding), + PctEncUtf8 = percent_encode_segment(OutBinary), + Out = convert_to_list(PctEncUtf8, utf8), + lists:reverse(Acc) ++ Out. + + +%% Convert to binary +convert_to_binary(Binary, InEncoding, OutEncoding) -> + case unicode:characters_to_binary(Binary, InEncoding, OutEncoding) of + {error, _List, RestData} -> + throw({error, invalid_input, RestData}); + {incomplete, _List, RestData} -> + throw({error, invalid_input, RestData}); + Result -> + Result + end. + + +%% Convert to list +convert_to_list(Binary, InEncoding) -> + case unicode:characters_to_list(Binary, InEncoding) of + {error, _List, RestData} -> + throw({error, invalid_input, RestData}); + {incomplete, _List, RestData} -> + throw({error, invalid_input, RestData}); + Result -> + Result + end. + + +%% Flatten input list +flatten_list([], _) -> + []; +flatten_list(L, InEnc) -> + flatten_list(L, InEnc, []). +%% +flatten_list([H|T], InEnc, Acc) when is_binary(H) -> + L = convert_to_list(H, InEnc), + flatten_list(T, InEnc, lists:reverse(L) ++ Acc); +flatten_list([H|T], InEnc, Acc) when is_list(H) -> + flatten_list(H ++ T, InEnc, Acc); +flatten_list([H|T], InEnc, Acc) -> + flatten_list(T, InEnc, [H|Acc]); +flatten_list([], _InEnc, Acc) -> + lists:reverse(Acc); +flatten_list(Arg, _, _) -> + throw({error, invalid_input, Arg}). + + +percent_encode_segment(Segment) -> + percent_encode_binary(Segment, <<>>). + + +%%------------------------------------------------------------------------- +%% Helper functions for normalize +%%------------------------------------------------------------------------- + +%% 6.2.2.1. Case Normalization +normalize_case(#{scheme := Scheme, host := Host} = Map) -> + Map#{scheme => to_lower(Scheme), + host => to_lower(Host)}; +normalize_case(#{host := Host} = Map) -> + Map#{host => to_lower(Host)}; +normalize_case(#{scheme := Scheme} = Map) -> + Map#{scheme => to_lower(Scheme)}; +normalize_case(#{} = Map) -> + Map. + + +to_lower(Cs) when is_list(Cs) -> + B = convert_to_binary(Cs, utf8, utf8), + convert_to_list(to_lower(B), utf8); +to_lower(Cs) when is_binary(Cs) -> + to_lower(Cs, <<>>). +%% +to_lower(<<C,Cs/binary>>, Acc) when $A =< C, C =< $Z -> + to_lower(Cs, <<Acc/binary,(C + 32)>>); +to_lower(<<C,Cs/binary>>, Acc) -> + to_lower(Cs, <<Acc/binary,C>>); +to_lower(<<>>, Acc) -> + Acc. + + +%% 6.2.2.3. Path Segment Normalization +%% 5.2.4. Remove Dot Segments +normalize_path_segment(Map) -> + Path = maps:get(path, Map, undefined), + Map#{path => remove_dot_segments(Path)}. + + +remove_dot_segments(Path) when is_binary(Path) -> + remove_dot_segments(Path, <<>>); +remove_dot_segments(Path) when is_list(Path) -> + B = convert_to_binary(Path, utf8, utf8), + B1 = remove_dot_segments(B, <<>>), + convert_to_list(B1, utf8). +%% +remove_dot_segments(<<>>, Output) -> + Output; +remove_dot_segments(<<"../",T/binary>>, Output) -> + remove_dot_segments(T, Output); +remove_dot_segments(<<"./",T/binary>>, Output) -> + remove_dot_segments(T, Output); +remove_dot_segments(<<"/./",T/binary>>, Output) -> + remove_dot_segments(<<$/,T/binary>>, Output); +remove_dot_segments(<<"/.">>, Output) -> + remove_dot_segments(<<$/>>, Output); +remove_dot_segments(<<"/../",T/binary>>, Output) -> + Out1 = remove_last_segment(Output), + remove_dot_segments(<<$/,T/binary>>, Out1); +remove_dot_segments(<<"/..">>, Output) -> + Out1 = remove_last_segment(Output), + remove_dot_segments(<<$/>>, Out1); +remove_dot_segments(<<$.>>, Output) -> + remove_dot_segments(<<>>, Output); +remove_dot_segments(<<"..">>, Output) -> + remove_dot_segments(<<>>, Output); +remove_dot_segments(Input, Output) -> + {First, Rest} = first_path_segment(Input), + remove_dot_segments(Rest, <<Output/binary,First/binary>>). + + +first_path_segment(Input) -> + F = first_path_segment(Input, <<>>), + split_binary(Input, byte_size(F)). +%% +first_path_segment(<<$/,T/binary>>, Acc) -> + first_path_segment_end(<<T/binary>>, <<Acc/binary,$/>>); +first_path_segment(<<C,T/binary>>, Acc) -> + first_path_segment_end(<<T/binary>>, <<Acc/binary,C>>). + + +first_path_segment_end(<<>>, Acc) -> + Acc; +first_path_segment_end(<<$/,_/binary>>, Acc) -> + Acc; +first_path_segment_end(<<C,T/binary>>, Acc) -> + first_path_segment_end(<<T/binary>>, <<Acc/binary,C>>). + + +remove_last_segment(<<>>) -> + <<>>; +remove_last_segment(B) -> + {Init, Last} = split_binary(B, byte_size(B) - 1), + case Last of + <<$/>> -> + Init; + _Char -> + remove_last_segment(Init) + end. + + +%% RFC 3986, 6.2.3. Scheme-Based Normalization +normalize_scheme_based(Map) -> + Scheme = maps:get(scheme, Map, undefined), + Port = maps:get(port, Map, undefined), + Path= maps:get(path, Map, undefined), + normalize_scheme_based(Map, Scheme, Port, Path). +%% +normalize_scheme_based(Map, Scheme, Port, Path) + when Scheme =:= "http"; Scheme =:= <<"http">> -> + normalize_http(Map, Port, Path); +normalize_scheme_based(Map, Scheme, Port, Path) + when Scheme =:= "https"; Scheme =:= <<"https">> -> + normalize_https(Map, Port, Path); +normalize_scheme_based(Map, Scheme, Port, _Path) + when Scheme =:= "ftp"; Scheme =:= <<"ftp">> -> + normalize_ftp(Map, Port); +normalize_scheme_based(Map, Scheme, Port, _Path) + when Scheme =:= "ssh"; Scheme =:= <<"ssh">> -> + normalize_ssh_sftp(Map, Port); +normalize_scheme_based(Map, Scheme, Port, _Path) + when Scheme =:= "sftp"; Scheme =:= <<"sftp">> -> + normalize_ssh_sftp(Map, Port); +normalize_scheme_based(Map, Scheme, Port, _Path) + when Scheme =:= "tftp"; Scheme =:= <<"tftp">> -> + normalize_tftp(Map, Port); +normalize_scheme_based(Map, _, _, _) -> + Map. + + +normalize_http(Map, Port, Path) -> + M1 = normalize_port(Map, Port, 80), + normalize_http_path(M1, Path). + + +normalize_https(Map, Port, Path) -> + M1 = normalize_port(Map, Port, 443), + normalize_http_path(M1, Path). + + +normalize_ftp(Map, Port) -> + normalize_port(Map, Port, 21). + + +normalize_ssh_sftp(Map, Port) -> + normalize_port(Map, Port, 22). + + +normalize_tftp(Map, Port) -> + normalize_port(Map, Port, 69). + + +normalize_port(Map, Port, Default) -> + case Port of + Default -> + maps:remove(port, Map); + _Else -> + Map + end. + + +normalize_http_path(Map, Path) -> + case Path of + "" -> + Map#{path => "/"}; + <<>> -> + Map#{path => <<"/">>}; + _Else -> + Map + end. diff --git a/lib/stdlib/test/Makefile b/lib/stdlib/test/Makefile index 523cb95065..8490770f3d 100644 --- a/lib/stdlib/test/Makefile +++ b/lib/stdlib/test/Makefile @@ -87,6 +87,7 @@ MODULES= \ timer_simple_SUITE \ unicode_SUITE \ unicode_util_SUITE \ + uri_string_SUITE \ win32reg_SUITE \ y2k_SUITE \ select_SUITE \ diff --git a/lib/stdlib/test/property_test/README b/lib/stdlib/test/property_test/README new file mode 100644 index 0000000000..57602bf719 --- /dev/null +++ b/lib/stdlib/test/property_test/README @@ -0,0 +1,12 @@ + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% %%% +%%% WARNING %%% +%%% %%% +%%% This is experimental code which may be changed or removed %%% +%%% anytime without any warning. %%% +%%% %%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +The test in this directory are written assuming that the user has a QuickCheck license. They are to be run manually. Some may be possible to be run with other tools, e.g. PropEr. + diff --git a/lib/stdlib/test/property_test/uri_string_recompose.erl b/lib/stdlib/test/property_test/uri_string_recompose.erl new file mode 100644 index 0000000000..e51a671172 --- /dev/null +++ b/lib/stdlib/test/property_test/uri_string_recompose.erl @@ -0,0 +1,361 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(uri_string_recompose). + +-compile(export_all). + +-proptest(eqc). +-proptest([triq,proper]). + +-ifndef(EQC). +-ifndef(PROPER). +-ifndef(TRIQ). +-define(EQC,true). +-endif. +-endif. +-endif. + +-ifdef(EQC). +-include_lib("eqc/include/eqc.hrl"). +-define(MOD_eqc,eqc). + +-else. +-ifdef(PROPER). +-include_lib("proper/include/proper.hrl"). +-define(MOD_eqc,proper). + +-else. +-ifdef(TRIQ). +-define(MOD_eqc,triq). +-include_lib("triq/include/triq.hrl"). + +-endif. +-endif. +-endif. + + +-define(STRING_REST(MatchStr, Rest), <<MatchStr/utf8, Rest/binary>>). + +-define(SCHEME, {scheme, scheme()}). +-define(USER, {userinfo, unicode()}). +-define(HOST, {host, host_map()}). +-define(PORT, {port, port()}). +-define(PATH_ABE, {path, path_abempty_map()}). +-define(PATH_ABS, {path, path_absolute_map()}). +-define(PATH_NOS, {path, path_noscheme_map()}). +-define(PATH_ROO, {path, path_rootless_map()}). +-define(PATH_EMP, {path, path_empty_map()}). +-define(QUERY, {query, query_map()}). +-define(FRAGMENT, {fragment, fragment_map()}). + + +%%%======================================================================== +%%% Properties +%%%======================================================================== + +prop_recompose() -> + ?FORALL(Map, map(), + Map =:= uri_string:parse(uri_string:recompose(Map)) + ). + +%% Stats +prop_map_key_length_collect() -> + ?FORALL(List, map(), + collect(length(maps:keys(List)), true)). + +prop_map_collect() -> + ?FORALL(List, map(), + collect(lists:sort(maps:keys(List)), true)). + +prop_scheme_collect() -> + ?FORALL(List, scheme(), + collect(length(List), true)). + + +%%%======================================================================== +%%% Generators +%%%======================================================================== + +map() -> + ?LET(Gen, comp_proplist(), proplist_to_map(Gen)). + +comp_proplist() -> + frequency([ + {2, [?SCHEME,?PATH_ABS]}, + {2, [?SCHEME,?PATH_ROO]}, + {2, [?SCHEME,?PATH_EMP]}, + {2, [?SCHEME,?HOST,?PATH_ABE]}, + {2, [?SCHEME,?USER,?HOST,?PATH_ABE]}, + {2, [?SCHEME,?HOST,?PORT,?PATH_ABE]}, + {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE]}, + + {2, [?PATH_ABS]}, + {2, [?PATH_NOS]}, + {2, [?PATH_EMP]}, + {2, [?HOST,?PATH_ABE]}, + {2, [?USER,?HOST,?PATH_ABE]}, + {2, [?HOST,?PORT,?PATH_ABE]}, + {2, [?USER,?HOST,?PORT,?PATH_ABE]}, + + + {2, [?SCHEME,?PATH_ABS,?QUERY]}, + {2, [?SCHEME,?PATH_ROO,?QUERY]}, + {2, [?SCHEME,?PATH_EMP,?QUERY]}, + {2, [?SCHEME,?HOST,?PATH_ABE,?QUERY]}, + {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?QUERY]}, + {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?QUERY]}, + {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?QUERY]}, + + {2, [?PATH_ABS,?QUERY]}, + {2, [?PATH_NOS,?QUERY]}, + {2, [?PATH_EMP,?QUERY]}, + {2, [?HOST,?PATH_ABE,?QUERY]}, + {2, [?USER,?HOST,?PATH_ABE,?QUERY]}, + {2, [?HOST,?PORT,?PATH_ABE,?QUERY]}, + {2, [?USER,?HOST,?PORT,?PATH_ABE,?QUERY]}, + + + {2, [?SCHEME,?PATH_ABS,?FRAGMENT]}, + {2, [?SCHEME,?PATH_ROO,?FRAGMENT]}, + {2, [?SCHEME,?PATH_EMP,?FRAGMENT]}, + {2, [?SCHEME,?HOST,?PATH_ABE,?FRAGMENT]}, + {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?FRAGMENT]}, + {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?FRAGMENT]}, + {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?FRAGMENT]}, + + {2, [?PATH_ABS,?FRAGMENT]}, + {2, [?PATH_NOS,?FRAGMENT]}, + {2, [?PATH_EMP,?FRAGMENT]}, + {2, [?HOST,?PATH_ABE,?FRAGMENT]}, + {2, [?USER,?HOST,?PATH_ABE,?FRAGMENT]}, + {2, [?HOST,?PORT,?PATH_ABE,?FRAGMENT]}, + {2, [?USER,?HOST,?PORT,?PATH_ABE,?FRAGMENT]}, + + + {2, [?SCHEME,?PATH_ABS,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?PATH_ROO,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?PATH_EMP,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]}, + + {2, [?PATH_ABS,?QUERY,?FRAGMENT]}, + {2, [?PATH_NOS,?QUERY,?FRAGMENT]}, + {2, [?PATH_EMP,?QUERY,?FRAGMENT]}, + {2, [?HOST,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?USER,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?USER,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]} + ]). + + +%%------------------------------------------------------------------------- +%% Path +%%------------------------------------------------------------------------- +path_abempty_map() -> + frequency([{90, path_abe_map()}, + {10, path_empty_map()}]). + +path_abe_map() -> + ?SIZED(Length, path_abe_map(Length, [])). +%% +path_abe_map(0, Segments) -> + ?LET(Gen, Segments, lists:append(Gen)); +path_abe_map(N, Segments) -> + path_abe_map(N-1, [slash(),segment()|Segments]). + + +path_absolute_map() -> + ?SIZED(Length, path_absolute_map(Length, [])). +%% +path_absolute_map(0, Segments) -> + ?LET(Gen, [slash(),segment_nz()|Segments], lists:append(Gen)); +path_absolute_map(N, Segments) -> + path_absolute_map(N-1, [slash(),segment()|Segments]). + + +path_noscheme_map() -> + ?SIZED(Length, path_noscheme_map(Length, [])). +%% +path_noscheme_map(0, Segments) -> + ?LET(Gen, [segment_nz_nc()|Segments], lists:append(Gen)); +path_noscheme_map(N, Segments) -> + path_noscheme_map(N-1, [slash(),segment()|Segments]). + +path_rootless_map() -> + ?SIZED(Length, path_rootless_map(Length, [])). +%% +path_rootless_map(0, Segments) -> + ?LET(Gen, [segment_nz()|Segments], lists:append(Gen)); +path_rootless_map(N, Segments) -> + path_rootless_map(N-1, [slash(),segment()|Segments]). + + +segment_nz() -> + non_empty(segment()). + +segment_nz_nc() -> + non_empty(list(frequency([{30, unreserved()}, + {10, sub_delims()}, + {10, unicode_char()}, + {5, oneof([$@])} + ]))). + + +segment() -> + list(frequency([{30, unreserved()}, + {10, sub_delims()}, + {10, unicode_char()}, + {5, oneof([$:, $@])} + ])). + +slash() -> + "/". + +path_empty_map() -> + "". + + +%%------------------------------------------------------------------------- +%% Path +%%------------------------------------------------------------------------- +host_map() -> + frequency([{30, reg_name()}, + {30, ip_address()} + ]). + + +reg_name() -> + list(frequency([{30, alpha()}, + {10, sub_delims()}, + {10, unicode_char()} + ])). + +ip_address() -> + oneof(["127.0.0.1", "::127.0.0.1", + "2001:0db8:0000:0000:0000:0000:1428:07ab", + "2001:0db8:0000:0000:0000::1428:07ab", + "2001:0db8:0:0:0:0:1428:07ab", + "2001:0db8:0::0:1428:07ab"]). + +%% Generating only reg-names +host_uri() -> + non_empty(list(frequency([{30, unreserved()}, + {10, sub_delims()}, + {10, pct_encoded()} + ]))). + +%%------------------------------------------------------------------------- +%% Port, Query, Fragment +%%------------------------------------------------------------------------- +port() -> + frequency([{10, undefined}, + {10, range(1,65535)} + ]). + +query_map() -> + unicode(). + + +query_uri() -> + [$?| non_empty(list(frequency([{20, pchar()}, + {5, oneof([$/, $?])} % punctuation + ])))]. + +fragment_map() -> + unicode(). + +fragment_uri() -> + [$?| non_empty(list(frequency([{20, pchar()}, + {5, oneof([$/, $?])} % punctuation + ])))]. + + +%%------------------------------------------------------------------------- +%% Scheme +%%------------------------------------------------------------------------- +scheme() -> + ?SIZED(Length, scheme_start(Length, [])). +%% +scheme_start(0, L) -> + ?LET(Gen, L, lists:reverse(Gen)); +scheme_start(N, L) -> + scheme(N-1,[alpha()|L]). + +scheme(0, L) -> + ?LET(Gen, L, lists:reverse(Gen)); +scheme(N, L) -> + scheme(N-1, [scheme_char()|L]). + + +%%------------------------------------------------------------------------- +%% Misc +%%------------------------------------------------------------------------- +unicode() -> + list(frequency([{20, alpha()}, % alpha + {10, digit()}, % digit + {10, unicode_char()} % unicode + ])). + +scheme_char() -> + frequency([{20, alpha()}, % alpha + {20, digit()}, % digit + {5, oneof([$+, $-, $.])} % punctuation + ]). + +sub_delims() -> + oneof([$!, $$, $&, $', $(, $), + $*, $+, $,,$;, $=]). + +pchar() -> + frequency([{20, unreserved()}, + {5, pct_encoded()}, + {5, sub_delims()}, + {1, oneof([$:, $@])} % punctuation + ]). + +unreserved() -> + frequency([{20, alpha()}, + {5, digit()}, + {1, oneof([$-, $., $_, $~])} % punctuation + ]). + +unicode_char() -> + range(913, 1023). + +alpha() -> + frequency([{20, range($a, $z)}, % letters + {20, range($A, $Z)}]). % letters + +digit() -> + range($0, $9). % numbers + +pct_encoded() -> + oneof(["%C3%A4", "%C3%A5", "%C3%B6"]). + + +%%%======================================================================== +%%% Helpers +%%%======================================================================== +proplist_to_map(L) -> + lists:foldl(fun({K,V},M) -> M#{K => V}; + (_,M) -> M + end, #{}, L). diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl new file mode 100644 index 0000000000..c625da56c6 --- /dev/null +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -0,0 +1,844 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(uri_string_SUITE). + +-include_lib("common_test/include/ct.hrl"). + +-export([all/0, suite/0,groups/0, + normalize/1, + parse_binary_fragment/1, parse_binary_host/1, parse_binary_host_ipv4/1, + parse_binary_host_ipv6/1, + parse_binary_path/1, parse_binary_pct_encoded_fragment/1, parse_binary_pct_encoded_query/1, + parse_binary_pct_encoded_userinfo/1, parse_binary_port/1, + parse_binary_query/1, parse_binary_scheme/1, parse_binary_userinfo/1, + parse_fragment/1, parse_host/1, parse_host_ipv4/1, parse_host_ipv6/1, + parse_path/1, parse_pct_encoded_fragment/1, parse_pct_encoded_query/1, + parse_pct_encoded_userinfo/1, parse_port/1, + parse_query/1, parse_scheme/1, parse_userinfo/1, + parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1, + parse_special/1, parse_special2/1, parse_negative/1, + recompose_fragment/1, recompose_parse_fragment/1, + recompose_query/1, recompose_parse_query/1, + recompose_path/1, recompose_parse_path/1, + recompose_autogen/1, parse_recompose_autogen/1, + transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1 + ]). + + +-define(SCHEME, "foo"). +-define(USERINFO, "åsa"). +-define(USERINFO_ENC, "%C3%A5sa"). +-define(HOST, "älvsjö"). +-define(HOST_ENC, "%C3%A4lvsj%C3%B6"). +-define(IPV6, "::127.0.0.1"). +-define(IPV6_ENC, "[::127.0.0.1]"). +-define(PORT, 8042). +-define(PORT_ENC, ":8042"). +-define(PATH, "/där"). +-define(PATH_ENC, "/d%C3%A4r"). +-define(QUERY, "name=örn"). +-define(QUERY_ENC, "?name=%C3%B6rn"). +-define(FRAGMENT, "näsa"). +-define(FRAGMENT_ENC, "#n%C3%A4sa"). + + +suite() -> + [{timetrap,{minutes,1}}]. + +all() -> + [ + normalize, + parse_binary_scheme, + parse_binary_userinfo, + parse_binary_pct_encoded_userinfo, + parse_binary_host, + parse_binary_host_ipv4, + parse_binary_host_ipv6, + parse_binary_port, + parse_binary_path, + parse_binary_query, + parse_binary_pct_encoded_query, + parse_binary_fragment, + parse_binary_pct_encoded_fragment, + parse_scheme, + parse_userinfo, + parse_pct_encoded_userinfo, + parse_host, + parse_host_ipv4, + parse_host_ipv6, + parse_port, + parse_path, + parse_query, + parse_pct_encoded_query, + parse_fragment, + parse_pct_encoded_fragment, + parse_list, + parse_binary, + parse_mixed, + parse_relative, + parse_special, + parse_special2, + parse_negative, + recompose_fragment, + recompose_parse_fragment, + recompose_query, + recompose_parse_query, + recompose_path, + recompose_parse_path, + recompose_autogen, + parse_recompose_autogen, + transcode_basic, + transcode_options, + transcode_mixed, + transcode_negative + ]. + +groups() -> + []. + + +%%------------------------------------------------------------------------- +%% Helper functions +%%------------------------------------------------------------------------- +uri_combinations() -> + [[Sch,Usr,Hst,Prt,Pat,Qry,Frg] || + Sch <- [fun update_scheme/1, fun update_scheme_binary/1, none], + Usr <- [fun update_userinfo/1, fun update_userinfo_binary/1, none], + Hst <- [fun update_host/1, fun update_host_binary/1, + fun update_ipv6/1, fun update_ipv6_binary/1, none], + Prt <- [fun update_port/1, none], + Pat <- [fun update_path/1, fun update_path_binary/1], + Qry <- [fun update_query/1,fun update_query_binary/1, none], + Frg <- [fun update_fragment/1, fun update_fragment_binary/1, none], + not (Usr =:= none andalso Hst =:= none andalso Prt =/= none), + not (Usr =/= none andalso Hst =:= none andalso Prt =:= none), + not (Usr =/= none andalso Hst =:= none andalso Prt =/= none)]. + + +generate_test_vector(Comb) -> + Fun = fun (F, {Map, URI}) when is_function(F) -> F({Map, URI}); + (_, Map) -> Map + end, + lists:foldl(Fun, {#{}, empty}, Comb). + +generate_test_vectors(L) -> + lists:map(fun generate_test_vector/1, L). + +update_fragment({In, empty}) -> + {In#{fragment => ?FRAGMENT}, ?FRAGMENT_ENC}; +update_fragment({In, Out}) when is_list(Out) -> + {In#{fragment => ?FRAGMENT}, Out ++ ?FRAGMENT_ENC}; +update_fragment({In, Out}) when is_binary(Out) -> + {In#{fragment => ?FRAGMENT}, binary_to_list(Out) ++ ?FRAGMENT_ENC}. + +update_fragment_binary({In, empty}) -> + {In#{fragment => <<?FRAGMENT/utf8>>}, <<?FRAGMENT_ENC>>}; +update_fragment_binary({In, Out}) when is_list(Out) -> + {In#{fragment => <<?FRAGMENT/utf8>>}, Out ++ ?FRAGMENT_ENC}; +update_fragment_binary({In, Out}) when is_binary(Out) -> + {In#{fragment => <<?FRAGMENT/utf8>>}, <<Out/binary,?FRAGMENT_ENC>>}. + + +update_query({In, empty}) -> + {In#{query => ?QUERY}, ?QUERY_ENC}; +update_query({In, Out}) when is_list(Out) -> + {In#{query => ?QUERY}, Out ++ ?QUERY_ENC}; +update_query({In, Out}) when is_binary(Out) -> + {In#{query => ?QUERY}, binary_to_list(Out) ++ ?QUERY_ENC}. + +update_query_binary({In, empty}) -> + {In#{query => <<?QUERY/utf8>>}, <<?QUERY_ENC>>}; +update_query_binary({In, Out}) when is_list(Out) -> + {In#{query => <<?QUERY/utf8>>}, Out ++ ?QUERY_ENC}; +update_query_binary({In, Out}) when is_binary(Out) -> + {In#{query => <<?QUERY/utf8>>}, <<Out/binary,?QUERY_ENC>>}. + +update_path({In, empty}) -> + {In#{path => ?PATH}, ?PATH_ENC}; +update_path({In, Out}) when is_list(Out) -> + {In#{path => ?PATH}, Out ++ ?PATH_ENC}; +update_path({In, Out}) when is_binary(Out) -> + {In#{path => ?PATH}, binary_to_list(Out) ++ ?PATH_ENC}. + +update_path_binary({In, empty}) -> + {In#{path => <<?PATH/utf8>>}, <<?PATH_ENC>>}; +update_path_binary({In, Out}) when is_list(Out) -> + {In#{path => <<?PATH/utf8>>}, Out ++ ?PATH_ENC}; +update_path_binary({In, Out}) when is_binary(Out) -> + {In#{path => <<?PATH/utf8>>}, <<Out/binary,?PATH_ENC>>}. + +update_port({In, Out}) when is_list(Out) -> + {In#{port => ?PORT}, Out ++ ?PORT_ENC}; +update_port({In, Out}) when is_binary(Out) -> + {In#{port => ?PORT}, <<Out/binary,?PORT_ENC>>}. + +update_host({In, empty}) -> + {In#{host => ?HOST}, "//" ++ ?HOST_ENC}; +update_host({In, Out}) when is_list(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => ?HOST}, Out ++ [$@|?HOST_ENC]}; + false -> {In#{host => ?HOST}, Out ++ [$/,$/|?HOST_ENC]} + end; +update_host({In, Out}) when is_binary(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => ?HOST}, binary_to_list(Out) ++ [$@|?HOST_ENC]}; + false -> {In#{host => ?HOST}, binary_to_list(Out) ++ [$/,$/|?HOST_ENC]} + end. + +update_host_binary({In, empty}) -> + {In#{host => <<?HOST/utf8>>}, <<"//",?HOST_ENC>>}; +update_host_binary({In, Out}) when is_list(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => <<?HOST/utf8>>}, Out ++ [$@|?HOST_ENC]}; + false -> {In#{host => <<?HOST/utf8>>}, Out ++ [$/,$/|?HOST_ENC]} + end; +update_host_binary({In, Out}) when is_binary(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => <<?HOST/utf8>>}, <<Out/binary,$@,?HOST_ENC>>}; + false-> {In#{host => <<?HOST/utf8>>}, <<Out/binary,"//",?HOST_ENC>>} + end. + +update_ipv6({In, empty}) -> + {In#{host => ?IPV6}, "//" ++ ?IPV6_ENC}; +update_ipv6({In, Out}) when is_list(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => ?IPV6}, Out ++ [$@|?IPV6_ENC]}; + false -> {In#{host => ?IPV6}, Out ++ [$/,$/|?IPV6_ENC]} + end; +update_ipv6({In, Out}) when is_binary(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => ?IPV6}, binary_to_list(Out) ++ [$@|?IPV6_ENC]}; + false -> {In#{host => ?IPV6}, binary_to_list(Out) ++ [$/,$/|?IPV6_ENC]} + end. + +update_ipv6_binary({In, empty}) -> + {In#{host => <<?IPV6/utf8>>}, <<"//",?IPV6_ENC>>}; +update_ipv6_binary({In, Out}) when is_list(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => <<?IPV6/utf8>>}, Out ++ [$@|?IPV6_ENC]}; + false -> {In#{host => <<?IPV6/utf8>>}, Out ++ [$/,$/|?IPV6_ENC]} + end; +update_ipv6_binary({In, Out}) when is_binary(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => <<?IPV6/utf8>>}, <<Out/binary,$@,?IPV6_ENC>>}; + false-> {In#{host => <<?IPV6/utf8>>}, <<Out/binary,"//",?IPV6_ENC>>} + end. + +update_userinfo({In, empty}) -> + {In#{userinfo => ?USERINFO}, "//" ++ ?USERINFO_ENC}; +update_userinfo({In, Out}) when is_list(Out) -> + {In#{userinfo => ?USERINFO}, Out ++ "//" ++ ?USERINFO_ENC}; +update_userinfo({In, Out}) when is_binary(Out) -> + {In#{userinfo => ?USERINFO}, binary_to_list(Out) ++ "//" ++ ?USERINFO_ENC}. + +update_userinfo_binary({In, empty}) -> + {In#{userinfo => <<?USERINFO/utf8>>}, <<"//",?USERINFO_ENC>>}; +update_userinfo_binary({In, Out}) when is_list(Out) -> + {In#{userinfo => <<?USERINFO/utf8>>}, Out ++ "//" ++ ?USERINFO_ENC}; +update_userinfo_binary({In, Out}) when is_binary(Out) -> + {In#{userinfo => <<?USERINFO/utf8>>}, <<Out/binary,"//",?USERINFO_ENC>>}. + +update_scheme({In, empty}) -> + {In#{scheme => ?SCHEME}, ?SCHEME ++ ":"}. + +update_scheme_binary({In, empty}) -> + {In#{scheme => <<?SCHEME/utf8>>}, <<?SCHEME,$:>>}. + + +%% Test recompose on a generated test vector +run_test_recompose({#{}, empty}) -> + try "" = uri_string:recompose(#{}) of + _ -> ok + catch + _:_ -> error({test_failed, #{}, ""}) + end; +run_test_recompose({Map, URI}) -> + try URI = uri_string:recompose(Map) of + URI -> ok + catch + _:_ -> error({test_failed, Map, URI}) + end. + +%% Test parse - recompose on a generated test vector +run_test_parse_recompose({#{}, empty}) -> + try "" = uri_string:recompose(uri_string:parse("")) of + _ -> ok + catch + _:_ -> error({test_failed, #{}, ""}) + end; +run_test_parse_recompose({Map, URI}) -> + try URI = uri_string:recompose(uri_string:parse(URI)) of + URI -> ok + catch + _:_ -> error({test_failed, Map, URI}) + end. + + +%%------------------------------------------------------------------------- +%% Parse tests +%%------------------------------------------------------------------------- + +parse_binary_scheme(_Config) -> + #{} = uri_string:parse(<<>>), + #{path := <<"foo">>} = uri_string:parse(<<"foo">>), + #{scheme := <<"foo">>} = uri_string:parse(<<"foo:">>), + #{scheme := <<"foo">>, path := <<"bar:nisse">>} = uri_string:parse(<<"foo:bar:nisse">>), + #{scheme := <<"foo">>, host := <<"">>} = uri_string:parse(<<"foo://">>), + #{scheme := <<"foo">>, host := <<"">>, path := <<"/">>} = uri_string:parse(<<"foo:///">>), + #{scheme := <<"foo">>, host := <<"">>, path := <<"//">>} = uri_string:parse(<<"foo:////">>), + + #{path := <<"/">>} = uri_string:parse(<<"/">>), + #{host := <<>>} = uri_string:parse(<<"//">>), + #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>). + +parse_binary_userinfo(_Config) -> + #{scheme := <<"user">>, path := <<"password@localhost">>} = + uri_string:parse(<<"user:password@localhost">>), + #{path := <<"user@">>} = uri_string:parse(<<"user@">>), + #{path := <<"/user@">>} = uri_string:parse(<<"/user@">>), + #{path := <<"user@localhost">>} = uri_string:parse(<<"user@localhost">>), + #{userinfo := <<"user">>, host := <<"localhost">>} = uri_string:parse(<<"//user@localhost">>), + #{userinfo := <<"user:password">>, host := <<"localhost">>} = + uri_string:parse(<<"//user:password@localhost">>), + #{scheme := <<"foo">>, path := <<"/user@">>} = + uri_string:parse(<<"foo:/user@">>), + #{scheme := <<"foo">>, userinfo := <<"user">>, host := <<"localhost">>} = + uri_string:parse(<<"foo://user@localhost">>), + #{scheme := <<"foo">>, userinfo := <<"user:password">>, host := <<"localhost">>} = + uri_string:parse(<<"foo://user:password@localhost">>). + +parse_binary_pct_encoded_userinfo(_Config) -> + #{scheme := <<"user">>, path := <<"合@気道"/utf8>>} = + uri_string:parse(<<"user:%E5%90%88@%E6%B0%97%E9%81%93">>), + #{path := <<"合気道@"/utf8>>} = uri_string:parse(<<"%E5%90%88%E6%B0%97%E9%81%93@">>), + #{path := <<"/合気道@"/utf8>>} = uri_string:parse(<<"/%E5%90%88%E6%B0%97%E9%81%93@">>), + #{path := <<"合@気道"/utf8>>} = uri_string:parse(<<"%E5%90%88@%E6%B0%97%E9%81%93">>), + #{userinfo := <<"合"/utf8>>, host := <<"気道"/utf8>>} = + uri_string:parse(<<"//%E5%90%88@%E6%B0%97%E9%81%93">>), + #{userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} = + uri_string:parse(<<"//%E5%90%88:%E6%B0%97@%E9%81%93">>), + #{scheme := <<"foo">>, path := <<"/合気道@"/utf8>>} = + uri_string:parse(<<"foo:/%E5%90%88%E6%B0%97%E9%81%93@">>), + #{scheme := <<"foo">>, userinfo := <<"合"/utf8>>, host := <<"気道"/utf8>>} = + uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93">>), + #{scheme := <<"foo">>, userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} = + uri_string:parse(<<"foo://%E5%90%88:%E6%B0%97@%E9%81%93">>), + {error,invalid_uri,"@"} = uri_string:parse(<<"//%E5%90%88@%E6%B0%97%E9%81%93@">>), + {error,invalid_uri,":"} = uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93@">>). + +parse_binary_host(_Config) -> + #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>), + #{host := <<"hostname">>,scheme := <<"foo">>} = uri_string:parse(<<"foo://hostname">>), + #{host := <<"hostname">>,scheme := <<"foo">>, userinfo := <<"user">>} = + uri_string:parse(<<"foo://user@hostname">>). + +parse_binary_host_ipv4(_Config) -> + #{host := <<"127.0.0.1">>} = uri_string:parse(<<"//127.0.0.1">>), + #{host := <<"127.0.0.1">>, path := <<"/over/there">>} = + uri_string:parse(<<"//127.0.0.1/over/there">>), + #{host := <<"127.0.0.1">>, query := <<"name=ferret">>} = + uri_string:parse(<<"//127.0.0.1?name=ferret">>), + #{host := <<"127.0.0.1">>, fragment := <<"nose">>} = uri_string:parse(<<"//127.0.0.1#nose">>), + {error,invalid_uri,"x"} = uri_string:parse(<<"//127.0.0.x">>), + {error,invalid_uri,"1227.0.0.1"} = uri_string:parse(<<"//1227.0.0.1">>). + +parse_binary_host_ipv6(_Config) -> + #{host := <<"::127.0.0.1">>} = uri_string:parse(<<"//[::127.0.0.1]">>), + #{host := <<"2001:0db8:0000:0000:0000:0000:1428:07ab">>} = + uri_string:parse(<<"//[2001:0db8:0000:0000:0000:0000:1428:07ab]">>), + #{host := <<"::127.0.0.1">>, path := <<"/over/there">>} = + uri_string:parse(<<"//[::127.0.0.1]/over/there">>), + #{host := <<"::127.0.0.1">>, query := <<"name=ferret">>} = + uri_string:parse(<<"//[::127.0.0.1]?name=ferret">>), + #{host := <<"::127.0.0.1">>, fragment := <<"nose">>} = + uri_string:parse(<<"//[::127.0.0.1]#nose">>), + {error,invalid_uri,"x"} = uri_string:parse(<<"//[::127.0.0.x]">>), + {error,invalid_uri,"::1227.0.0.1"} = uri_string:parse(<<"//[::1227.0.0.1]">>), + {error,invalid_uri,"G"} = uri_string:parse(<<"//[2001:0db8:0000:0000:0000:0000:1428:G7ab]">>). + +parse_binary_port(_Config) -> + #{path:= <<"/:8042">>} = + uri_string:parse(<<"/:8042">>), + #{host:= <<>>, port := 8042} = + uri_string:parse(<<"//:8042">>), + #{host := <<"example.com">>, port:= 8042} = + uri_string:parse(<<"//example.com:8042">>), + #{scheme := <<"foo">>, path := <<"/:8042">>} = + uri_string:parse(<<"foo:/:8042">>), + #{scheme := <<"foo">>, host := <<>>, port := 8042} = + uri_string:parse(<<"foo://:8042">>), + #{scheme := <<"foo">>, host := <<"example.com">>, port := 8042} = + uri_string:parse(<<"foo://example.com:8042">>), + {error,invalid_uri,":"} = uri_string:parse(":600"), + {error,invalid_uri,"x"} = uri_string:parse("//:8042x"). + +parse_binary_path(_Config) -> + #{path := <<"over/there">>} = uri_string:parse(<<"over/there">>), + #{path := <<"/over/there">>} = uri_string:parse(<<"/over/there">>), + #{scheme := <<"foo">>, path := <<"/over/there">>} = + uri_string:parse(<<"foo:/over/there">>), + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/over/there">>} = + uri_string:parse(<<"foo://example.com/over/there">>), + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/over/there">>, port := 8042} = + uri_string:parse(<<"foo://example.com:8042/over/there">>). + +parse_binary_query(_Config) -> + #{scheme := <<"foo">>, query := <<"name=ferret">>} = + uri_string:parse(<<"foo:?name=ferret">>), + #{scheme := <<"foo">>, path:= <<"over/there">>, query := <<"name=ferret">>} = + uri_string:parse(<<"foo:over/there?name=ferret">>), + #{scheme := <<"foo">>, path:= <<"/over/there">>, query := <<"name=ferret">>} = + uri_string:parse(<<"foo:/over/there?name=ferret">>), + #{scheme := <<"foo">>, host := <<"example.com">>, query := <<"name=ferret">>} = + uri_string:parse(<<"foo://example.com?name=ferret">>), + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>, query := <<"name=ferret">>} = + uri_string:parse(<<"foo://example.com/?name=ferret">>), + + #{path := <<>>, query := <<"name=ferret">>} = + uri_string:parse(<<"?name=ferret">>), + #{path := <<"over/there">>, query := <<"name=ferret">>} = + uri_string:parse(<<"over/there?name=ferret">>), + #{path := <<"/">>, query := <<"name=ferret">>} = + uri_string:parse(<<"/?name=ferret">>), + #{path := <<"/over/there">>, query := <<"name=ferret">>} = + uri_string:parse(<<"/over/there?name=ferret">>), + #{host := <<"example.com">>, query := <<"name=ferret">>} = + uri_string:parse(<<"//example.com?name=ferret">>), + #{host := <<"example.com">>, path := <<"/">>, query := <<"name=ferret">>} = + uri_string:parse(<<"//example.com/?name=ferret">>). + +parse_binary_pct_encoded_query(_Config) -> + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>, + query := <<"name=合気道"/utf8>>} = + uri_string:parse(<<"foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>), + #{host := <<"example.com">>, path := <<"/">>, query := <<"name=合気道"/utf8>>} = + uri_string:parse(<<"//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>). + +parse_binary_fragment(_Config) -> + #{scheme := <<"foo">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo:#nose">>), + #{scheme := <<"foo">>, path:= <<"over/there">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo:over/there#nose">>), + #{scheme := <<"foo">>, path:= <<"/over/there">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo:/over/there#nose">>), + #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://example.com#nose">>), + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://example.com/#nose">>), + #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://example.com#nose">>), + + #{fragment := <<"nose">>} = + uri_string:parse(<<"#nose">>), + #{path := <<"over/there">>, fragment := <<"nose">>} = + uri_string:parse(<<"over/there#nose">>), + #{path := <<"/">>, fragment := <<"nose">>} = + uri_string:parse(<<"/#nose">>), + #{path := <<"/over/there">>, fragment := <<"nose">>} = + uri_string:parse(<<"/over/there#nose">>), + #{host := <<"example.com">>, fragment := <<"nose">>} = + uri_string:parse(<<"//example.com#nose">>), + #{host := <<"example.com">>, path := <<"/">>, fragment := <<"nose">>} = + uri_string:parse(<<"//example.com/#nose">>). + +parse_binary_pct_encoded_fragment(_Config) -> + #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"合気道"/utf8>>} = + uri_string:parse(<<"foo://example.com#%E5%90%88%E6%B0%97%E9%81%93">>), + #{host := <<"example.com">>, path := <<"/">>, fragment := <<"合気道"/utf8>>} = + uri_string:parse(<<"//example.com/#%E5%90%88%E6%B0%97%E9%81%93">>). + +parse_scheme(_Config) -> + #{} = uri_string:parse(""), + #{path := "foo"} = uri_string:parse("foo"), + #{scheme := "foo"} = uri_string:parse("foo:"), + #{scheme := "foo", path := "bar:nisse"} = uri_string:parse("foo:bar:nisse"), + #{scheme := "foo", host := ""} = uri_string:parse("foo://"), + #{scheme := "foo", host := "", path := "/"} = uri_string:parse("foo:///"), + #{scheme := "foo", host := "", path := "//"} = uri_string:parse("foo:////"), + + #{path := "/"} = uri_string:parse("/"), + #{host := ""} = uri_string:parse("//"), + #{host := "", path := "/"} = uri_string:parse("///"). + +parse_userinfo(_Config) -> + #{scheme := "user", path := "password@localhost"} = uri_string:parse("user:password@localhost"), + #{path := "user@"} = uri_string:parse("user@"), + #{path := "/user@"} = uri_string:parse("/user@"), + #{path := "user@localhost"} = uri_string:parse("user@localhost"), + #{userinfo := "user", host := "localhost"} = uri_string:parse("//user@localhost"), + #{userinfo := "user:password", host := "localhost"} = + uri_string:parse("//user:password@localhost"), + #{scheme := "foo", path := "/user@"} = + uri_string:parse("foo:/user@"), + #{scheme := "foo", userinfo := "user", host := "localhost"} = + uri_string:parse("foo://user@localhost"), + #{scheme := "foo", userinfo := "user:password", host := "localhost"} = + uri_string:parse("foo://user:password@localhost"). + +parse_pct_encoded_userinfo(_Config) -> + #{scheme := "user", path := "合@気道"} = + uri_string:parse("user:%E5%90%88@%E6%B0%97%E9%81%93"), + #{path := "合気道@"} = uri_string:parse("%E5%90%88%E6%B0%97%E9%81%93@"), + #{path := "/合気道@"} = uri_string:parse("/%E5%90%88%E6%B0%97%E9%81%93@"), + #{path := "合@気道"} = uri_string:parse("%E5%90%88@%E6%B0%97%E9%81%93"), + #{userinfo := "合", host := "気道"} = + uri_string:parse("//%E5%90%88@%E6%B0%97%E9%81%93"), + #{userinfo := "合:気", host := "道"} = + uri_string:parse("//%E5%90%88:%E6%B0%97@%E9%81%93"), + #{scheme := "foo", path := "/合気道@"} = + uri_string:parse("foo:/%E5%90%88%E6%B0%97%E9%81%93@"), + #{scheme := "foo", userinfo := "合", host := "気道"} = + uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93"), + #{scheme := "foo", userinfo := "合:気", host := "道"} = + uri_string:parse("foo://%E5%90%88:%E6%B0%97@%E9%81%93"), + {error,invalid_uri,"@"} = uri_string:parse("//%E5%90%88@%E6%B0%97%E9%81%93@"), + {error,invalid_uri,":"} = uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93@"). + + +parse_host(_Config) -> + #{host := "hostname"} = uri_string:parse("//hostname"), + #{host := "hostname",scheme := "foo"} = uri_string:parse("foo://hostname"), + #{host := "hostname",scheme := "foo", userinfo := "user"} = + uri_string:parse("foo://user@hostname"). + +parse_host_ipv4(_Config) -> + #{host := "127.0.0.1"} = uri_string:parse("//127.0.0.1"), + #{host := "2001:0db8:0000:0000:0000:0000:1428:07ab"} = + uri_string:parse("//[2001:0db8:0000:0000:0000:0000:1428:07ab]"), + #{host := "127.0.0.1", path := "/over/there"} = uri_string:parse("//127.0.0.1/over/there"), + #{host := "127.0.0.1", query := "name=ferret"} = uri_string:parse("//127.0.0.1?name=ferret"), + #{host := "127.0.0.1", fragment := "nose"} = uri_string:parse("//127.0.0.1#nose"), + {error,invalid_uri,"x"} = uri_string:parse("//127.0.0.x"), + {error,invalid_uri,"1227.0.0.1"} = uri_string:parse("//1227.0.0.1"). + +parse_host_ipv6(_Config) -> + #{host := "::127.0.0.1"} = uri_string:parse("//[::127.0.0.1]"), + #{host := "::127.0.0.1", path := "/over/there"} = uri_string:parse("//[::127.0.0.1]/over/there"), + #{host := "::127.0.0.1", query := "name=ferret"} = + uri_string:parse("//[::127.0.0.1]?name=ferret"), + #{host := "::127.0.0.1", fragment := "nose"} = uri_string:parse("//[::127.0.0.1]#nose"), + {error,invalid_uri,"x"} = uri_string:parse("//[::127.0.0.x]"), + {error,invalid_uri,"::1227.0.0.1"} = uri_string:parse("//[::1227.0.0.1]"), + {error,invalid_uri,"G"} = uri_string:parse("//[2001:0db8:0000:0000:0000:0000:1428:G7ab]"). + +parse_port(_Config) -> + #{path:= "/:8042"} = + uri_string:parse("/:8042"), + #{host:= "", port := 8042} = + uri_string:parse("//:8042"), + #{host := "example.com", port:= 8042} = + uri_string:parse("//example.com:8042"), + #{scheme := "foo", path := "/:8042"} = + uri_string:parse("foo:/:8042"), + #{scheme := "foo", host := "", port := 8042} = + uri_string:parse("foo://:8042"), + #{scheme := "foo", host := "example.com", port := 8042} = + uri_string:parse("foo://example.com:8042"). + +parse_path(_Config) -> + #{path := "over/there"} = uri_string:parse("over/there"), + #{path := "/over/there"} = uri_string:parse("/over/there"), + #{scheme := "foo", path := "/over/there"} = + uri_string:parse("foo:/over/there"), + #{scheme := "foo", host := "example.com", path := "/over/there"} = + uri_string:parse("foo://example.com/over/there"), + #{scheme := "foo", host := "example.com", path := "/over/there", port := 8042} = + uri_string:parse("foo://example.com:8042/over/there"). + +parse_query(_Config) -> + #{scheme := "foo", query := "name=ferret"} = + uri_string:parse("foo:?name=ferret"), + #{scheme := "foo", path:= "over/there", query := "name=ferret"} = + uri_string:parse("foo:over/there?name=ferret"), + #{scheme := "foo", path:= "/over/there", query := "name=ferret"} = + uri_string:parse("foo:/over/there?name=ferret"), + #{scheme := "foo", host := "example.com", query := "name=ferret"} = + uri_string:parse("foo://example.com?name=ferret"), + #{scheme := "foo", host := "example.com", path := "/", query := "name=ferret"} = + uri_string:parse("foo://example.com/?name=ferret"), + + #{path := "", query := "name=ferret"} = + uri_string:parse("?name=ferret"), + #{path := "over/there", query := "name=ferret"} = + uri_string:parse("over/there?name=ferret"), + #{path := "/", query := "name=ferret"} = + uri_string:parse("/?name=ferret"), + #{path := "/over/there", query := "name=ferret"} = + uri_string:parse("/over/there?name=ferret"), + #{host := "example.com", query := "name=ferret"} = + uri_string:parse("//example.com?name=ferret"), + #{host := "example.com", path := "/", query := "name=ferret"} = + uri_string:parse("//example.com/?name=ferret"). + +parse_pct_encoded_query(_Config) -> + #{scheme := "foo", host := "example.com", path := "/", + query := "name=合気道"} = + uri_string:parse("foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93"), + #{host := "example.com", path := "/", query := "name=合気道"} = + uri_string:parse("//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93"). + +parse_fragment(_Config) -> + #{scheme := "foo", fragment := "nose"} = + uri_string:parse("foo:#nose"), + #{scheme := "foo", path:= "over/there", fragment := "nose"} = + uri_string:parse("foo:over/there#nose"), + #{scheme := "foo", path:= "/over/there", fragment := "nose"} = + uri_string:parse("foo:/over/there#nose"), + #{scheme := "foo", host := "example.com", fragment := "nose"} = + uri_string:parse("foo://example.com#nose"), + #{scheme := "foo", host := "example.com", path := "/", fragment := "nose"} = + uri_string:parse("foo://example.com/#nose"), + #{scheme := "foo", host := "example.com", fragment := "nose"} = + uri_string:parse("foo://example.com#nose"), + + #{fragment := "nose"} = + uri_string:parse("#nose"), + #{path := "over/there", fragment := "nose"} = + uri_string:parse("over/there#nose"), + #{path := "/", fragment := "nose"} = + uri_string:parse("/#nose"), + #{path := "/over/there", fragment := "nose"} = + uri_string:parse("/over/there#nose"), + #{host := "example.com", fragment := "nose"} = + uri_string:parse("//example.com#nose"), + #{host := "example.com", path := "/", fragment := "nose"} = + uri_string:parse("//example.com/#nose"). + +parse_pct_encoded_fragment(_Config) -> + #{scheme := "foo", host := "example.com", fragment := "合気道"} = + uri_string:parse("foo://example.com#%E5%90%88%E6%B0%97%E9%81%93"), + #{host := "example.com", path := "/", fragment := "合気道"} = + uri_string:parse("//example.com/#%E5%90%88%E6%B0%97%E9%81%93"). + +parse_list(_Config) -> + #{scheme := "foo", path := "bar:nisse"} = uri_string:parse("foo:bar:nisse"), + #{scheme := "foo", host := "example.com", port := 8042, + path := "/over/there", query := "name=ferret", fragment := "nose"} = + uri_string:parse("foo://example.com:8042/over/there?name=ferret#nose"), + #{scheme := "foo", userinfo := "admin:admin", host := "example.com", port := 8042, + path := "/over/there", query := "name=ferret", fragment := "nose"} = + uri_string:parse("foo://admin:[email protected]:8042/over/there?name=ferret#nose"). + +parse_binary(_Config) -> + #{scheme := <<"foo">>, path := <<"bar:nisse">>} = uri_string:parse(<<"foo:bar:nisse">>), + #{scheme := <<"foo">>, host := <<"example.com">>, port := 8042, + path := <<"/over/there">>, query := <<"name=ferret">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://example.com:8042/over/there?name=ferret#nose">>), + #{scheme := <<"foo">>, userinfo := <<"admin:admin">>, host := <<"example.com">>, port := 8042, + path := <<"/over/there">>, query := <<"name=ferret">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://admin:[email protected]:8042/over/there?name=ferret#nose">>). + + +parse_mixed(_Config) -> + #{scheme := "foo", path := "bar"} = + uri_string:parse(lists:append("fo",<<"o:bar">>)), + #{scheme := "foo", path := "bar"} = + uri_string:parse(lists:append("foo:b",<<"ar">>)), + #{scheme := "foo", path := "bar:bar"} = + uri_string:parse([[102],[111,111],<<":bar">>,58,98,97,114]). + +parse_relative(_Config) -> + #{path := "/path"} = + uri_string:parse(lists:append("/pa",<<"th">>)), + #{path := "foo"} = + uri_string:parse(lists:append("fo",<<"o">>)). + +parse_special(_Config) -> + #{host := [],query := []} = uri_string:parse("//?"), + #{fragment := [],host := []} = uri_string:parse("//#"), + #{host := [],query := [],scheme := "foo"} = uri_string:parse("foo://?"), + #{fragment := [],host := [],scheme := "foo"} = uri_string:parse("foo://#"), + #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>), + #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>), + #{host := <<>>, path := <<"/hostname">>} = uri_string:parse(<<"///hostname">>), + #{host := [],path := "/",query := []} = uri_string:parse("///?"), + #{fragment := [],host := [],path := "/"} = uri_string:parse("///#"), + #{host := "foo",query := []} = uri_string:parse("//foo?"), + #{fragment := [],host := "foo"} = uri_string:parse("//foo#"), + #{host := "foo",path := "/"} = uri_string:parse("//foo/"), + #{host := "foo",query := [],scheme := "http"} = uri_string:parse("http://foo?"), + #{fragment := [],host := "foo",scheme := "http"} = uri_string:parse("http://foo#"), + #{host := "foo",path := "/",scheme := "http"} = uri_string:parse("http://foo/"), + #{fragment := [],host := "host",port := 80,scheme := "http"} = uri_string:parse("http://host:80#"), + #{host := "host",port := 80,query := [],scheme := "http"} = uri_string:parse("http://host:80?"), + #{path := [],query := []} = uri_string:parse("?"), + #{path := [],query := "?"} = uri_string:parse("??"), + #{path := [],query := "??"} = uri_string:parse("???"). + +parse_special2(_Config) -> + #{host := [],path := "/",port := 1,scheme := "a"} = uri_string:parse("a://:1/"), + #{path := "/a/",scheme := "a"} = uri_string:parse("a:/a/"), + #{host := [],path := [],userinfo := []} = uri_string:parse("//@"), + #{host := [],path := [],scheme := "foo",userinfo := []} = uri_string:parse("foo://@"), + #{host := [],path := "/",userinfo := []} = uri_string:parse("//@/"), + #{host := [],path := "/",scheme := "foo",userinfo := []} = uri_string:parse("foo://@/"), + #{host := "localhost",path := "/",port := undefined} = uri_string:parse("//localhost:/"), + #{host := [],path := [],port := undefined} = uri_string:parse("//:"). + +parse_negative(_Config) -> + {error,invalid_uri,"å"} = uri_string:parse("å"), + {error,invalid_uri,"å"} = uri_string:parse("aå:/foo"), + {error,invalid_uri,":"} = uri_string:parse("foo://usär@host"), + {error,invalid_uri,"ö"} = uri_string:parse("//host/path?foö=bar"), + {error,invalid_uri,"ö"} = uri_string:parse("//host/path#foö"), + {error,invalid_uri,"127.256.0.1"} = uri_string:parse("//127.256.0.1"), + {error,invalid_uri,":::127.0.0.1"} = uri_string:parse("//[:::127.0.0.1]"), + {error,invalid_utf8,<<0,0,0,246>>} = uri_string:parse("//%00%00%00%F6"), + {error,invalid_uri,"A"} = uri_string:parse("//localhost:A8"). + + +%%------------------------------------------------------------------------- +%% Recompose tests +%%------------------------------------------------------------------------- +recompose_fragment(_Config) -> + <<?FRAGMENT_ENC>> = uri_string:recompose(#{fragment => <<?FRAGMENT/utf8>>, path => <<>>}), + ?FRAGMENT_ENC = uri_string:recompose(#{fragment => ?FRAGMENT, path => ""}). + +recompose_parse_fragment(_Config) -> + <<?FRAGMENT_ENC>> = uri_string:recompose(uri_string:parse(<<?FRAGMENT_ENC>>)), + ?FRAGMENT_ENC = uri_string:recompose(uri_string:parse(?FRAGMENT_ENC)). + +recompose_query(_Config) -> + <<?QUERY_ENC>> = + uri_string:recompose(#{query => <<?QUERY/utf8>>, path => <<>>}), + <<?QUERY_ENC?FRAGMENT_ENC>> = + uri_string:recompose(#{query => <<?QUERY/utf8>>, + fragment => <<?FRAGMENT/utf8>>, + path => <<>>}), + "?name=%C3%B6rn" = + uri_string:recompose(#{query => "name=örn", path => ""}), + "?name=%C3%B6rn#n%C3%A4sa" = + uri_string:recompose(#{query => "name=örn", + fragment => "näsa", + path => ""}). + +recompose_parse_query(_Config) -> + <<"?name=%C3%B6rn">> = uri_string:recompose(uri_string:parse(<<"?name=%C3%B6rn">>)), + <<"?name=%C3%B6rn#n%C3%A4sa">> = + uri_string:recompose(uri_string:parse(<<"?name=%C3%B6rn#n%C3%A4sa">>)), + "?name=%C3%B6rn" = uri_string:recompose(uri_string:parse("?name=%C3%B6rn")), + "?name=%C3%B6rn#n%C3%A4sa" = uri_string:recompose(uri_string:parse("?name=%C3%B6rn#n%C3%A4sa")). + +recompose_path(_Config) -> + <<"/d%C3%A4r">> = + uri_string:recompose(#{path => <<"/där"/utf8>>}), + <<"/d%C3%A4r#n%C3%A4sa">> = + uri_string:recompose(#{path => <<"/där"/utf8>>, + fragment => <<"näsa"/utf8>>}), + <<"/d%C3%A4r?name=%C3%B6rn">> = + uri_string:recompose(#{path => <<"/där"/utf8>>, + query => <<"name=örn"/utf8>>}), + <<"/d%C3%A4r?name=%C3%B6rn#n%C3%A4sa">> = + uri_string:recompose(#{path => <<"/där"/utf8>>, + query => <<"name=örn"/utf8>>, + fragment => <<"näsa"/utf8>>}), + + + "/d%C3%A4r" = + uri_string:recompose(#{path => "/där"}), + "/d%C3%A4r#n%C3%A4sa" = + uri_string:recompose(#{path => "/där", + fragment => "näsa"}), + "/d%C3%A4r?name=%C3%B6rn" = + uri_string:recompose(#{path => "/där", + query => "name=örn"}), + "/d%C3%A4r?name=%C3%B6rn#n%C3%A4sa" = + uri_string:recompose(#{path => "/där", + query => "name=örn", + fragment => "näsa"}). + + +recompose_parse_path(_Config) -> + <<"/d%C3%A4r">> = + uri_string:recompose(uri_string:parse(<<"/d%C3%A4r">>)), + <<"/d%C3%A4r#n%C3%A4sa">> = + uri_string:recompose(uri_string:parse(<<"/d%C3%A4r#n%C3%A4sa">>)), + <<"/d%C3%A4r?name=%C3%B6rn">> = + uri_string:recompose(uri_string:parse(<<"/d%C3%A4r?name=%C3%B6rn">>)), + + "/d%C3%A4r" = + uri_string:recompose(uri_string:parse("/d%C3%A4r")), + "/d%C3%A4r#n%C3%A4sa" = + uri_string:recompose(uri_string:parse("/d%C3%A4r#n%C3%A4sa")), + "/d%C3%A4r?name=%C3%B6rn" = + uri_string:recompose(uri_string:parse("/d%C3%A4r?name=%C3%B6rn")). + + +recompose_autogen(_Config) -> + Tests = generate_test_vectors(uri_combinations()), + lists:map(fun run_test_recompose/1, Tests). + +parse_recompose_autogen(_Config) -> + Tests = generate_test_vectors(uri_combinations()), + lists:map(fun run_test_parse_recompose/1, Tests). + +transcode_basic(_Config) -> + <<"foo%C3%B6bar"/utf8>> = + uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>, [{in_encoding, utf32},{out_encoding, utf8}]), + "foo%C3%B6bar" = + uri_string:transcode("foo%00%00%00%F6bar", [{in_encoding, utf32},{out_encoding, utf8}]), + <<"foo%00%00%00%F6bar"/utf32>> = + uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]), + "foo%00%00%00%F6bar" = + uri_string:transcode("foo%C3%B6bar", [{in_encoding, utf8},{out_encoding, utf32}]), + "foo%C3%B6bar" = + uri_string:transcode("foo%F6bar", [{in_encoding, latin1},{out_encoding, utf8}]). + +transcode_options(_Config) -> + <<"foo%C3%B6bar"/utf8>> = + uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, []), + <<"foo%C3%B6bar"/utf8>> = + uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>, [{in_encoding, utf32}]), + <<"foo%00%00%00%F6bar"/utf32>> = + uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, [{out_encoding, utf32}]). + +transcode_mixed(_Config) -> + "foo%00%00%00%F6bar" = + uri_string:transcode(["foo",<<"%C3%B6"/utf8>>,<<"ba"/utf8>>,"r"], [{out_encoding, utf32}]), + "foo%00%00%00%F6bar" = + uri_string:transcode(["foo",<<"%C3%"/utf8>>,<<"B6ba"/utf8>>,"r"], [{out_encoding, utf32}]), + "foo%C3%B6bar" = + uri_string:transcode(["foo%00", <<"%00%0"/utf32>>,<<"0%F"/utf32>>,"6bar"], [{in_encoding, utf32},{out_encoding, utf8}]). + +transcode_negative(_Config) -> + {error,invalid_percent_encoding,"%BXbar"} = + uri_string:transcode(<<"foo%C3%BXbar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]), + {error,invalid_input,<<"ö">>} = + uri_string:transcode("foo%F6bar", [{in_encoding, utf8},{out_encoding, utf8}]). + +normalize(_Config) -> + "/a/g" = uri_string:normalize("/a/b/c/./../../g"), + <<"mid/6">> = uri_string:normalize(<<"mid/content=5/../6">>), + "http://localhost-%C3%B6rebro/a/g" = + uri_string:normalize("http://localhos%74-%c3%b6rebro:80/a/b/c/./../../g"), + <<"http://localhost-%C3%B6rebro/a/g">> = + uri_string:normalize(<<"http://localhos%74-%c3%b6rebro:80/a/b/c/./../../g">>), + <<"https://localhost/">> = + uri_string:normalize(<<"https://localhost:443">>), + <<"https://localhost:445/">> = + uri_string:normalize(<<"https://localhost:445">>), + <<"ftp://localhost">> = + uri_string:normalize(<<"ftp://localhost:21">>), + <<"ssh://localhost">> = + uri_string:normalize(<<"ssh://localhost:22">>), + <<"sftp://localhost">> = + uri_string:normalize(<<"sftp://localhost:22">>), + <<"tftp://localhost">> = + uri_string:normalize(<<"tftp://localhost:69">>). diff --git a/lib/stdlib/test/uri_string_property_test_SUITE.erl b/lib/stdlib/test/uri_string_property_test_SUITE.erl new file mode 100644 index 0000000000..ae2c61c7aa --- /dev/null +++ b/lib/stdlib/test/uri_string_property_test_SUITE.erl @@ -0,0 +1,39 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(uri_string_property_test_SUITE). + +-include_lib("common_test/include/ct.hrl"). +-compile(export_all). + +all() -> [recompose]. + +init_per_suite(Config) -> + ct_property_test:init_per_suite(Config). + +end_per_suite(Config) -> + Config. + +%%%======================================================================== +%%% Test suites +%%%======================================================================== +recompose(Config) -> + ct_property_test:quickcheck( + uri_string_recompose:prop_recompose(), + Config). |