diff options
Diffstat (limited to 'lib/stdlib')
-rw-r--r-- | lib/stdlib/doc/src/Makefile | 1 | ||||
-rw-r--r-- | lib/stdlib/doc/src/rand.xml | 118 | ||||
-rw-r--r-- | lib/stdlib/doc/src/ref_man.xml | 1 | ||||
-rw-r--r-- | lib/stdlib/doc/src/specs.xml | 1 | ||||
-rw-r--r-- | lib/stdlib/doc/src/uri_string.xml | 230 | ||||
-rw-r--r-- | lib/stdlib/src/Makefile | 1 | ||||
-rw-r--r-- | lib/stdlib/src/base64.erl | 108 | ||||
-rw-r--r-- | lib/stdlib/src/ets.erl | 2 | ||||
-rw-r--r-- | lib/stdlib/src/filename.erl | 17 | ||||
-rw-r--r-- | lib/stdlib/src/rand.erl | 261 | ||||
-rw-r--r-- | lib/stdlib/src/stdlib.app.src | 1 | ||||
-rw-r--r-- | lib/stdlib/src/uri_string.erl | 1842 | ||||
-rw-r--r-- | lib/stdlib/test/Makefile | 1 | ||||
-rw-r--r-- | lib/stdlib/test/ets_SUITE.erl | 2 | ||||
-rw-r--r-- | lib/stdlib/test/filename_SUITE.erl | 120 | ||||
-rw-r--r-- | lib/stdlib/test/property_test/README | 12 | ||||
-rw-r--r-- | lib/stdlib/test/property_test/uri_string_recompose.erl | 361 | ||||
-rw-r--r-- | lib/stdlib/test/qlc_SUITE.erl | 16 | ||||
-rw-r--r-- | lib/stdlib/test/rand_SUITE.erl | 217 | ||||
-rw-r--r-- | lib/stdlib/test/uri_string_SUITE.erl | 844 | ||||
-rw-r--r-- | lib/stdlib/test/uri_string_property_test_SUITE.erl | 39 |
21 files changed, 4124 insertions, 71 deletions
diff --git a/lib/stdlib/doc/src/Makefile b/lib/stdlib/doc/src/Makefile index 93eac8220d..aeed79408b 100644 --- a/lib/stdlib/doc/src/Makefile +++ b/lib/stdlib/doc/src/Makefile @@ -98,6 +98,7 @@ XML_REF3_FILES = \ sys.xml \ timer.xml \ unicode.xml \ + uri_string.xml \ win32reg.xml \ zip.xml diff --git a/lib/stdlib/doc/src/rand.xml b/lib/stdlib/doc/src/rand.xml index 89fb858823..21f680a0ee 100644 --- a/lib/stdlib/doc/src/rand.xml +++ b/lib/stdlib/doc/src/rand.xml @@ -133,8 +133,9 @@ variable <c>rand_seed</c> to remember the current state.</p> <p>If a process calls - <seealso marker="#uniform-0"><c>uniform/0</c></seealso> or - <seealso marker="#uniform-1"><c>uniform/1</c></seealso> without + <seealso marker="#uniform-0"><c>uniform/0</c></seealso>, + <seealso marker="#uniform-1"><c>uniform/1</c></seealso> or + <seealso marker="#uniform_real-0"><c>uniform_real/0</c></seealso> without setting a seed first, <seealso marker="#seed-1"><c>seed/1</c></seealso> is called automatically with the default algorithm and creates a non-constant seed.</p> @@ -168,10 +169,17 @@ R3 = rand:uniform(),</pre> S0 = rand:seed_s(exrop), {R4, S1} = rand:uniform_s(S0),</pre> + <p>Textbook basic form Box-Muller standard normal deviate</p> + + <pre> +R5 = rand:uniform_real(), +R6 = rand:uniform(), +SND0 = math:sqrt(-2 * math:log(R5)) * math:cos(math:pi() * R6)</pre> + <p>Create a standard normal deviate:</p> <pre> -{SND0, S2} = rand:normal_s(S1),</pre> +{SND1, S2} = rand:normal_s(S1),</pre> <p>Create a normal deviate with mean -3 and variance 0.5:</p> @@ -414,7 +422,8 @@ tests. We suggest to use a sign test to extract a random Boolean value.</pre> This function may return exactly <c>0.0</c> which can be fatal for certain applications. If that is undesired you can use <c>(1.0 - rand:uniform())</c> to get the - interval <c>0.0 < <anno>X</anno> =< 1.0</c>. + interval <c>0.0 < <anno>X</anno> =< 1.0</c>, or instead use + <seealso marker="#uniform_real-0"><c>uniform_real/0</c></seealso>. </p> <p> If neither endpoint is desired you can test and re-try @@ -432,6 +441,42 @@ end.</pre> </func> <func> + <name name="uniform_real" arity="0"/> + <fsummary>Return a random float.</fsummary> + <desc><marker id="uniform_real-0"/> + <p> + Returns a random float + uniformly distributed in the value range + <c>DBL_MIN =< <anno>X</anno> < 1.0</c> + and updates the state in the process dictionary. + </p> + <p> + Conceptually, a random real number <c>R</c> is generated + from the interval <c>0 =< R < 1</c> and then the + closest rounded down normalized number + in the IEEE 754 Double precision format + is returned. + </p> + <note> + <p> + The generated numbers from this function has got better + granularity for small numbers than the regular + <seealso marker="#uniform-0"><c>uniform/0</c></seealso> + because all bits in the mantissa are random. + This property, in combination with the fact that exactly zero + is never returned is useful for algoritms doing for example + <c>1.0 / <anno>X</anno></c> or <c>math:log(<anno>X</anno>)</c>. + </p> + </note> + <p> + See + <seealso marker="#uniform_real_s-1"><c>uniform_real_s/1</c></seealso> + for more explanation. + </p> + </desc> + </func> + + <func> <name name="uniform" arity="1"/> <fsummary>Return a random integer.</fsummary> <desc><marker id="uniform-1"/> @@ -460,7 +505,8 @@ end.</pre> This function may return exactly <c>0.0</c> which can be fatal for certain applications. If that is undesired you can use <c>(1.0 - rand:uniform(State))</c> to get the - interval <c>0.0 < <anno>X</anno> =< 1.0</c>. + interval <c>0.0 < <anno>X</anno> =< 1.0</c>, or instead use + <seealso marker="#uniform_real_s-1"><c>uniform_real_s/1</c></seealso>. </p> <p> If neither endpoint is desired you can test and re-try @@ -478,6 +524,68 @@ end.</pre> </func> <func> + <name name="uniform_real_s" arity="1"/> + <fsummary>Return a random float.</fsummary> + <desc> + <p> + Returns, for a specified state, a random float + uniformly distributed in the value range + <c>DBL_MIN =< <anno>X</anno> < 1.0</c> + and updates the state in the process dictionary. + </p> + <p> + Conceptually, a random real number <c>R</c> is generated + from the interval <c>0 =< R < 1</c> and then the + closest rounded down normalized number + in the IEEE 754 Double precision format + is returned. + </p> + <note> + <p> + The generated numbers from this function has got better + granularity for small numbers than the regular + <seealso marker="#uniform_s-1"><c>uniform_s/1</c></seealso> + because all bits in the mantissa are random. + This property, in combination with the fact that exactly zero + is never returned is useful for algoritms doing for example + <c>1.0 / <anno>X</anno></c> or <c>math:log(<anno>X</anno>)</c>. + </p> + </note> + <p> + The concept implicates that the probability to get + exactly zero is extremely low; so low that this function + is in fact guaranteed to never return zero. The smallest + number that it might return is <c>DBL_MIN</c>, which is + 2.0^(-1022). + </p> + <p> + The value range stated at the top of this function + description is technically correct, but + <c>0.0 =< <anno>X</anno> < 1.0</c> + is a better description of the generated numbers' + statistical distribution. Except that exactly 0.0 + is never returned, which is not possible to observe + statistically. + </p> + <p> + For example; for all sub ranges + <c>N*2.0^(-53) =< X < (N+1)*2.0^(-53)</c> + where + <c>0 =< integer(N) < 2.0^53</c> + the probability is the same. + Compare that with the form of the numbers generated by + <seealso marker="#uniform_s-1"><c>uniform_s/1</c></seealso>. + </p> + <p> + Having to generate extra random bits for + small numbers costs a little performance. + This function is about 20% slower than the regular + <seealso marker="#uniform_s-1"><c>uniform_s/1</c></seealso> + </p> + </desc> + </func> + + <func> <name name="uniform_s" arity="2"/> <fsummary>Return a random integer.</fsummary> <desc> diff --git a/lib/stdlib/doc/src/ref_man.xml b/lib/stdlib/doc/src/ref_man.xml index 878a3babc5..68bfddbc71 100644 --- a/lib/stdlib/doc/src/ref_man.xml +++ b/lib/stdlib/doc/src/ref_man.xml @@ -93,6 +93,7 @@ <xi:include href="sys.xml"/> <xi:include href="timer.xml"/> <xi:include href="unicode.xml"/> + <xi:include href="uri_string.xml"/> <xi:include href="win32reg.xml"/> <xi:include href="zip.xml"/> </application> diff --git a/lib/stdlib/doc/src/specs.xml b/lib/stdlib/doc/src/specs.xml index 45b207b13d..d559adf9b6 100644 --- a/lib/stdlib/doc/src/specs.xml +++ b/lib/stdlib/doc/src/specs.xml @@ -60,6 +60,7 @@ <xi:include href="../specs/specs_sys.xml"/> <xi:include href="../specs/specs_timer.xml"/> <xi:include href="../specs/specs_unicode.xml"/> + <xi:include href="../specs/specs_uri_string.xml"/> <xi:include href="../specs/specs_win32reg.xml"/> <xi:include href="../specs/specs_zip.xml"/> </specs> diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml new file mode 100644 index 0000000000..9ace2b0a05 --- /dev/null +++ b/lib/stdlib/doc/src/uri_string.xml @@ -0,0 +1,230 @@ +<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE erlref SYSTEM "erlref.dtd"> + +<erlref> + <header> + <copyright> + <year>2017</year><year>2017</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + </legalnotice> + + <title>uri_string</title> + <prepared>Péter Dimitrov</prepared> + <docno>1</docno> + <date>2017-10-24</date> + <rev>A</rev> + </header> + <module>uri_string</module> + <modulesummary>URI processing functions.</modulesummary> + <description> + <p>This module contains functions for parsing and handling URIs + (<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>). + </p> + <p>A URI is an identifier consisting of a sequence of characters matching the syntax + rule named <em>URI</em> in <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>. + </p> + <p> The generic URI syntax consists of a hierarchical sequence of components referred + to as the scheme, authority, path, query, and fragment:</p> + <pre> + URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + hier-part = "//" authority path-abempty + / path-absolute + / path-rootless + / path-empty + scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + authority = [ userinfo "@" ] host [ ":" port ] + userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + + reserved = gen-delims / sub-delims + gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + </pre><br></br> + <p>The interpretation of a URI depends only on the characters used and not on how those + characters are represented in a network protocol.</p> + <p>The functions implemented by this module cover the following use cases:</p> + <list type="bulleted"> + <item>Parsing URIs into its components and returing a map<br></br> + <seealso marker="#parse/1"><c>parse/1</c></seealso> + </item> + <item>Recomposing a map of URI components into a URI string<br></br> + <seealso marker="#recompose/1"><c>recompose/1</c></seealso> + </item> + <item>Changing inbound binary and percent-encoding of URIs<br></br> + <seealso marker="#transcode/2"><c>transcode/2</c></seealso> + </item> + <item>Transforming URIs into a normalized form<br></br> + <seealso marker="#normalize/1"><c>normalize/1</c></seealso> + </item> + </list> + <p>There are four different encodings present during the handling of URIs:</p> + <list type="bulleted"> + <item>Inbound binary encoding in binaries</item> + <item>Inbound percent-encoding in lists and binaries</item> + <item>Outbound binary encoding in binaries</item> + <item>Outbound percent-encoding in lists and binaries</item> + </list> + <p>Functions with <c>uri_string()</c> argument accept lists, binaries and + mixed lists (lists with binary elements) as input type. All of the functions but + <c>transcode/2</c> expects input as lists of unicode codepoints, UTF-8 encoded binaries + and UTF-8 percent-encoded URI parts ("%C3%B6" corresponds to the unicode character "ö").</p> + <p>Unless otherwise specified the return value type and encoding are the same as the input + type and encoding. That is, binary input returns binary output, list input returns a list + output but mixed input returns list output.</p> + <p>In case of lists there is only percent-encoding. In binaries, however, both binary encoding + and percent-encoding shall be considered. <c>transcode/2</c> provides the means to convert + between the supported encodings, it takes a <c>uri_string()</c> and a list of options + specifying inbound and outbound encodings.</p> + <p><url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> does not mandate any specific + character encoding and it is usually defined by the protocol or surrounding text. This library + takes the same assumption, binary and percent-encoding are handled as one configuration unit, + they cannot be set to different values.</p> + </description> + + <datatypes> + <datatype> + <name name="error"/> + <desc> + <p>Error tuple indicating the type of error. Possible values of the second component:</p> + <list type="bulleted"> + <item><c>invalid_input</c></item> + <item><c>invalid_map</c></item> + <item><c>invalid_percent_encoding</c></item> + <item><c>invalid_scheme</c></item> + <item><c>invalid_uri</c></item> + <item><c>invalid_utf8</c></item> + </list> + <p>The third component is a term providing additional information about the + cause of the error.</p> + </desc> + </datatype> + <datatype> + <name name="uri_map"/> + <desc> + <p>Map holding the main components of a URI.</p> + </desc> + </datatype> + <datatype> + <name name="uri_string"/> + <desc> + <p>List of unicode codepoints, a UTF-8 encoded binary, or a mix of the two, + representing an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> + compliant URI (<em>percent-encoded form</em>). + A URI is a sequence of characters from a very limited set: the letters of + the basic Latin alphabet, digits, and a few special characters.</p> + </desc> + </datatype> + </datatypes> + + <funcs> + + <func> + <name name="normalize" arity="1"/> + <fsummary>Syntax-based normalization.</fsummary> + <desc> + <p>Transforms <c><anno>URIString</anno></c> into a normalized form + using Syntax-Based Normalization as defined by + <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>.</p> + <p>This function implements case normalization, percent-encoding + normalization, path segment normalization and scheme based normalization + for HTTP(S) with basic support for FTP, SSH, SFTP and TFTP.</p> + <p><em>Example:</em></p> + <pre> +1> <input>uri_string:normalize("/a/b/c/./../../g").</input> +"/a/g" +2> <![CDATA[uri_string:normalize(<<"mid/content=5/../6">>).]]> +<![CDATA[<<"mid/6">>]]> +3> uri_string:normalize("http://localhost:80"). +"https://localhost/" + </pre> + </desc> + </func> + + <func> + <name name="parse" arity="1"/> + <fsummary>Parse URI into a map.</fsummary> + <desc> + <p>Parses an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> + compliant <c>uri_string()</c> into a <c>uri_map()</c>, that holds the parsed + components of the <c>URI</c>. + If parsing fails, an error tuple is returned.</p> + <p>See also the opposite operation <seealso marker="#recompose/1"> + <c>recompose/1</c></seealso>.</p> + <p><em>Example:</em></p> + <pre> +1> <input>uri_string:parse("foo://[email protected]:8042/over/there?name=ferret#nose").</input> +#{fragment => "nose",host => "example.com", + path => "/over/there",port => 8042,query => "name=ferret", + scheme => foo,userinfo => "user"} +2> <![CDATA[uri_string:parse(<<"foo://[email protected]:8042/over/there?name=ferret">>).]]> +<![CDATA[#{host => <<"example.com">>,path => <<"/over/there">>, + port => 8042,query => <<"name=ferret">>,scheme => <<"foo">>, + userinfo => <<"user">>}]]> + </pre> + </desc> + </func> + + <func> + <name name="recompose" arity="1"/> + <fsummary>Recompose URI.</fsummary> + <desc> + <p>Creates an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> compliant + <c><anno>URIString</anno></c> (percent-encoded), based on the components of + <c><anno>URIMap</anno></c>. + If the <c><anno>URIMap</anno></c> is invalid, an error tuple is returned.</p> + <p>See also the opposite operation <seealso marker="#parse/1"> + <c>parse/1</c></seealso>.</p> + <p><em>Example:</em></p> + <pre> +1> <input>URIMap = #{fragment => "nose", host => "example.com", path => "/over/there",</input> +1> port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}. +#{fragment => "top",host => "example.com", + path => "/over/there",port => 8042,query => "?name=ferret", + scheme => foo,userinfo => "user"} + +2> <input>uri_string:recompose(URIMap).</input> +"foo://example.com:8042/over/there?name=ferret#nose"</pre> + </desc> + </func> + + <func> + <name name="transcode" arity="2"/> + <fsummary>Transcode URI.</fsummary> + <desc> + <p>Transcodes an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> + compliant <c><anno>URIString</anno></c>, + where <c><anno>Options</anno></c> is a list of tagged tuples, specifying the inbound + (<c>in_encoding</c>) and outbound (<c>out_encoding</c>) encodings. <c>in_encoding</c> + and <c>out_encoding</c> specifies both binary encoding and percent-encoding for the + input and output data. Mixed encoding, where binary encoding is not the same as + percent-encoding, is not supported. + If an argument is invalid, an error tuple is returned.</p> + <p><em>Example:</em></p> + <pre> +1> <input><![CDATA[uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>,]]></input> +1> [{in_encoding, utf32},{out_encoding, utf8}]). +<![CDATA[<<"foo%C3%B6bar"/utf8>>]]> +2> uri_string:transcode("foo%F6bar", [{in_encoding, latin1}, +2> {out_encoding, utf8}]). +"foo%C3%B6bar" + </pre> + </desc> + </func> + + </funcs> +</erlref> diff --git a/lib/stdlib/src/Makefile b/lib/stdlib/src/Makefile index bf836203ec..8b156929d7 100644 --- a/lib/stdlib/src/Makefile +++ b/lib/stdlib/src/Makefile @@ -121,6 +121,7 @@ MODULES= \ timer \ unicode \ unicode_util \ + uri_string \ win32reg \ zip diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl index 5885745fb1..c8cf6fdffe 100644 --- a/lib/stdlib/src/base64.erl +++ b/lib/stdlib/src/base64.erl @@ -113,9 +113,9 @@ encode_binary(Bin) -> Data :: ascii_binary(). decode(Bin) when is_binary(Bin) -> - decode_binary(<<>>, Bin); + decode_binary(Bin, <<>>); decode(List) when is_list(List) -> - list_to_binary(decode_l(List)). + decode_list(List, <<>>). -spec mime_decode(Base64) -> Data when Base64 :: ascii_string() | ascii_binary(), @@ -186,31 +186,41 @@ mime_decode_to_string(List) when is_list(List) -> bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}). -decode_binary(Result0, <<C:8,T0/bits>>) -> - case element(C, ?DECODE_MAP) of - bad -> - erlang:error({badarg,C}); - ws -> - decode_binary(Result0, T0); - eq -> - case strip_ws(T0) of - <<$=:8,T/binary>> -> - <<>> = strip_ws(T), - Split = byte_size(Result0) - 1, - <<Result:Split/bytes,_:4>> = Result0, - Result; - T -> - <<>> = strip_ws(T), - Split = byte_size(Result0) - 1, - <<Result:Split/bytes,_:2>> = Result0, - Result - end; - Bits -> - decode_binary(<<Result0/bits,Bits:6>>, T0) +decode_binary(<<C1:8, Cs/bits>>, A) -> + case element(C1, ?DECODE_MAP) of + ws -> decode_binary(Cs, A); + B1 -> decode_binary(Cs, A, B1) end; -decode_binary(Result, <<>>) -> - true = is_binary(Result), - Result. +decode_binary(<<>>, A) -> + A. + +decode_binary(<<C2:8, Cs/bits>>, A, B1) -> + case element(C2, ?DECODE_MAP) of + ws -> decode_binary(Cs, A, B1); + B2 -> decode_binary(Cs, A, B1, B2) + end. + +decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) -> + case element(C3, ?DECODE_MAP) of + ws -> decode_binary(Cs, A, B1, B2); + B3 -> decode_binary(Cs, A, B1, B2, B3) + end. + +decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) -> + case element(C4, ?DECODE_MAP) of + ws -> decode_binary(Cs, A, B1, B2, B3); + eq when B3 =:= eq -> only_ws_binary(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>); + eq -> only_ws_binary(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>); + B4 -> decode_binary(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>) + end. + +only_ws_binary(<<>>, A) -> + A; +only_ws_binary(<<C:8, Cs/bits>>, A) -> + case element(C, ?DECODE_MAP) of + ws -> only_ws_binary(Cs, A); + _ -> erlang:error(function_clause) + end. %% Skipping pad character if not at end of string. Also liberal about %% excess padding and skipping of other illegal (non-base64 alphabet) @@ -262,6 +272,42 @@ mime_decode_binary_after_eq(Result0, <<>>, Eq) -> Result end. +decode_list([C1 | Cs], A) -> + case element(C1, ?DECODE_MAP) of + ws -> decode_list(Cs, A); + B1 -> decode_list(Cs, A, B1) + end; +decode_list([], A) -> + A. + +decode_list([C2 | Cs], A, B1) -> + case element(C2, ?DECODE_MAP) of + ws -> decode_list(Cs, A, B1); + B2 -> decode_list(Cs, A, B1, B2) + end. + +decode_list([C3 | Cs], A, B1, B2) -> + case element(C3, ?DECODE_MAP) of + ws -> decode_list(Cs, A, B1, B2); + B3 -> decode_list(Cs, A, B1, B2, B3) + end. + +decode_list([C4 | Cs], A, B1, B2, B3) -> + case element(C4, ?DECODE_MAP) of + ws -> decode_list(Cs, A, B1, B2, B3); + eq when B3 =:= eq -> only_ws(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>); + eq -> only_ws(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>); + B4 -> decode_list(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>) + end. + +only_ws([], A) -> + A; +only_ws([C | Cs], A) -> + case element(C, ?DECODE_MAP) of + ws -> only_ws(Cs, A); + _ -> erlang:error(function_clause) + end. + decode([], A) -> A; decode([$=,$=,C2,C1|Cs], A) -> Bits2x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12), @@ -292,16 +338,6 @@ strip_spaces([$\r|Cs], A) -> strip_spaces(Cs, A); strip_spaces([$\n|Cs], A) -> strip_spaces(Cs, A); strip_spaces([C|Cs], A) -> strip_spaces(Cs, [C | A]). -strip_ws(<<$\t,T/binary>>) -> - strip_ws(T); -strip_ws(<<$\n,T/binary>>) -> - strip_ws(T); -strip_ws(<<$\r,T/binary>>) -> - strip_ws(T); -strip_ws(<<$\s,T/binary>>) -> - strip_ws(T); -strip_ws(T) -> T. - %% Skipping pad character if not at end of string. Also liberal about %% excess padding and skipping of other illegal (non-base64 alphabet) %% characters. See section 3.3 of RFC4648 diff --git a/lib/stdlib/src/ets.erl b/lib/stdlib/src/ets.erl index 4858c8d13c..b6548626f3 100644 --- a/lib/stdlib/src/ets.erl +++ b/lib/stdlib/src/ets.erl @@ -1700,6 +1700,8 @@ choice(Height, Width, P, Mode, Tab, Key, Turn, Opos) -> io:format("~ts\n", [ErrorString]), choice(Height, Width, P, Mode, Tab, Key, Turn, Opos) end; + eof -> + ok; _ -> choice(Height, Width, P, Mode, Tab, Key, Turn, Opos) end. diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index 919f8f20e6..a322bd002d 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -472,6 +472,10 @@ join(Name1, Name2) when is_atom(Name2) -> join1([UcLetter, $:|Rest], RelativeName, [], win32) when is_integer(UcLetter), UcLetter >= $A, UcLetter =< $Z -> join1(Rest, RelativeName, [$:, UcLetter+$a-$A], win32); +join1([$\\,$\\|Rest], RelativeName, [], win32) -> + join1([$/,$/|Rest], RelativeName, [], win32); +join1([$/,$/|Rest], RelativeName, [], win32) -> + join1(Rest, RelativeName, [$/,$/], win32); join1([$\\|Rest], RelativeName, Result, win32) -> join1([$/|Rest], RelativeName, Result, win32); join1([$/|Rest], RelativeName, [$., $/|Result], OsType) -> @@ -500,6 +504,10 @@ join1([Atom|Rest], RelativeName, Result, OsType) when is_atom(Atom) -> join1b(<<UcLetter, $:, Rest/binary>>, RelativeName, [], win32) when is_integer(UcLetter), UcLetter >= $A, UcLetter =< $Z -> join1b(Rest, RelativeName, [$:, UcLetter+$a-$A], win32); +join1b(<<$\\,$\\,Rest/binary>>, RelativeName, [], win32) -> + join1b(<<$/,$/,Rest/binary>>, RelativeName, [], win32); +join1b(<<$/,$/,Rest/binary>>, RelativeName, [], win32) -> + join1b(Rest, RelativeName, [$/,$/], win32); join1b(<<$\\,Rest/binary>>, RelativeName, Result, win32) -> join1b(<<$/,Rest/binary>>, RelativeName, Result, win32); join1b(<<$/,Rest/binary>>, RelativeName, [$., $/|Result], OsType) -> @@ -510,6 +518,8 @@ join1b(<<>>, <<>>, Result, OsType) -> list_to_binary(maybe_remove_dirsep(Result, OsType)); join1b(<<>>, RelativeName, [$:|Rest], win32) -> join1b(RelativeName, <<>>, [$:|Rest], win32); +join1b(<<>>, RelativeName, [$/,$/|Result], win32) -> + join1b(RelativeName, <<>>, [$/,$/|Result], win32); join1b(<<>>, RelativeName, [$/|Result], OsType) -> join1b(RelativeName, <<>>, [$/|Result], OsType); join1b(<<>>, RelativeName, [$., $/|Result], OsType) -> @@ -523,6 +533,8 @@ maybe_remove_dirsep([$/, $:, Letter], win32) -> [Letter, $:, $/]; maybe_remove_dirsep([$/], _) -> [$/]; +maybe_remove_dirsep([$/,$/], win32) -> + [$/,$/]; maybe_remove_dirsep([$/|Name], _) -> lists:reverse(Name); maybe_remove_dirsep(Name, _) -> @@ -712,6 +724,9 @@ win32_splitb(<<Letter0,$:,Rest/binary>>) when ?IS_DRIVELETTER(Letter0) -> Letter = fix_driveletter(Letter0), L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]), [<<Letter,$:>> | [ X || X <- L, X =/= <<>> ]]; +win32_splitb(<<Slash,Slash,Rest/binary>>) when ((Slash =:= $\\) orelse (Slash =:= $/)) -> + L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]), + [<<"//">> | [ X || X <- L, X =/= <<>> ]]; win32_splitb(<<Slash,Rest/binary>>) when ((Slash =:= $\\) orelse (Slash =:= $/)) -> L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]), [<<$/>> | [ X || X <- L, X =/= <<>> ]]; @@ -723,6 +738,8 @@ win32_splitb(Name) -> unix_split(Name) -> split(Name, [], unix). +win32_split([Slash,Slash|Rest]) when ((Slash =:= $\\) orelse (Slash =:= $/)) -> + split(Rest, [[$/,$/]], win32); win32_split([$\\|Rest]) -> win32_split([$/|Rest]); win32_split([X, $\\|Rest]) when is_integer(X) -> diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl index 7a8a5e6d4a..362e98006e 100644 --- a/lib/stdlib/src/rand.erl +++ b/lib/stdlib/src/rand.erl @@ -21,8 +21,8 @@ %% Multiple PRNG module for Erlang/OTP %% Copyright (c) 2015-2016 Kenji Rikitake %% -%% exrop (xoroshiro116+) added and statistical distribution -%% improvements by the Erlang/OTP team 2017 +%% exrop (xoroshiro116+) added, statistical distribution +%% improvements and uniform_real added by the Erlang/OTP team 2017 %% ===================================================================== -module(rand). @@ -30,10 +30,14 @@ -export([seed_s/1, seed_s/2, seed/1, seed/2, export_seed/0, export_seed_s/1, uniform/0, uniform/1, uniform_s/1, uniform_s/2, + uniform_real/0, uniform_real_s/1, jump/0, jump/1, normal/0, normal/2, normal_s/1, normal_s/3 ]). +%% Debug +-export([make_float/3, float2str/1, bc64/1]). + -compile({inline, [exs64_next/1, exsplus_next/1, exs1024_next/1, exs1024_calc/2, exrop_next/1, exrop_next_s/2, @@ -60,6 +64,10 @@ %% N i evaluated 3 times (?BSL((Bits), (X), (N)) bor ((X) bsr ((Bits)-(N))))). +-define( + BC(V, N), + bc((V), ?BIT((N) - 1), N)). + %%-define(TWO_POW_MINUS53, (math:pow(2, -53))). -define(TWO_POW_MINUS53, 1.11022302462515657e-16). @@ -84,14 +92,21 @@ %% The 'bits' field indicates how many bits the integer %% returned from 'next' has got, i.e 'next' shall return %% an random integer in the range 0..(2^Bits - 1). -%% At least 53 bits is required for the floating point -%% producing fallbacks. This field is only used when -%% the 'uniform' or 'uniform_n' fields are not defined. +%% At least 55 bits is required for the floating point +%% producing fallbacks, but 56 bits would be more future proof. %% %% The fields 'next', 'uniform' and 'uniform_n' -%% implement the algorithm. If 'uniform' or 'uinform_n' +%% implement the algorithm. If 'uniform' or 'uniform_n' %% is not present there is a fallback using 'next' and either -%% 'bits' or the deprecated 'max'. +%% 'bits' or the deprecated 'max'. The 'next' function +%% must generate a word with at least 56 good random bits. +%% +%% The 'weak_low_bits' field indicate how many bits are of +%% lesser quality and they will not be used by the floating point +%% producing functions, nor by the range producing functions +%% when more bits are needed, to avoid weak bits in the middle +%% of the generated bits. The lowest bits from the range +%% functions still have the generator's quality. %% -type alg_handler() :: #{type := alg(), @@ -148,11 +163,7 @@ %% For ranges larger than the algorithm bit size uniform_range(Range, #{next:=Next, bits:=Bits} = Alg, R, V) -> - WeakLowBits = - case Alg of - #{weak_low_bits:=WLB} -> WLB; - #{} -> 0 - end, + WeakLowBits = maps:get(weak_low_bits, Alg, 0), %% Maybe waste the lowest bit(s) when shifting in new bits Shift = Bits - WeakLowBits, ShiftMask = bnot ?MASK(WeakLowBits), @@ -297,7 +308,7 @@ uniform_s({#{bits:=Bits, next:=Next} = Alg, R0}) -> {(V bsr (Bits - 53)) * ?TWO_POW_MINUS53, {Alg, R1}}; uniform_s({#{max:=Max, next:=Next} = Alg, R0}) -> {V, R1} = Next(R0), - %% Old broken algorithm with non-uniform density + %% Old algorithm with non-uniform density {V / (Max + 1), {Alg, R1}}. @@ -317,7 +328,7 @@ uniform_s(N, {#{bits:=Bits, next:=Next} = Alg, R0}) ?uniform_range(N, Alg, R1, V, MaxMinusN, I); uniform_s(N, {#{max:=Max, next:=Next} = Alg, R0}) when is_integer(N), 1 =< N -> - %% Old broken algorithm with skewed probability + %% Old algorithm with skewed probability %% and gap in ranges > Max {V, R1} = Next(R0), if @@ -328,6 +339,189 @@ uniform_s(N, {#{max:=Max, next:=Next} = Alg, R0}) {trunc(F * N) + 1, {Alg, R1}} end. +%% uniform_real/0: returns a random float X where 0.0 < X =< 1.0, +%% updating the state in the process dictionary. + +-spec uniform_real() -> X :: float(). +uniform_real() -> + {X, Seed} = uniform_real_s(seed_get()), + _ = seed_put(Seed), + X. + +%% uniform_real_s/1: given a state, uniform_s/1 +%% returns a random float X where 0.0 < X =< 1.0, +%% and a new state. +%% +%% This function does not use the same form of uniformity +%% as the uniform_s/1 function. +%% +%% Instead, this function does not generate numbers with equal +%% distance in the interval, but rather tries to keep all mantissa +%% bits random also for small numbers, meaning that the distance +%% between possible numbers decreases when the numbers +%% approaches 0.0, as does the possibility for a particular +%% number. Hence uniformity is preserved. +%% +%% To generate 56 bits at the time instead of 53 is actually +%% a speed optimization since the probability to have to +%% generate a second word decreases by 1/2 for every extra bit. +%% +%% This function generates normalized numbers, so the smallest number +%% that can be generated is 2^-1022 with the distance 2^-1074 +%% to the next to smallest number, compared to 2^-53 for uniform_s/1. +%% +%% This concept of uniformity should work better for applications +%% where you need to calculate 1.0/X or math:log(X) since those +%% operations benefits from larger precision approaching 0.0, +%% and that this function does not return 0.0 nor denormalized +%% numbers very close to 0.0. The log() operation in The Box-Muller +%% transformation for normal distribution is an example of this. +%% +%%-define(TWO_POW_MINUS55, (math:pow(2, -55))). +%%-define(TWO_POW_MINUS110, (math:pow(2, -110))). +%%-define(TWO_POW_MINUS55, 2.7755575615628914e-17). +%%-define(TWO_POW_MINUS110, 7.7037197775489436e-34). +%% +-spec uniform_real_s(State :: state()) -> {X :: float(), NewState :: state()}. +uniform_real_s({#{bits:=Bits, next:=Next} = Alg, R0}) -> + %% Generate a 56 bit number without using the weak low bits. + %% + %% Be sure to use only 53 bits when multiplying with + %% math:pow(2.0, -N) to avoid rounding which would make + %% "even" floats more probable than "odd". + %% + {V1, R1} = Next(R0), + M1 = V1 bsr (Bits - 56), + if + ?BIT(55) =< M1 -> + %% We have 56 bits - waste 3 + {(M1 bsr 3) * math:pow(2.0, -53), {Alg, R1}}; + ?BIT(54) =< M1 -> + %% We have 55 bits - waste 2 + {(M1 bsr 2) * math:pow(2.0, -54), {Alg, R1}}; + ?BIT(53) =< M1 -> + %% We have 54 bits - waste 1 + {(M1 bsr 1) * math:pow(2.0, -55), {Alg, R1}}; + ?BIT(52) =< M1 -> + %% We have 53 bits - use all + {M1 * math:pow(2.0, -56), {Alg, R1}}; + true -> + %% Need more bits + {V2, R2} = Next(R1), + uniform_real_s(Alg, Next, M1, -56, R2, V2, Bits) + end; +uniform_real_s({#{max:=_, next:=Next} = Alg, R0}) -> + %% Generate a 56 bit number. + %% Ignore the weak low bits for these old algorithms, + %% just produce something reasonable. + %% + %% Be sure to use only 53 bits when multiplying with + %% math:pow(2.0, -N) to avoid rounding which would make + %% "even" floats more probable than "odd". + %% + {V1, R1} = Next(R0), + M1 = ?MASK(56, V1), + if + ?BIT(55) =< M1 -> + %% We have 56 bits - waste 3 + {(M1 bsr 3) * math:pow(2.0, -53), {Alg, R1}}; + ?BIT(54) =< M1 -> + %% We have 55 bits - waste 2 + {(M1 bsr 2) * math:pow(2.0, -54), {Alg, R1}}; + ?BIT(53) =< M1 -> + %% We have 54 bits - waste 1 + {(M1 bsr 1) * math:pow(2.0, -55), {Alg, R1}}; + ?BIT(52) =< M1 -> + %% We have 53 bits - use all + {M1 * math:pow(2.0, -56), {Alg, R1}}; + true -> + %% Need more bits + {V2, R2} = Next(R1), + uniform_real_s(Alg, Next, M1, -56, R2, V2, 56) + end. + +uniform_real_s(Alg, _Next, M0, -1064, R1, V1, Bits) -> % 19*56 + %% This is a very theoretical bottom case. + %% The odds of getting here is about 2^-1008, + %% through a white box test case, or thanks to + %% a malfunctioning PRNG producing 18 56-bit zeros in a row. + %% + %% Fill up to 53 bits, we have at most 52 + B0 = (53 - ?BC(M0, 52)), % Missing bits + {(((M0 bsl B0) bor (V1 bsr (Bits - B0))) * math:pow(2.0, -1064 - B0)), + {Alg, R1}}; +uniform_real_s(Alg, Next, M0, BitNo, R1, V1, Bits) -> + if + %% Optimize the most probable. + %% Fill up to 53 bits. + ?BIT(51) =< M0 -> + %% We have 52 bits in M0 - need 1 + {(((M0 bsl 1) bor (V1 bsr (Bits - 1))) + * math:pow(2.0, BitNo - 1)), + {Alg, R1}}; + ?BIT(50) =< M0 -> + %% We have 51 bits in M0 - need 2 + {(((M0 bsl 2) bor (V1 bsr (Bits - 2))) + * math:pow(2.0, BitNo - 2)), + {Alg, R1}}; + ?BIT(49) =< M0 -> + %% We have 50 bits in M0 - need 3 + {(((M0 bsl 3) bor (V1 bsr (Bits - 3))) + * math:pow(2.0, BitNo - 3)), + {Alg, R1}}; + M0 == 0 -> + M1 = V1 bsr (Bits - 56), + if + ?BIT(55) =< M1 -> + %% We have 56 bits - waste 3 + {(M1 bsr 3) * math:pow(2.0, BitNo - 53), {Alg, R1}}; + ?BIT(54) =< M1 -> + %% We have 55 bits - waste 2 + {(M1 bsr 2) * math:pow(2.0, BitNo - 54), {Alg, R1}}; + ?BIT(53) =< M1 -> + %% We have 54 bits - waste 1 + {(M1 bsr 1) * math:pow(2.0, BitNo - 55), {Alg, R1}}; + ?BIT(52) =< M1 -> + %% We have 53 bits - use all + {M1 * math:pow(2.0, BitNo - 56), {Alg, R1}}; + BitNo =:= -1008 -> + %% Endgame + %% For the last round we can not have 14 zeros or more + %% at the top of M1 because then we will underflow, + %% so we need at least 43 bits + if + ?BIT(42) =< M1 -> + %% We have 43 bits - get the last bits + uniform_real_s(Alg, Next, M1, BitNo - 56, R1); + true -> + %% Would underflow 2^-1022 - start all over + %% + %% We could just crash here since the odds for + %% the PRNG being broken is much higher than + %% for a good PRNG generating this many zeros + %% in a row. Maybe we should write an error + %% report or call this a system limit...? + uniform_real_s({Alg, R1}) + end; + true -> + %% Need more bits + uniform_real_s(Alg, Next, M1, BitNo - 56, R1) + end; + true -> + %% Fill up to 53 bits + B0 = 53 - ?BC(M0, 49), % Number of bits we need to append + {(((M0 bsl B0) bor (V1 bsr (Bits - B0))) + * math:pow(2.0, BitNo - B0)), + {Alg, R1}} + end. +%% +uniform_real_s(#{bits:=Bits} = Alg, Next, M0, BitNo, R0) -> + {V1, R1} = Next(R0), + uniform_real_s(Alg, Next, M0, BitNo, R1, V1, Bits); +uniform_real_s(#{max:=_} = Alg, Next, M0, BitNo, R0) -> + {V1, R1} = Next(R0), + uniform_real_s(Alg, Next, M0, BitNo, R1, ?MASK(56, V1), 56). + %% jump/1: given a state, jump/1 %% returns a new state which is equivalent to that %% after a large number of call defined for each algorithm. @@ -1025,3 +1219,42 @@ normal_fi(Indx) -> 1.0214971439701471e-02,8.6165827693987316e-03,7.0508754713732268e-03, 5.5224032992509968e-03,4.0379725933630305e-03,2.6090727461021627e-03, 1.2602859304985975e-03}). + +%%%bitcount64(0) -> 0; +%%%bitcount64(V) -> 1 + bitcount(V, 64). +%%% +%%%-define( +%%% BITCOUNT(V, N), +%%% bitcount(V, N) -> +%%% if +%%% (1 bsl ((N) bsr 1)) =< (V) -> +%%% ((N) bsr 1) + bitcount((V) bsr ((N) bsr 1), ((N) bsr 1)); +%%% true -> +%%% bitcount((V), ((N) bsr 1)) +%%% end). +%%%?BITCOUNT(V, 64); +%%%?BITCOUNT(V, 32); +%%%?BITCOUNT(V, 16); +%%%?BITCOUNT(V, 8); +%%%?BITCOUNT(V, 4); +%%%?BITCOUNT(V, 2); +%%%bitcount(_, 1) -> 0. + +bc64(V) -> ?BC(V, 64). + +%% Linear from high bit - higher probability first gives faster execution +bc(V, B, N) when B =< V -> N; +bc(V, B, N) -> bc(V, B bsr 1, N - 1). + +make_float(S, E, M) -> + <<F/float>> = <<S:1, E:11, M:52>>, + F. + +float2str(N) -> + <<S:1, E:11, M:52>> = <<(float(N))/float>>, + lists:flatten( + io_lib:format( + "~c~c.~13.16.0bE~b", + [case S of 1 -> $-; 0 -> $+ end, + case E of 0 -> $0; _ -> $1 end, + M, E - 16#3ff])). diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src index ab0824ca17..5fb48acfab 100644 --- a/lib/stdlib/src/stdlib.app.src +++ b/lib/stdlib/src/stdlib.app.src @@ -101,6 +101,7 @@ timer, unicode, unicode_util, + uri_string, win32reg, zip]}, {registered,[timer_server,rsh_starter,take_over_monitor,pool_master, diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl new file mode 100644 index 0000000000..22212da222 --- /dev/null +++ b/lib/stdlib/src/uri_string.erl @@ -0,0 +1,1842 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +%% +%% [RFC 3986, Chapter 2.2. Reserved Characters] +%% +%% reserved = gen-delims / sub-delims +%% +%% gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" +%% +%% sub-delims = "!" / "$" / "&" / "'" / "(" / ")" +%% / "*" / "+" / "," / ";" / "=" +%% +%% +%% [RFC 3986, Chapter 2.3. Unreserved Characters] +%% +%% unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +%% +%% +%% [RFC 3986, Chapter 3. Syntax Components] +%% +%% The generic URI syntax consists of a hierarchical sequence of +%% components referred to as the scheme, authority, path, query, and +%% fragment. +%% +%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] +%% +%% hier-part = "//" authority path-abempty +%% / path-absolute +%% / path-rootless +%% / path-empty +%% +%% The scheme and path components are required, though the path may be +%% empty (no characters). When authority is present, the path must +%% either be empty or begin with a slash ("/") character. When +%% authority is not present, the path cannot begin with two slash +%% characters ("//"). These restrictions result in five different ABNF +%% rules for a path (Section 3.3), only one of which will match any +%% given URI reference. +%% +%% The following are two example URIs and their component parts: +%% +%% foo://example.com:8042/over/there?name=ferret#nose +%% \_/ \______________/\_________/ \_________/ \__/ +%% | | | | | +%% scheme authority path query fragment +%% | _____________________|__ +%% / \ / \ +%% urn:example:animal:ferret:nose +%% +%% +%% [RFC 3986, Chapter 3.1. Scheme] +%% +%% Each URI begins with a scheme name that refers to a specification for +%% assigning identifiers within that scheme. +%% +%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) +%% +%% +%% [RFC 3986, Chapter 3.2. Authority] +%% +%% Many URI schemes include a hierarchical element for a naming +%% authority so that governance of the name space defined by the +%% remainder of the URI is delegated to that authority (which may, in +%% turn, delegate it further). +%% +%% authority = [ userinfo "@" ] host [ ":" port ] +%% +%% +%% [RFC 3986, Chapter 3.2.1. User Information] +%% +%% The userinfo subcomponent may consist of a user name and, optionally, +%% scheme-specific information about how to gain authorization to access +%% the resource. The user information, if present, is followed by a +%% commercial at-sign ("@") that delimits it from the host. +%% +%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) +%% +%% +%% [RFC 3986, Chapter 3.2.2. Host] +%% +%% The host subcomponent of authority is identified by an IP literal +%% encapsulated within square brackets, an IPv4 address in dotted- +%% decimal form, or a registered name. +%% +%% host = IP-literal / IPv4address / reg-name +%% +%% IP-literal = "[" ( IPv6address / IPvFuture ) "]" +%% +%% IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) +%% +%% IPv6address = 6( h16 ":" ) ls32 +%% / "::" 5( h16 ":" ) ls32 +%% / [ h16 ] "::" 4( h16 ":" ) ls32 +%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 +%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 +%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 +%% / [ *4( h16 ":" ) h16 ] "::" ls32 +%% / [ *5( h16 ":" ) h16 ] "::" h16 +%% / [ *6( h16 ":" ) h16 ] "::" +%% +%% ls32 = ( h16 ":" h16 ) / IPv4address +%% ; least-significant 32 bits of address +%% +%% h16 = 1*4HEXDIG +%% ; 16 bits of address represented in hexadecimal +%% +%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet +%% +%% dec-octet = DIGIT ; 0-9 +%% / %x31-39 DIGIT ; 10-99 +%% / "1" 2DIGIT ; 100-199 +%% / "2" %x30-34 DIGIT ; 200-249 +%% / "25" %x30-35 ; 250-255 +%% +%% reg-name = *( unreserved / pct-encoded / sub-delims ) +%% +%% +%% [RFC 3986, Chapter 3.2.2. Port] +%% +%% The port subcomponent of authority is designated by an optional port +%% number in decimal following the host and delimited from it by a +%% single colon (":") character. +%% +%% port = *DIGIT +%% +%% +%% [RFC 3986, Chapter 3.3. Path] +%% +%% The path component contains data, usually organized in hierarchical +%% form, that, along with data in the non-hierarchical query component +%% (Section 3.4), serves to identify a resource within the scope of the +%% URI's scheme and naming authority (if any). The path is terminated +%% by the first question mark ("?") or number sign ("#") character, or +%% by the end of the URI. +%% +%% path = path-abempty ; begins with "/" or is empty +%% / path-absolute ; begins with "/" but not "//" +%% / path-noscheme ; begins with a non-colon segment +%% / path-rootless ; begins with a segment +%% / path-empty ; zero characters +%% +%% path-abempty = *( "/" segment ) +%% path-absolute = "/" [ segment-nz *( "/" segment ) ] +%% path-noscheme = segment-nz-nc *( "/" segment ) +%% path-rootless = segment-nz *( "/" segment ) +%% path-empty = 0<pchar> +%% segment = *pchar +%% segment-nz = 1*pchar +%% segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) +%% ; non-zero-length segment without any colon ":" +%% +%% pchar = unreserved / pct-encoded / sub-delims / ":" / "@" +%% +%% +%% [RFC 3986, Chapter 3.4. Query] +%% +%% The query component contains non-hierarchical data that, along with +%% data in the path component (Section 3.3), serves to identify a +%% resource within the scope of the URI's scheme and naming authority +%% (if any). The query component is indicated by the first question +%% mark ("?") character and terminated by a number sign ("#") character +%% or by the end of the URI. +%% +%% query = *( pchar / "/" / "?" ) +%% +%% +%% [RFC 3986, Chapter 3.5. Fragment] +%% +%% The fragment identifier component of a URI allows indirect +%% identification of a secondary resource by reference to a primary +%% resource and additional identifying information. +%% +%% fragment = *( pchar / "/" / "?" ) +%% +%% +%% [RFC 3986, Chapter 4.1. URI Reference] +%% +%% URI-reference is used to denote the most common usage of a resource +%% identifier. +%% +%% URI-reference = URI / relative-ref +%% +%% +%% [RFC 3986, Chapter 4.2. Relative Reference] +%% +%% A relative reference takes advantage of the hierarchical syntax +%% (Section 1.2.3) to express a URI reference relative to the name space +%% of another hierarchical URI. +%% +%% relative-ref = relative-part [ "?" query ] [ "#" fragment ] +%% +%% relative-part = "//" authority path-abempty +%% / path-absolute +%% / path-noscheme +%% / path-empty +%% +%% +%% [RFC 3986, Chapter 4.3. Absolute URI] +%% +%% Some protocol elements allow only the absolute form of a URI without +%% a fragment identifier. For example, defining a base URI for later +%% use by relative references calls for an absolute-URI syntax rule that +%% does not allow a fragment. +%% +%% absolute-URI = scheme ":" hier-part [ "?" query ] +%% +-module(uri_string). + +%%------------------------------------------------------------------------- +%% External API +%%------------------------------------------------------------------------- +-export([normalize/1, parse/1, + recompose/1, transcode/2]). +-export_type([error/0, uri_map/0, uri_string/0]). + + +%%------------------------------------------------------------------------- +%% Internal API +%%------------------------------------------------------------------------- +-export([is_host/1, is_path/1]). % suppress warnings + + +%%------------------------------------------------------------------------- +%% Macros +%%------------------------------------------------------------------------- +-define(CHAR(Char), <<Char/utf8>>). +-define(STRING_EMPTY, <<>>). +-define(STRING(MatchStr), <<MatchStr/binary>>). +-define(STRING_REST(MatchStr, Rest), <<MatchStr/utf8, Rest/binary>>). + +-define(DEC2HEX(X), + if ((X) >= 0) andalso ((X) =< 9) -> (X) + $0; + ((X) >= 10) andalso ((X) =< 15) -> (X) + $A - 10 + end). + +-define(HEX2DEC(X), + if ((X) >= $0) andalso ((X) =< $9) -> (X) - $0; + ((X) >= $A) andalso ((X) =< $F) -> (X) - $A + 10; + ((X) >= $a) andalso ((X) =< $f) -> (X) - $a + 10 + end). + + +%%%========================================================================= +%%% API +%%%========================================================================= + +%%------------------------------------------------------------------------- +%% URI compliant with RFC 3986 +%% ASCII %x21 - %x7A ("!" - "z") except +%% %x34 " double quote +%% %x60 < less than +%% %x62 > greater than +%% %x92 \ backslash +%% %x94 ^ caret / circumflex +%% %x96 ` grave / accent +%%------------------------------------------------------------------------- +-type uri_string() :: iodata(). +-type error() :: {error, atom(), term()}. + + +%%------------------------------------------------------------------------- +%% RFC 3986, Chapter 3. Syntax Components +%%------------------------------------------------------------------------- +-type uri_map() :: + #{fragment => unicode:chardata(), + host => unicode:chardata(), + path => unicode:chardata(), + port => non_neg_integer() | undefined, + query => unicode:chardata(), + scheme => unicode:chardata(), + userinfo => unicode:chardata()} | #{}. + + +%%------------------------------------------------------------------------- +%% Normalize URIs +%%------------------------------------------------------------------------- +-spec normalize(URIString) -> NormalizedURI when + URIString :: uri_string(), + NormalizedURI :: uri_string(). +normalize(URIString) -> + %% Percent-encoding normalization and case normalization for + %% percent-encoded triplets are achieved by running parse and + %% recompose on the input URI string. + recompose( + normalize_path_segment( + normalize_scheme_based( + normalize_case( + parse(URIString))))). + + +%%------------------------------------------------------------------------- +%% Parse URIs +%%------------------------------------------------------------------------- +-spec parse(URIString) -> URIMap when + URIString :: uri_string(), + URIMap :: uri_map() + | error(). +parse(URIString) when is_binary(URIString) -> + try parse_uri_reference(URIString, #{}) + catch + throw:{error, Atom, RestData} -> {error, Atom, RestData} + end; +parse(URIString) when is_list(URIString) -> + try + Binary = unicode:characters_to_binary(URIString), + Map = parse_uri_reference(Binary, #{}), + convert_mapfields_to_list(Map) + catch + throw:{error, Atom, RestData} -> {error, Atom, RestData} + end. + + +%%------------------------------------------------------------------------- +%% Recompose URIs +%%------------------------------------------------------------------------- +-spec recompose(URIMap) -> URIString when + URIMap :: uri_map(), + URIString :: uri_string() + | error(). +recompose(Map) -> + case is_valid_map(Map) of + false -> + {error, invalid_map, Map}; + true -> + try + T0 = update_scheme(Map, empty), + T1 = update_userinfo(Map, T0), + T2 = update_host(Map, T1), + T3 = update_port(Map, T2), + T4 = update_path(Map, T3), + T5 = update_query(Map, T4), + update_fragment(Map, T5) + catch + throw:{error, Atom, RestData} -> {error, Atom, RestData} + end + end. + + +%%------------------------------------------------------------------------- +%% Transcode URIs +%%------------------------------------------------------------------------- +-spec transcode(URIString, Options) -> Result when + URIString :: uri_string(), + Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}], + Result :: uri_string() + | error(). +transcode(URIString, Options) when is_binary(URIString) -> + try + InEnc = proplists:get_value(in_encoding, Options, utf8), + OutEnc = proplists:get_value(out_encoding, Options, utf8), + List = convert_to_list(URIString, InEnc), + Output = transcode(List, [], InEnc, OutEnc), + convert_to_binary(Output, utf8, OutEnc) + catch + throw:{error, Atom, RestData} -> {error, Atom, RestData} + end; +transcode(URIString, Options) when is_list(URIString) -> + InEnc = proplists:get_value(in_encoding, Options, utf8), + OutEnc = proplists:get_value(out_encoding, Options, utf8), + Flattened = flatten_list(URIString, InEnc), + try transcode(Flattened, [], InEnc, OutEnc) + catch + throw:{error, Atom, RestData} -> {error, Atom, RestData} + end. + + +%%%======================================================================== +%%% Internal functions +%%%======================================================================== + +%%------------------------------------------------------------------------- +%% Converts Map fields to lists +%%------------------------------------------------------------------------- +convert_mapfields_to_list(Map) -> + Fun = fun (_, V) when is_binary(V) -> unicode:characters_to_list(V); + (_, V) -> V end, + maps:map(Fun, Map). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 4.1. URI Reference] +%% +%% URI-reference is used to denote the most common usage of a resource +%% identifier. +%% +%% URI-reference = URI / relative-ref +%%------------------------------------------------------------------------- +-spec parse_uri_reference(binary(), uri_map()) -> uri_map(). +parse_uri_reference(<<>>, _) -> #{path => <<>>}; +parse_uri_reference(URIString, URI) -> + try parse_scheme_start(URIString, URI) + catch + throw:{_,_,_} -> + parse_relative_part(URIString, URI) + end. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 4.2. Relative Reference] +%% +%% A relative reference takes advantage of the hierarchical syntax +%% (Section 1.2.3) to express a URI reference relative to the name space +%% of another hierarchical URI. +%% +%% relative-ref = relative-part [ "?" query ] [ "#" fragment ] +%% +%% relative-part = "//" authority path-abempty +%% / path-absolute +%% / path-noscheme +%% / path-empty +%%------------------------------------------------------------------------- +-spec parse_relative_part(binary(), uri_map()) -> uri_map(). +parse_relative_part(?STRING_REST("//", Rest), URI) -> + %% Parse userinfo - "//" is NOT part of authority + try parse_userinfo(Rest, URI) of + {T, URI1} -> + Userinfo = calculate_parsed_userinfo(Rest, T), + URI2 = maybe_add_path(URI1), + URI2#{userinfo => decode_userinfo(Userinfo)} + catch + throw:{_,_,_} -> + {T, URI1} = parse_host(Rest, URI), + Host = calculate_parsed_host_port(Rest, T), + URI2 = maybe_add_path(URI1), + URI2#{host => decode_host(remove_brackets(Host))} + end; +parse_relative_part(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-absolute + Path = calculate_parsed_part(Rest, T), + URI1#{path => decode_path(?STRING_REST($/, Path))}; +parse_relative_part(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + URI2 = maybe_add_path(URI1), + URI2#{query => decode_query(Query)}; +parse_relative_part(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + URI2 = maybe_add_path(URI1), + URI2#{fragment => decode_fragment(Fragment)}; +parse_relative_part(?STRING_REST(Char, Rest), URI) -> + case is_segment_nz_nc(Char) of + true -> + {T, URI1} = parse_segment_nz_nc(Rest, URI), % path-noscheme + Path = calculate_parsed_part(Rest, T), + URI1#{path => decode_path(?STRING_REST(Char, Path))}; + false -> throw({error,invalid_uri,[Char]}) + end. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.3. Path] +%% +%% The path component contains data, usually organized in hierarchical +%% form, that, along with data in the non-hierarchical query component +%% (Section 3.4), serves to identify a resource within the scope of the +%% URI's scheme and naming authority (if any). The path is terminated +%% by the first question mark ("?") or number sign ("#") character, or +%% by the end of the URI. +%% +%% path = path-abempty ; begins with "/" or is empty +%% / path-absolute ; begins with "/" but not "//" +%% / path-noscheme ; begins with a non-colon segment +%% / path-rootless ; begins with a segment +%% / path-empty ; zero characters +%% +%% path-abempty = *( "/" segment ) +%% path-absolute = "/" [ segment-nz *( "/" segment ) ] +%% path-noscheme = segment-nz-nc *( "/" segment ) +%% path-rootless = segment-nz *( "/" segment ) +%% path-empty = 0<pchar> +%% segment = *pchar +%% segment-nz = 1*pchar +%% segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) +%% ; non-zero-length segment without any colon ":" +%% +%% pchar = unreserved / pct-encoded / sub-delims / ":" / "@" +%%------------------------------------------------------------------------- + +%%------------------------------------------------------------------------- +%% path-abempty +%%------------------------------------------------------------------------- +-spec parse_segment(binary(), uri_map()) -> {binary(), uri_map()}. +parse_segment(?STRING_REST($/, Rest), URI) -> + parse_segment(Rest, URI); % segment +parse_segment(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_segment(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_segment(?STRING_REST(Char, Rest), URI) -> + case is_pchar(Char) of + true -> parse_segment(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_segment(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + + +%%------------------------------------------------------------------------- +%% path-noscheme +%%------------------------------------------------------------------------- +-spec parse_segment_nz_nc(binary(), uri_map()) -> {binary(), uri_map()}. +parse_segment_nz_nc(?STRING_REST($/, Rest), URI) -> + parse_segment(Rest, URI); % segment +parse_segment_nz_nc(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_segment_nz_nc(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) -> + case is_segment_nz_nc(Char) of + true -> parse_segment_nz_nc(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_segment_nz_nc(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + + +%% Check if char is pchar. +-spec is_pchar(char()) -> boolean(). +is_pchar($%) -> true; % pct-encoded +is_pchar($:) -> true; +is_pchar($@) -> true; +is_pchar(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + +%% Check if char is segment_nz_nc. +-spec is_segment_nz_nc(char()) -> boolean(). +is_segment_nz_nc($%) -> true; % pct-encoded +is_segment_nz_nc($@) -> true; +is_segment_nz_nc(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.1. Scheme] +%% +%% Each URI begins with a scheme name that refers to a specification for +%% assigning identifiers within that scheme. +%% +%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) +%%------------------------------------------------------------------------- +-spec parse_scheme_start(binary(), uri_map()) -> uri_map(). +parse_scheme_start(?STRING_REST(Char, Rest), URI) -> + case is_alpha(Char) of + true -> {T, URI1} = parse_scheme(Rest, URI), + Scheme = calculate_parsed_scheme(Rest, T), + URI2 = maybe_add_path(URI1), + URI2#{scheme => ?STRING_REST(Char, Scheme)}; + false -> throw({error,invalid_uri,[Char]}) + end. + +%% Add path component if it missing after parsing the URI. +%% According to the URI specification there is always a +%% path component in every URI-reference and it can be +%% empty. +maybe_add_path(Map) -> + case maps:is_key(path, Map) of + false -> + Map#{path => <<>>}; + _Else -> + Map + end. + + +-spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}. +parse_scheme(?STRING_REST($:, Rest), URI) -> + {_, URI1} = parse_hier(Rest, URI), + {Rest, URI1}; +parse_scheme(?STRING_REST(Char, Rest), URI) -> + case is_scheme(Char) of + true -> parse_scheme(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_scheme(?STRING_EMPTY, _URI) -> + throw({error,invalid_uri,<<>>}). + + +%% Check if char is allowed in scheme +-spec is_scheme(char()) -> boolean(). +is_scheme($+) -> true; +is_scheme($-) -> true; +is_scheme($.) -> true; +is_scheme(Char) -> is_alpha(Char) orelse is_digit(Char). + + +%%------------------------------------------------------------------------- +%% hier-part = "//" authority path-abempty +%% / path-absolute +%% / path-rootless +%% / path-empty +%%------------------------------------------------------------------------- +-spec parse_hier(binary(), uri_map()) -> {binary(), uri_map()}. +parse_hier(?STRING_REST("//", Rest), URI) -> + % Parse userinfo - "//" is NOT part of authority + try parse_userinfo(Rest, URI) of + {T, URI1} -> + Userinfo = calculate_parsed_userinfo(Rest, T), + {Rest, URI1#{userinfo => decode_userinfo(Userinfo)}} + catch + throw:{_,_,_} -> + {T, URI1} = parse_host(Rest, URI), + Host = calculate_parsed_host_port(Rest, T), + {Rest, URI1#{host => decode_host(remove_brackets(Host))}} + end; +parse_hier(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-absolute + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; +parse_hier(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_hier(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless + case is_pchar(Char) of + true -> % segment_nz + {T, URI1} = parse_segment(Rest, URI), + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST(Char, Path))}}; + false -> throw({error,invalid_uri,[Char]}) + end; +parse_hier(?STRING_EMPTY, URI) -> + {<<>>, URI}. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.2. Authority] +%% +%% Many URI schemes include a hierarchical element for a naming +%% authority so that governance of the name space defined by the +%% remainder of the URI is delegated to that authority (which may, in +%% turn, delegate it further). +%% +%% The authority component is preceded by a double slash ("//") and is +%% terminated by the next slash ("/"), question mark ("?"), or number +%% sign ("#") character, or by the end of the URI. +%% +%% authority = [ userinfo "@" ] host [ ":" port ] +%% +%% +%% [RFC 3986, Chapter 3.2.1. User Information] +%% +%% The userinfo subcomponent may consist of a user name and, optionally, +%% scheme-specific information about how to gain authorization to access +%% the resource. The user information, if present, is followed by a +%% commercial at-sign ("@") that delimits it from the host. +%% +%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) +%%------------------------------------------------------------------------- +-spec parse_userinfo(binary(), uri_map()) -> {binary(), uri_map()}. +parse_userinfo(?CHAR($@), URI) -> + {?STRING_EMPTY, URI#{host => <<>>}}; +parse_userinfo(?STRING_REST($@, Rest), URI) -> + {T, URI1} = parse_host(Rest, URI), + Host = calculate_parsed_host_port(Rest, T), + {Rest, URI1#{host => decode_host(remove_brackets(Host))}}; +parse_userinfo(?STRING_REST(Char, Rest), URI) -> + case is_userinfo(Char) of + true -> parse_userinfo(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_userinfo(?STRING_EMPTY, _URI) -> + %% URI cannot end in userinfo state + throw({error,invalid_uri,<<>>}). + + +%% Check if char is allowed in userinfo +-spec is_userinfo(char()) -> boolean(). +is_userinfo($%) -> true; % pct-encoded +is_userinfo($:) -> true; +is_userinfo(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.2.2. Host] +%% +%% The host subcomponent of authority is identified by an IP literal +%% encapsulated within square brackets, an IPv4 address in dotted- +%% decimal form, or a registered name. +%% +%% host = IP-literal / IPv4address / reg-name +%% +%% IP-literal = "[" ( IPv6address / IPvFuture ) "]" +%% +%% IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) +%% +%% IPv6address = 6( h16 ":" ) ls32 +%% / "::" 5( h16 ":" ) ls32 +%% / [ h16 ] "::" 4( h16 ":" ) ls32 +%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 +%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 +%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 +%% / [ *4( h16 ":" ) h16 ] "::" ls32 +%% / [ *5( h16 ":" ) h16 ] "::" h16 +%% / [ *6( h16 ":" ) h16 ] "::" +%% +%% ls32 = ( h16 ":" h16 ) / IPv4address +%% ; least-significant 32 bits of address +%% +%% h16 = 1*4HEXDIG +%% ; 16 bits of address represented in hexadecimal +%% +%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet +%% +%% dec-octet = DIGIT ; 0-9 +%% / %x31-39 DIGIT ; 10-99 +%% / "1" 2DIGIT ; 100-199 +%% / "2" %x30-34 DIGIT ; 200-249 +%% / "25" %x30-35 ; 250-255 +%% +%% reg-name = *( unreserved / pct-encoded / sub-delims ) +%%------------------------------------------------------------------------- +-spec parse_host(binary(), uri_map()) -> {binary(), uri_map()}. +parse_host(?STRING_REST($:, Rest), URI) -> + {T, URI1} = parse_port(Rest, URI), + H = calculate_parsed_host_port(Rest, T), + Port = get_port(H), + {Rest, URI1#{port => Port}}; +parse_host(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-abempty + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; +parse_host(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_host(?STRING_REST($[, Rest), URI) -> + parse_ipv6_bin(Rest, [], URI); +parse_host(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_host(?STRING_REST(Char, Rest), URI) -> + case is_digit(Char) of + true -> parse_ipv4_bin(Rest, [Char], URI); + false -> parse_reg_name(?STRING_REST(Char, Rest), URI) + end; +parse_host(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + + +-spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}. +parse_reg_name(?STRING_REST($:, Rest), URI) -> + {T, URI1} = parse_port(Rest, URI), + H = calculate_parsed_host_port(Rest, T), + Port = get_port(H), + {Rest, URI1#{port => Port}}; +parse_reg_name(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-abempty + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; +parse_reg_name(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_reg_name(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_reg_name(?STRING_REST(Char, Rest), URI) -> + case is_reg_name(Char) of + true -> parse_reg_name(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_reg_name(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + +%% Check if char is allowed in reg-name +-spec is_reg_name(char()) -> boolean(). +is_reg_name($%) -> true; +is_reg_name(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + + +-spec parse_ipv4_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}. +parse_ipv4_bin(?STRING_REST($:, Rest), Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {T, URI1} = parse_port(Rest, URI), + H = calculate_parsed_host_port(Rest, T), + Port = get_port(H), + {Rest, URI1#{port => Port}}; +parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {T, URI1} = parse_segment(Rest, URI), % path-abempty + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; +parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) -> + case is_ipv4(Char) of + true -> parse_ipv4_bin(Rest, [Char|Acc], URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_ipv4_bin(?STRING_EMPTY, Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {?STRING_EMPTY, URI}. + + +%% Check if char is allowed in IPv4 addresses +-spec is_ipv4(char()) -> boolean(). +is_ipv4($.) -> true; +is_ipv4(Char) -> is_digit(Char). + +-spec validate_ipv4_address(list()) -> list(). +validate_ipv4_address(Addr) -> + case inet:parse_ipv4strict_address(Addr) of + {ok, _} -> Addr; + {error, _} -> throw({error,invalid_uri,Addr}) + end. + + +-spec parse_ipv6_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}. +parse_ipv6_bin(?STRING_REST($], Rest), Acc, URI) -> + _ = validate_ipv6_address(lists:reverse(Acc)), + parse_ipv6_bin_end(Rest, URI); +parse_ipv6_bin(?STRING_REST(Char, Rest), Acc, URI) -> + case is_ipv6(Char) of + true -> parse_ipv6_bin(Rest, [Char|Acc], URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_ipv6_bin(?STRING_EMPTY, _Acc, _URI) -> + throw({error,invalid_uri,<<>>}). + +%% Check if char is allowed in IPv6 addresses +-spec is_ipv6(char()) -> boolean(). +is_ipv6($:) -> true; +is_ipv6($.) -> true; +is_ipv6(Char) -> is_hex_digit(Char). + + +-spec parse_ipv6_bin_end(binary(), uri_map()) -> {binary(), uri_map()}. +parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) -> + {T, URI1} = parse_port(Rest, URI), + H = calculate_parsed_host_port(Rest, T), + Port = get_port(H), + {Rest, URI1#{port => Port}}; +parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-abempty + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; +parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) -> + case is_ipv6(Char) of + true -> parse_ipv6_bin_end(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_ipv6_bin_end(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + +-spec validate_ipv6_address(list()) -> list(). +validate_ipv6_address(Addr) -> + case inet:parse_ipv6strict_address(Addr) of + {ok, _} -> Addr; + {error, _} -> throw({error,invalid_uri,Addr}) + end. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.2.2. Port] +%% +%% The port subcomponent of authority is designated by an optional port +%% number in decimal following the host and delimited from it by a +%% single colon (":") character. +%% +%% port = *DIGIT +%%------------------------------------------------------------------------- +-spec parse_port(binary(), uri_map()) -> {binary(), uri_map()}. +parse_port(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-abempty + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; +parse_port(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_port(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_port(?STRING_REST(Char, Rest), URI) -> + case is_digit(Char) of + true -> parse_port(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_port(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.4. Query] +%% +%% The query component contains non-hierarchical data that, along with +%% data in the path component (Section 3.3), serves to identify a +%% resource within the scope of the URI's scheme and naming authority +%% (if any). The query component is indicated by the first question +%% mark ("?") character and terminated by a number sign ("#") character +%% or by the end of the URI. +%% +%% query = *( pchar / "/" / "?" ) +%%------------------------------------------------------------------------- +-spec parse_query(binary(), uri_map()) -> {binary(), uri_map()}. +parse_query(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_query(?STRING_REST(Char, Rest), URI) -> + case is_query(Char) of + true -> parse_query(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_query(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + + +%% Check if char is allowed in query +-spec is_query(char()) -> boolean(). +is_query($/) -> true; +is_query($?) -> true; +is_query(Char) -> is_pchar(Char). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.5. Fragment] +%% +%% The fragment identifier component of a URI allows indirect +%% identification of a secondary resource by reference to a primary +%% resource and additional identifying information. +%% +%% fragment = *( pchar / "/" / "?" ) +%%------------------------------------------------------------------------- +-spec parse_fragment(binary(), uri_map()) -> {binary(), uri_map()}. +parse_fragment(?STRING_REST(Char, Rest), URI) -> + case is_fragment(Char) of + true -> parse_fragment(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_fragment(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + + +%% Check if char is allowed in fragment +-spec is_fragment(char()) -> boolean(). +is_fragment($/) -> true; +is_fragment($?) -> true; +is_fragment(Char) -> is_pchar(Char). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 2.2. Reserved Characters] +%% +%% reserved = gen-delims / sub-delims +%% +%% gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" +%% +%% sub-delims = "!" / "$" / "&" / "'" / "(" / ")" +%% / "*" / "+" / "," / ";" / "=" +%% +%%------------------------------------------------------------------------- + +%% Check if char is sub-delim. +-spec is_sub_delim(char()) -> boolean(). +is_sub_delim($!) -> true; +is_sub_delim($$) -> true; +is_sub_delim($&) -> true; +is_sub_delim($') -> true; +is_sub_delim($() -> true; +is_sub_delim($)) -> true; + +is_sub_delim($*) -> true; +is_sub_delim($+) -> true; +is_sub_delim($,) -> true; +is_sub_delim($;) -> true; +is_sub_delim($=) -> true; +is_sub_delim(_) -> false. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 2.3. Unreserved Characters] +%% +%% unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +%% +%%------------------------------------------------------------------------- +-spec is_unreserved(char()) -> boolean(). +is_unreserved($-) -> true; +is_unreserved($.) -> true; +is_unreserved($_) -> true; +is_unreserved($~) -> true; +is_unreserved(Char) -> is_alpha(Char) orelse is_digit(Char). + +-spec is_alpha(char()) -> boolean(). +is_alpha(C) + when $A =< C, C =< $Z; + $a =< C, C =< $z -> true; +is_alpha(_) -> false. + +-spec is_digit(char()) -> boolean(). +is_digit(C) + when $0 =< C, C =< $9 -> true; +is_digit(_) -> false. + +-spec is_hex_digit(char()) -> boolean(). +is_hex_digit(C) + when $0 =< C, C =< $9;$a =< C, C =< $f;$A =< C, C =< $F -> true; +is_hex_digit(_) -> false. + + +%% Remove enclosing brackets from binary +-spec remove_brackets(binary()) -> binary(). +remove_brackets(<<$[/utf8, Rest/binary>>) -> + {H,T} = split_binary(Rest, byte_size(Rest) - 1), + case T =:= <<$]/utf8>> of + true -> H; + false -> Rest + end; +remove_brackets(Addr) -> Addr. + + +%%------------------------------------------------------------------------- +%% Helper functions for calculating the parsed binary. +%%------------------------------------------------------------------------- +-spec calculate_parsed_scheme(binary(), binary()) -> binary(). +calculate_parsed_scheme(Input, <<>>) -> + strip_last_char(Input, [$:]); +calculate_parsed_scheme(Input, Unparsed) -> + get_parsed_binary(Input, Unparsed). + + +-spec calculate_parsed_part(binary(), binary()) -> binary(). +calculate_parsed_part(Input, <<>>) -> + strip_last_char(Input, [$?,$#]); +calculate_parsed_part(Input, Unparsed) -> + get_parsed_binary(Input, Unparsed). + + +-spec calculate_parsed_userinfo(binary(), binary()) -> binary(). +calculate_parsed_userinfo(Input, <<>>) -> + strip_last_char(Input, [$?,$#,$@]); +calculate_parsed_userinfo(Input, Unparsed) -> + get_parsed_binary(Input, Unparsed). + + +-spec calculate_parsed_host_port(binary(), binary()) -> binary(). +calculate_parsed_host_port(Input, <<>>) -> + strip_last_char(Input, [$:,$?,$#,$/]); +calculate_parsed_host_port(Input, Unparsed) -> + get_parsed_binary(Input, Unparsed). + + +calculate_parsed_query_fragment(Input, <<>>) -> + strip_last_char(Input, [$#]); +calculate_parsed_query_fragment(Input, Unparsed) -> + get_parsed_binary(Input, Unparsed). + + +get_port(<<>>) -> + undefined; +get_port(B) -> + try binary_to_integer(B) + catch + error:badarg -> + throw({error, invalid_uri, B}) + end. + + +%% Strip last char if it is in list +%% +%% This function is optimized for speed: parse/1 is about 10% faster than +%% with an alternative implementation based on lists and sets. +strip_last_char(<<>>, _) -> <<>>; +strip_last_char(Input, [C0]) -> + case binary:last(Input) of + C0 -> + init_binary(Input); + _Else -> + Input + end; +strip_last_char(Input, [C0,C1]) -> + case binary:last(Input) of + C0 -> + init_binary(Input); + C1 -> + init_binary(Input); + _Else -> + Input + end; +strip_last_char(Input, [C0,C1,C2]) -> + case binary:last(Input) of + C0 -> + init_binary(Input); + C1 -> + init_binary(Input); + C2 -> + init_binary(Input); + _Else -> + Input + end; +strip_last_char(Input, [C0,C1,C2,C3]) -> + case binary:last(Input) of + C0 -> + init_binary(Input); + C1 -> + init_binary(Input); + C2 -> + init_binary(Input); + C3 -> + init_binary(Input); + _Else -> + Input + end. + + +%% Get parsed binary +get_parsed_binary(Input, Unparsed) -> + {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)), + First. + + +%% Return all bytes of the binary except the last one. The binary must be non-empty. +init_binary(B) -> + {Init, _} = + split_binary(B, byte_size(B) - 1), + Init. + + +%% Returns the size of a binary exluding the first element. +%% Used in calls to split_binary(). +-spec byte_size_exl_head(binary()) -> number(). +byte_size_exl_head(<<>>) -> 0; +byte_size_exl_head(Binary) -> byte_size(Binary) + 1. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 2.1. Percent-Encoding] +%% +%% A percent-encoding mechanism is used to represent a data octet in a +%% component when that octet's corresponding character is outside the +%% allowed set or is being used as a delimiter of, or within, the +%% component. A percent-encoded octet is encoded as a character +%% triplet, consisting of the percent character "%" followed by the two +%% hexadecimal digits representing that octet's numeric value. For +%% example, "%20" is the percent-encoding for the binary octet +%% "00100000" (ABNF: %x20), which in US-ASCII corresponds to the space +%% character (SP). Section 2.4 describes when percent-encoding and +%% decoding is applied. +%% +%% pct-encoded = "%" HEXDIG HEXDIG +%%------------------------------------------------------------------------- +-spec decode_userinfo(binary()) -> binary(). +decode_userinfo(Cs) -> + check_utf8(decode(Cs, fun is_userinfo/1, <<>>)). + +-spec decode_host(binary()) -> binary(). +decode_host(Cs) -> + check_utf8(decode(Cs, fun is_host/1, <<>>)). + +-spec decode_path(binary()) -> binary(). +decode_path(Cs) -> + check_utf8(decode(Cs, fun is_path/1, <<>>)). + +-spec decode_query(binary()) -> binary(). +decode_query(Cs) -> + check_utf8(decode(Cs, fun is_query/1, <<>>)). + +-spec decode_fragment(binary()) -> binary(). +decode_fragment(Cs) -> + check_utf8(decode(Cs, fun is_fragment/1, <<>>)). + + +%% Returns Cs if it is utf8 encoded. +check_utf8(Cs) -> + case unicode:characters_to_list(Cs) of + {incomplete,_,_} -> + throw({error,invalid_utf8,Cs}); + {error,_,_} -> + throw({error,invalid_utf8,Cs}); + _ -> Cs + end. + +%%------------------------------------------------------------------------- +%% Percent-encode +%%------------------------------------------------------------------------- + +%% Only validates as scheme cannot have percent-encoded characters +-spec encode_scheme(list()|binary()) -> list() | binary(). +encode_scheme([]) -> + throw({error,invalid_scheme,""}); +encode_scheme(<<>>) -> + throw({error,invalid_scheme,<<>>}); +encode_scheme(Scheme) -> + case validate_scheme(Scheme) of + true -> Scheme; + false -> throw({error,invalid_scheme,Scheme}) + end. + +-spec encode_userinfo(list()|binary()) -> list() | binary(). +encode_userinfo(Cs) -> + encode(Cs, fun is_userinfo/1). + +-spec encode_host(list()|binary()) -> list() | binary(). +encode_host(Cs) -> + case classify_host(Cs) of + regname -> Cs; + ipv4 -> Cs; + ipv6 -> bracket_ipv6(Cs); + other -> encode(Cs, fun is_reg_name/1) + end. + +-spec encode_path(list()|binary()) -> list() | binary(). +encode_path(Cs) -> + encode(Cs, fun is_path/1). + +-spec encode_query(list()|binary()) -> list() | binary(). +encode_query(Cs) -> + encode(Cs, fun is_query/1). + +-spec encode_fragment(list()|binary()) -> list() | binary(). +encode_fragment(Cs) -> + encode(Cs, fun is_fragment/1). + +%%------------------------------------------------------------------------- +%% Helper funtions for percent-decode +%%------------------------------------------------------------------------- +decode(<<$%,C0,C1,Cs/binary>>, Fun, Acc) -> + case is_hex_digit(C0) andalso is_hex_digit(C1) of + true -> + B = ?HEX2DEC(C0)*16+?HEX2DEC(C1), + decode(Cs, Fun, <<Acc/binary, B>>); + false -> throw({error,invalid_percent_encoding,<<$%,C0,C1>>}) + end; +decode(<<C,Cs/binary>>, Fun, Acc) -> + case Fun(C) of + true -> decode(Cs, Fun, <<Acc/binary, C>>); + false -> throw({error,invalid_percent_encoding,<<C,Cs/binary>>}) + end; +decode(<<>>, _Fun, Acc) -> + Acc. + +%% Check if char is allowed in host +-spec is_host(char()) -> boolean(). +is_host($:) -> true; +is_host(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + +%% Check if char is allowed in path +-spec is_path(char()) -> boolean(). +is_path($/) -> true; +is_path(Char) -> is_pchar(Char). + + +%%------------------------------------------------------------------------- +%% Helper functions for percent-encode +%%------------------------------------------------------------------------- +-spec encode(list()|binary(), fun()) -> list() | binary(). +encode(Component, Fun) when is_list(Component) -> + B = unicode:characters_to_binary(Component), + unicode:characters_to_list(encode(B, Fun, <<>>)); +encode(Component, Fun) when is_binary(Component) -> + encode(Component, Fun, <<>>). +%% +encode(<<Char/utf8, Rest/binary>>, Fun, Acc) -> + C = encode_codepoint_binary(Char, Fun), + encode(Rest, Fun, <<Acc/binary,C/binary>>); +encode(<<Char, Rest/binary>>, _Fun, _Acc) -> + throw({error,invalid_input,<<Char,Rest/binary>>}); +encode(<<>>, _Fun, Acc) -> + Acc. + + +-spec encode_codepoint_binary(integer(), fun()) -> binary(). +encode_codepoint_binary(C, Fun) -> + case Fun(C) of + false -> percent_encode_binary(C); + true -> <<C>> + end. + + +-spec percent_encode_binary(integer()) -> binary(). +percent_encode_binary(Code) -> + percent_encode_binary(<<Code/utf8>>, <<>>). + + +percent_encode_binary(<<A:4,B:4,Rest/binary>>, Acc) -> + percent_encode_binary(Rest, <<Acc/binary,$%,(?DEC2HEX(A)),(?DEC2HEX(B))>>); +percent_encode_binary(<<>>, Acc) -> + Acc. + + +%%------------------------------------------------------------------------- +%%------------------------------------------------------------------------- +validate_scheme([]) -> true; +validate_scheme([H|T]) -> + case is_scheme(H) of + true -> validate_scheme(T); + false -> false + end; +validate_scheme(<<>>) -> true; +validate_scheme(<<H, Rest/binary>>) -> + case is_scheme(H) of + true -> validate_scheme(Rest); + false -> false + end. + + +%%------------------------------------------------------------------------- +%% Classifies hostname into the following categories: +%% regname, ipv4 - address does not contain reserved characters to be +%% percent-encoded +%% ipv6 - address does not contain reserved characters but it shall be +%% encolsed in brackets +%% other - address shall be percent-encoded +%%------------------------------------------------------------------------- +classify_host([]) -> other; +classify_host(Addr) when is_binary(Addr) -> + A = unicode:characters_to_list(Addr), + classify_host_ipv6(A); +classify_host(Addr) -> + classify_host_ipv6(Addr). + +classify_host_ipv6(Addr) -> + case is_ipv6_address(Addr) of + true -> ipv6; + false -> classify_host_ipv4(Addr) + end. + +classify_host_ipv4(Addr) -> + case is_ipv4_address(Addr) of + true -> ipv4; + false -> classify_host_regname(Addr) + end. + +classify_host_regname([]) -> regname; +classify_host_regname([H|T]) -> + case is_reg_name(H) of + true -> classify_host_regname(T); + false -> other + end. + +is_ipv4_address(Addr) -> + case inet:parse_ipv4strict_address(Addr) of + {ok, _} -> true; + {error, _} -> false + end. + +is_ipv6_address(Addr) -> + case inet:parse_ipv6strict_address(Addr) of + {ok, _} -> true; + {error, _} -> false + end. + +bracket_ipv6(Addr) when is_binary(Addr) -> + concat(<<$[,Addr/binary>>,<<$]>>); +bracket_ipv6(Addr) when is_list(Addr) -> + [$[|Addr] ++ "]". + + +%%------------------------------------------------------------------------- +%% Helper funtions for recompose +%%------------------------------------------------------------------------- + +%%------------------------------------------------------------------------- +%% Checks if input Map has valid combination of fields that can be +%% recomposed into a URI. +%% +%% The implementation is based on a decision tree that fulfills the +%% following rules: +%% - 'path' shall always be present in the input map +%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] +%% hier-part = "//" authority path-abempty +%% / path-absolute +%% / path-rootless +%% / path-empty +%% - 'host' shall be present in the input map when 'path' starts with +%% two slashes ("//") +%% path = path-abempty ; begins with "/" or is empty +%% / path-absolute ; begins with "/" but not "//" +%% / path-noscheme ; begins with a non-colon segment +%% / path-rootless ; begins with a segment +%% / path-empty ; zero characters +%% path-abempty = *( "/" segment ) +%% segment = *pchar +%% - 'host' shall be present if userinfo or port is present in input map +%% authority = [ userinfo "@" ] host [ ":" port ] +%% - All fields shall be valid (scheme, userinfo, host, port, path, query +%% or fragment). +%%------------------------------------------------------------------------- +is_valid_map(#{path := Path} = Map) -> + ((starts_with_two_slash(Path) andalso is_valid_map_host(Map)) + orelse + (maps:is_key(userinfo, Map) andalso is_valid_map_host(Map)) + orelse + (maps:is_key(port, Map) andalso is_valid_map_host(Map)) + orelse + all_fields_valid(Map)); +is_valid_map(#{}) -> + false. + + +is_valid_map_host(Map) -> + maps:is_key(host, Map) andalso all_fields_valid(Map). + + +all_fields_valid(Map) -> + Fun = fun(scheme, _, Acc) -> Acc; + (userinfo, _, Acc) -> Acc; + (host, _, Acc) -> Acc; + (port, _, Acc) -> Acc; + (path, _, Acc) -> Acc; + (query, _, Acc) -> Acc; + (fragment, _, Acc) -> Acc; + (_, _, _) -> false + end, + maps:fold(Fun, true, Map). + + +starts_with_two_slash([$/,$/|_]) -> + true; +starts_with_two_slash(?STRING_REST("//", _)) -> + true; +starts_with_two_slash(_) -> false. + + +update_scheme(#{scheme := Scheme}, _) -> + add_colon_postfix(encode_scheme(Scheme)); +update_scheme(#{}, _) -> + empty. + + +update_userinfo(#{userinfo := Userinfo}, empty) -> + add_auth_prefix(encode_userinfo(Userinfo)); +update_userinfo(#{userinfo := Userinfo}, URI) -> + concat(URI,add_auth_prefix(encode_userinfo(Userinfo))); +update_userinfo(#{}, empty) -> + empty; +update_userinfo(#{}, URI) -> + URI. + + +update_host(#{host := Host}, empty) -> + add_auth_prefix(encode_host(Host)); +update_host(#{host := Host} = Map, URI) -> + concat(URI,add_host_prefix(Map, encode_host(Host))); +update_host(#{}, empty) -> + empty; +update_host(#{}, URI) -> + URI. + + +%% URI cannot be empty for ports. E.g. ":8080" is not a valid URI +update_port(#{port := undefined}, URI) -> + concat(URI, <<":">>); +update_port(#{port := Port}, URI) -> + concat(URI,add_colon(encode_port(Port))); +update_port(#{}, URI) -> + URI. + + +update_path(#{path := Path}, empty) -> + encode_path(Path); +update_path(#{path := Path}, URI) -> + concat(URI,encode_path(Path)); +update_path(#{}, empty) -> + empty; +update_path(#{}, URI) -> + URI. + + +update_query(#{query := Query}, empty) -> + encode_query(Query); +update_query(#{query := Query}, URI) -> + concat(URI,add_question_mark(encode_query(Query))); +update_query(#{}, empty) -> + empty; +update_query(#{}, URI) -> + URI. + + +update_fragment(#{fragment := Fragment}, empty) -> + add_hashmark(encode_fragment(Fragment)); +update_fragment(#{fragment := Fragment}, URI) -> + concat(URI,add_hashmark(encode_fragment(Fragment))); +update_fragment(#{}, empty) -> + ""; +update_fragment(#{}, URI) -> + URI. + +%%------------------------------------------------------------------------- +%% Concatenates its arguments that can be lists and binaries. +%% The result is a list if at least one of its argument is a list and +%% binary otherwise. +%%------------------------------------------------------------------------- +concat(A, B) when is_binary(A), is_binary(B) -> + <<A/binary, B/binary>>; +concat(A, B) when is_binary(A), is_list(B) -> + unicode:characters_to_list(A) ++ B; +concat(A, B) when is_list(A) -> + A ++ maybe_to_list(B). + +add_hashmark(Comp) when is_binary(Comp) -> + <<$#, Comp/binary>>; +add_hashmark(Comp) when is_list(Comp) -> + [$#|Comp]. + +add_question_mark(Comp) when is_binary(Comp) -> + <<$?, Comp/binary>>; +add_question_mark(Comp) when is_list(Comp) -> + [$?|Comp]. + +add_colon(Comp) when is_binary(Comp) -> + <<$:, Comp/binary>>. + +add_colon_postfix(Comp) when is_binary(Comp) -> + <<Comp/binary,$:>>; +add_colon_postfix(Comp) when is_list(Comp) -> + Comp ++ ":". + +add_auth_prefix(Comp) when is_binary(Comp) -> + <<"//", Comp/binary>>; +add_auth_prefix(Comp) when is_list(Comp) -> + [$/,$/|Comp]. + +add_host_prefix(#{userinfo := _}, Host) when is_binary(Host) -> + <<$@,Host/binary>>; +add_host_prefix(#{}, Host) when is_binary(Host) -> + <<"//",Host/binary>>; +add_host_prefix(#{userinfo := _}, Host) when is_list(Host) -> + [$@|Host]; +add_host_prefix(#{}, Host) when is_list(Host) -> + [$/,$/|Host]. + +maybe_to_list(Comp) when is_binary(Comp) -> unicode:characters_to_list(Comp); +maybe_to_list(Comp) -> Comp. + +encode_port(Port) -> + integer_to_binary(Port). + +%%------------------------------------------------------------------------- +%% Helper functions for transcode +%%------------------------------------------------------------------------- + +%%------------------------------------------------------------------------- +%% uri_string:transcode(<<"x%00%00%00%F6"/utf32>>). +%% 1. Convert (transcode/2) input to list form (list of unicode codepoints) +%% "x%00%00%00%F6" +%% 2. Accumulate characters until percent-encoded segment (transcode/4). +%% Acc = "x" +%% 3. Convert percent-encoded triplets to binary form (transcode_pct/4) +%% <<0,0,0,246>> +%% 4. Transcode in-encoded binary to out-encoding (utf32 -> utf8): +%% <<195,182>> +%% 5. Percent-encode out-encoded binary: +%% <<"%C3%B6"/utf8>> = <<37,67,51,37,66,54>> +%% 6. Convert binary to list form, reverse it and append the accumulator +%% "6B%3C%" + "x" +%% 7. Reverse Acc and return it +%%------------------------------------------------------------------------- +transcode([$%,_C0,_C1|_Rest] = L, Acc, InEnc, OutEnc) -> + transcode_pct(L, Acc, <<>>, InEnc, OutEnc); +transcode([_C|_Rest] = L, Acc, InEnc, OutEnc) -> + transcode(L, Acc, [], InEnc, OutEnc). +%% +transcode([$%,_C0,_C1|_Rest] = L, Acc, List, InEncoding, OutEncoding) -> + transcode_pct(L, List ++ Acc, <<>>, InEncoding, OutEncoding); +transcode([C|Rest], Acc, List, InEncoding, OutEncoding) -> + transcode(Rest, Acc, [C|List], InEncoding, OutEncoding); +transcode([], Acc, List, _InEncoding, _OutEncoding) -> + lists:reverse(List ++ Acc). + + +%% Transcode percent-encoded segment +transcode_pct([$%,C0,C1|Rest] = L, Acc, B, InEncoding, OutEncoding) -> + case is_hex_digit(C0) andalso is_hex_digit(C1) of + true -> + Int = ?HEX2DEC(C0)*16+?HEX2DEC(C1), + transcode_pct(Rest, Acc, <<B/binary, Int>>, InEncoding, OutEncoding); + false -> throw({error, invalid_percent_encoding,L}) + end; +transcode_pct([_C|_Rest] = L, Acc, B, InEncoding, OutEncoding) -> + OutBinary = convert_to_binary(B, InEncoding, OutEncoding), + PctEncUtf8 = percent_encode_segment(OutBinary), + Out = lists:reverse(convert_to_list(PctEncUtf8, utf8)), + transcode(L, Out ++ Acc, [], InEncoding, OutEncoding); +transcode_pct([], Acc, B, InEncoding, OutEncoding) -> + OutBinary = convert_to_binary(B, InEncoding, OutEncoding), + PctEncUtf8 = percent_encode_segment(OutBinary), + Out = convert_to_list(PctEncUtf8, utf8), + lists:reverse(Acc) ++ Out. + + +%% Convert to binary +convert_to_binary(Binary, InEncoding, OutEncoding) -> + case unicode:characters_to_binary(Binary, InEncoding, OutEncoding) of + {error, _List, RestData} -> + throw({error, invalid_input, RestData}); + {incomplete, _List, RestData} -> + throw({error, invalid_input, RestData}); + Result -> + Result + end. + + +%% Convert to list +convert_to_list(Binary, InEncoding) -> + case unicode:characters_to_list(Binary, InEncoding) of + {error, _List, RestData} -> + throw({error, invalid_input, RestData}); + {incomplete, _List, RestData} -> + throw({error, invalid_input, RestData}); + Result -> + Result + end. + + +%% Flatten input list +flatten_list([], _) -> + []; +flatten_list(L, InEnc) -> + flatten_list(L, InEnc, []). +%% +flatten_list([H|T], InEnc, Acc) when is_binary(H) -> + L = convert_to_list(H, InEnc), + flatten_list(T, InEnc, lists:reverse(L) ++ Acc); +flatten_list([H|T], InEnc, Acc) when is_list(H) -> + flatten_list(H ++ T, InEnc, Acc); +flatten_list([H|T], InEnc, Acc) -> + flatten_list(T, InEnc, [H|Acc]); +flatten_list([], _InEnc, Acc) -> + lists:reverse(Acc); +flatten_list(Arg, _, _) -> + throw({error, invalid_input, Arg}). + + +percent_encode_segment(Segment) -> + percent_encode_binary(Segment, <<>>). + + +%%------------------------------------------------------------------------- +%% Helper functions for normalize +%%------------------------------------------------------------------------- + +%% 6.2.2.1. Case Normalization +normalize_case(#{scheme := Scheme, host := Host} = Map) -> + Map#{scheme => to_lower(Scheme), + host => to_lower(Host)}; +normalize_case(#{host := Host} = Map) -> + Map#{host => to_lower(Host)}; +normalize_case(#{scheme := Scheme} = Map) -> + Map#{scheme => to_lower(Scheme)}; +normalize_case(#{} = Map) -> + Map. + + +to_lower(Cs) when is_list(Cs) -> + B = convert_to_binary(Cs, utf8, utf8), + convert_to_list(to_lower(B), utf8); +to_lower(Cs) when is_binary(Cs) -> + to_lower(Cs, <<>>). +%% +to_lower(<<C,Cs/binary>>, Acc) when $A =< C, C =< $Z -> + to_lower(Cs, <<Acc/binary,(C + 32)>>); +to_lower(<<C,Cs/binary>>, Acc) -> + to_lower(Cs, <<Acc/binary,C>>); +to_lower(<<>>, Acc) -> + Acc. + + +%% 6.2.2.3. Path Segment Normalization +%% 5.2.4. Remove Dot Segments +normalize_path_segment(Map) -> + Path = maps:get(path, Map, undefined), + Map#{path => remove_dot_segments(Path)}. + + +remove_dot_segments(Path) when is_binary(Path) -> + remove_dot_segments(Path, <<>>); +remove_dot_segments(Path) when is_list(Path) -> + B = convert_to_binary(Path, utf8, utf8), + B1 = remove_dot_segments(B, <<>>), + convert_to_list(B1, utf8). +%% +remove_dot_segments(<<>>, Output) -> + Output; +remove_dot_segments(<<"../",T/binary>>, Output) -> + remove_dot_segments(T, Output); +remove_dot_segments(<<"./",T/binary>>, Output) -> + remove_dot_segments(T, Output); +remove_dot_segments(<<"/./",T/binary>>, Output) -> + remove_dot_segments(<<$/,T/binary>>, Output); +remove_dot_segments(<<"/.">>, Output) -> + remove_dot_segments(<<$/>>, Output); +remove_dot_segments(<<"/../",T/binary>>, Output) -> + Out1 = remove_last_segment(Output), + remove_dot_segments(<<$/,T/binary>>, Out1); +remove_dot_segments(<<"/..">>, Output) -> + Out1 = remove_last_segment(Output), + remove_dot_segments(<<$/>>, Out1); +remove_dot_segments(<<$.>>, Output) -> + remove_dot_segments(<<>>, Output); +remove_dot_segments(<<"..">>, Output) -> + remove_dot_segments(<<>>, Output); +remove_dot_segments(Input, Output) -> + {First, Rest} = first_path_segment(Input), + remove_dot_segments(Rest, <<Output/binary,First/binary>>). + + +first_path_segment(Input) -> + F = first_path_segment(Input, <<>>), + split_binary(Input, byte_size(F)). +%% +first_path_segment(<<$/,T/binary>>, Acc) -> + first_path_segment_end(<<T/binary>>, <<Acc/binary,$/>>); +first_path_segment(<<C,T/binary>>, Acc) -> + first_path_segment_end(<<T/binary>>, <<Acc/binary,C>>). + + +first_path_segment_end(<<>>, Acc) -> + Acc; +first_path_segment_end(<<$/,_/binary>>, Acc) -> + Acc; +first_path_segment_end(<<C,T/binary>>, Acc) -> + first_path_segment_end(<<T/binary>>, <<Acc/binary,C>>). + + +remove_last_segment(<<>>) -> + <<>>; +remove_last_segment(B) -> + {Init, Last} = split_binary(B, byte_size(B) - 1), + case Last of + <<$/>> -> + Init; + _Char -> + remove_last_segment(Init) + end. + + +%% RFC 3986, 6.2.3. Scheme-Based Normalization +normalize_scheme_based(Map) -> + Scheme = maps:get(scheme, Map, undefined), + Port = maps:get(port, Map, undefined), + Path= maps:get(path, Map, undefined), + normalize_scheme_based(Map, Scheme, Port, Path). +%% +normalize_scheme_based(Map, Scheme, Port, Path) + when Scheme =:= "http"; Scheme =:= <<"http">> -> + normalize_http(Map, Port, Path); +normalize_scheme_based(Map, Scheme, Port, Path) + when Scheme =:= "https"; Scheme =:= <<"https">> -> + normalize_https(Map, Port, Path); +normalize_scheme_based(Map, Scheme, Port, _Path) + when Scheme =:= "ftp"; Scheme =:= <<"ftp">> -> + normalize_ftp(Map, Port); +normalize_scheme_based(Map, Scheme, Port, _Path) + when Scheme =:= "ssh"; Scheme =:= <<"ssh">> -> + normalize_ssh_sftp(Map, Port); +normalize_scheme_based(Map, Scheme, Port, _Path) + when Scheme =:= "sftp"; Scheme =:= <<"sftp">> -> + normalize_ssh_sftp(Map, Port); +normalize_scheme_based(Map, Scheme, Port, _Path) + when Scheme =:= "tftp"; Scheme =:= <<"tftp">> -> + normalize_tftp(Map, Port); +normalize_scheme_based(Map, _, _, _) -> + Map. + + +normalize_http(Map, Port, Path) -> + M1 = normalize_port(Map, Port, 80), + normalize_http_path(M1, Path). + + +normalize_https(Map, Port, Path) -> + M1 = normalize_port(Map, Port, 443), + normalize_http_path(M1, Path). + + +normalize_ftp(Map, Port) -> + normalize_port(Map, Port, 21). + + +normalize_ssh_sftp(Map, Port) -> + normalize_port(Map, Port, 22). + + +normalize_tftp(Map, Port) -> + normalize_port(Map, Port, 69). + + +normalize_port(Map, Port, Default) -> + case Port of + Default -> + maps:remove(port, Map); + _Else -> + Map + end. + + +normalize_http_path(Map, Path) -> + case Path of + "" -> + Map#{path => "/"}; + <<>> -> + Map#{path => <<"/">>}; + _Else -> + Map + end. diff --git a/lib/stdlib/test/Makefile b/lib/stdlib/test/Makefile index 523cb95065..8490770f3d 100644 --- a/lib/stdlib/test/Makefile +++ b/lib/stdlib/test/Makefile @@ -87,6 +87,7 @@ MODULES= \ timer_simple_SUITE \ unicode_SUITE \ unicode_util_SUITE \ + uri_string_SUITE \ win32reg_SUITE \ y2k_SUITE \ select_SUITE \ diff --git a/lib/stdlib/test/ets_SUITE.erl b/lib/stdlib/test/ets_SUITE.erl index 5a5e282998..f329a07b7a 100644 --- a/lib/stdlib/test/ets_SUITE.erl +++ b/lib/stdlib/test/ets_SUITE.erl @@ -1682,7 +1682,7 @@ do_random_test() -> ets:delete(Set), verify_etsmem(EtsMem). -%% Ttest various variants of update_element. +%% Test various variants of update_element. update_element(Config) when is_list(Config) -> EtsMem = etsmem(), repeat_for_opts(fun update_element_opts/1), diff --git a/lib/stdlib/test/filename_SUITE.erl b/lib/stdlib/test/filename_SUITE.erl index fc77593bb8..f284eb1ed6 100644 --- a/lib/stdlib/test/filename_SUITE.erl +++ b/lib/stdlib/test/filename_SUITE.erl @@ -107,6 +107,17 @@ absname(Config) when is_list(Config) -> [Drive|":/erlang/src"] = filename:absname([Drive|":erlang/src"]), "a:/erlang" = filename:absname("a:erlang"), + "//foo" = filename:absname("//foo"), + "//foo/bar" = filename:absname("//foo/bar"), + "//foo/\bar" = filename:absname("//foo/\bar"), + "//foo/bar/baz" = filename:absname("//foo/bar\\baz"), + "//foo/bar/baz" = filename:absname("//foo\\bar/baz"), + "//foo" = filename:absname("\\\\foo"), + "//foo/bar" = filename:absname("\\\\foo/bar"), + "//foo/\bar" = filename:absname("\\\\foo/\bar"), + "//foo/bar/baz" = filename:absname("\\\\foo/bar\\baz"), + "//foo/bar/baz" = filename:absname("\\\\foo\\bar/baz"), + file:set_cwd(Cwd), ok; {unix, _} -> @@ -167,6 +178,23 @@ absname_2(Config) when is_list(Config) -> [Drive|":/"]), "a:/erlang" = filename:absname("a:erlang", [Drive|":/"]), + "//foo" = filename:absname("foo","//"), + "//foo/bar" = filename:absname("foo/bar", "//"), + "//foo/bar" = filename:absname("bar", "//foo"), + "//bar" = filename:absname("/bar", "//foo"), + "//foo/bar/baz" = filename:absname("bar/baz", "//foo"), + "//bar/baz" = filename:absname("//bar/baz", "//foo"), + "//\bar" = filename:absname("/\bar", "//foo"), + "//foo" = filename:absname("foo","\\\\"), + "//foo/bar" = filename:absname("foo/bar", "\\\\"), + "//foo/bar" = filename:absname("bar", "\\\\foo"), + "//bar" = filename:absname("/bar", "\\\\foo"), + "//foo/bar/baz" = filename:absname("bar/baz", "\\\\foo"), + "//bar/baz" = filename:absname("\\\\bar/baz", "\\\\foo"), + "//\bar" = filename:absname("/\bar", "\\\\foo"), + "//bar/baz" = filename:absname("\\\\bar/baz", "//foo"), + "//bar/baz" = filename:absname("//bar/baz", "\\\\foo"), + ok; _ -> "/usr/foo" = filename:absname(foo, "/usr"), @@ -244,6 +272,18 @@ dirname(Config) when is_list(Config) -> "A:usr" = filename:dirname("A:usr/foo.erl"), "/usr" = filename:dirname("\\usr\\foo.erl"), "/" = filename:dirname("\\usr"), + "//foo/bar" = filename:dirname("//foo/bar/baz.erl"), + "//foo/\bar" = filename:dirname("//foo/\bar/baz.erl"), + "//foo/bar" = filename:dirname("//foo\\bar/baz.erl"), + "//foo/bar" = filename:dirname("\\\\foo/bar/baz.erl"), + "//foo/\bar" = filename:dirname("\\\\foo/\bar/baz.erl"), + "//foo/bar" = filename:dirname("\\\\foo\\bar/baz.erl"), + "//foo" = filename:dirname("//foo/baz.erl"), + "//foo" = filename:dirname("//foo/\baz.erl"), + "//foo" = filename:dirname("//foo\\baz.erl"), + "//foo" = filename:dirname("\\\\foo/baz.erl"), + "//foo" = filename:dirname("\\\\foo/\baz.erl"), + "//foo" = filename:dirname("\\\\foo\\baz.erl"), "A:" = filename:dirname("A:"); _ -> true end, @@ -289,7 +329,6 @@ join(Config) when is_list(Config) -> %% join/1 and join/2 (OTP-12158) by using help function %% filename_join/2. "/" = filename:join(["/"]), - "/" = filename:join(["//"]), "usr/foo.erl" = filename_join("usr","foo.erl"), "/src/foo.erl" = filename_join(usr, "/src/foo.erl"), "/src/foo.erl" = filename_join("/src/",'foo.erl'), @@ -301,7 +340,6 @@ join(Config) when is_list(Config) -> "a/b/c/d/e/f/g" = filename_join("a//b/c/", "d//e/f/g"), "a/b/c/d/e/f/g" = filename_join("a//b/c", "d//e/f/g"), "/d/e/f/g" = filename_join("a//b/c", "/d//e/f/g"), - "/d/e/f/g" = filename:join("a//b/c", "//d//e/f/g"), "foo/bar" = filename_join([$f,$o,$o,$/,[]], "bar"), @@ -332,6 +370,7 @@ join(Config) when is_list(Config) -> case os:type() of {win32, _} -> + "//" = filename:join(["//"]), "d:/" = filename:join(["D:/"]), "d:/" = filename:join(["D:\\"]), "d:/abc" = filename_join("D:/", "abc"), @@ -345,8 +384,35 @@ join(Config) when is_list(Config) -> "c:/usr/foo.erl" = filename:join(["A:","C:/usr","foo.erl"]), "c:usr/foo.erl" = filename:join(["A:","C:usr","foo.erl"]), "d:/foo" = filename:join([$D, $:, $/, []], "foo"), + "//" = filename:join("\\\\", ""), + "//foo" = filename:join("\\\\", "foo"), + "//foo/bar" = filename:join("\\\\", "foo\\\\bar"), + "//foo/bar/baz" = filename:join("\\\\foo", "bar\\\\baz"), + "//foo/bar/baz" = filename:join("\\\\foo", "bar\\baz"), + "//foo/bar/baz" = filename:join("\\\\foo\\bar", baz), + "//foo/\bar/baz" = filename:join("\\\\foo/\bar", baz), + "//foo/bar/baz" = filename:join("\\\\foo/bar", baz), + "//bar/baz" = filename:join("\\\\foo", "\\\\bar\\baz"), + "//bar/baz" = filename:join("\\\\foo", "//bar\\baz"), + "//bar/baz" = filename:join("\\\\foo", "//bar/baz"), + "//bar/baz" = filename:join("\\\\foo", "\\\\bar/baz"), + "//d/e/f/g" = filename:join("a//b/c", "//d//e/f/g"), + "//" = filename:join("//", ""), + "//foo" = filename:join("//", "foo"), + "//foo/bar" = filename:join("//", "foo\\\\bar"), + "//foo/bar/baz" = filename:join("//foo", "bar\\\\baz"), + "//foo/bar/baz" = filename:join("//foo", "bar\\baz"), + "//foo/bar/baz" = filename:join("//foo\\bar", baz), + "//foo/\bar/baz" = filename:join("//foo/\bar", baz), + "//foo/bar/baz" = filename:join("//foo/bar", baz), + "//bar/baz" = filename:join("//foo", "\\\\bar\\baz"), + "//bar/baz" = filename:join("//foo", "//bar\\baz"), + "//bar/baz" = filename:join("//foo", "//bar/baz"), + "//bar/baz" = filename:join("//foo", "\\\\bar/baz"), ok; _ -> + "/" = filename:join(["//"]), + "/d/e/f/g" = filename:join("a//b/c", "//d//e/f/g"), ok end. @@ -402,6 +468,16 @@ split(Config) when is_list(Config) -> filename:split("a:\\msdev\\include"), ["a:","msdev","include"] = filename:split("a:msdev\\include"), + ["//","foo"] = + filename:split("\\\\foo"), + ["//","foo"] = + filename:split("//foo"), + ["//","foo","bar"] = + filename:split("\\\\foo\\\\bar"), + ["//","foo","baz"] = + filename:split("\\\\foo\\baz"), + ["//","foo","baz"] = + filename:split("//foo\\baz"), ok; _ -> ok @@ -630,7 +706,6 @@ extension_bin(Config) when is_list(Config) -> join_bin(Config) when is_list(Config) -> <<"/">> = filename:join([<<"/">>]), - <<"/">> = filename:join([<<"//">>]), <<"usr/foo.erl">> = filename:join(<<"usr">>,<<"foo.erl">>), <<"/src/foo.erl">> = filename:join(usr, <<"/src/foo.erl">>), <<"/src/foo.erl">> = filename:join([<<"/src/">>,'foo.erl']), @@ -642,7 +717,6 @@ join_bin(Config) when is_list(Config) -> <<"a/b/c/d/e/f/g">> = filename:join([<<"a//b/c/">>, <<"d//e/f/g">>]), <<"a/b/c/d/e/f/g">> = filename:join([<<"a//b/c">>, <<"d//e/f/g">>]), <<"/d/e/f/g">> = filename:join([<<"a//b/c">>, <<"/d//e/f/g">>]), - <<"/d/e/f/g">> = filename:join([<<"a//b/c">>, <<"//d//e/f/g">>]), <<"foo/bar">> = filename:join([$f,$o,$o,$/,[]], <<"bar">>), @@ -695,6 +769,7 @@ join_bin(Config) when is_list(Config) -> case os:type() of {win32, _} -> + <<"//">> = filename:join([<<"//">>]), <<"d:/">> = filename:join([<<"D:/">>]), <<"d:/">> = filename:join([<<"D:\\">>]), <<"d:/abc">> = filename:join([<<"D:/">>, <<"abc">>]), @@ -708,8 +783,35 @@ join_bin(Config) when is_list(Config) -> <<"c:/usr/foo.erl">> = filename:join([<<"A:">>,<<"C:/usr">>,<<"foo.erl">>]), <<"c:usr/foo.erl">> = filename:join([<<"A:">>,<<"C:usr">>,<<"foo.erl">>]), <<"d:/foo">> = filename:join([$D, $:, $/, []], <<"foo">>), + <<"//">> = filename:join(<<"\\\\">>, <<"">>), + <<"//foo">> = filename:join(<<"\\\\">>, <<"foo">>), + <<"//foo/bar">> = filename:join(<<"\\\\">>, <<"foo\\\\bar">>), + <<"//foo/bar/baz">> = filename:join(<<"\\\\foo">>, <<"bar\\\\baz">>), + <<"//bar/baz">> = filename:join(<<"\\\\foo">>, <<"\\\\bar\\baz">>), + <<"//foo/bar/baz">> = filename:join(<<"\\\\foo\\bar">>, baz), + <<"//foo/\bar/baz">> = filename:join(<<"\\\\foo/\bar">>, baz), + <<"//foo/bar/baz">> = filename:join(<<"\\\\foo/bar">>, baz), + <<"//bar/baz">> = filename:join(<<"\\\\foo">>, <<"\\\\bar\\baz">>), + <<"//bar/baz">> = filename:join(<<"\\\\foo">>, <<"//bar\\baz">>), + <<"//bar/baz">> = filename:join(<<"\\\\foo">>, <<"//bar/baz">>), + <<"//bar/baz">> = filename:join(<<"\\\\foo">>, <<"\\\\bar/baz">>), + <<"//d/e/f/g">> = filename:join([<<"a//b/c">>, <<"//d//e/f/g">>]), + <<"//">> = filename:join(<<"//">>, <<"">>), + <<"//foo">> = filename:join(<<"//">>, <<"foo">>), + <<"//foo/bar">> = filename:join(<<"//">>, <<"foo\\\\bar">>), + <<"//foo/bar/baz">> = filename:join(<<"//foo">>, <<"bar\\\\baz">>), + <<"//bar/baz">> = filename:join(<<"//foo">>, <<"\\\\bar\\baz">>), + <<"//foo/bar/baz">> = filename:join(<<"//foo\\bar">>, baz), + <<"//foo/\bar/baz">> = filename:join(<<"//foo/\bar">>, baz), + <<"//foo/bar/baz">> = filename:join(<<"//foo/bar">>, baz), + <<"//bar/baz">> = filename:join(<<"//foo">>, <<"\\\\bar\\baz">>), + <<"//bar/baz">> = filename:join(<<"//foo">>, <<"//bar\\baz">>), + <<"//bar/baz">> = filename:join(<<"//foo">>, <<"//bar/baz">>), + <<"//bar/baz">> = filename:join(<<"//foo">>, <<"\\\\bar/baz">>), ok; _ -> + <<"/">> = filename:join([<<"//">>]), + <<"/d/e/f/g">> = filename:join([<<"a//b/c">>, <<"//d//e/f/g">>]), ok end. @@ -756,6 +858,16 @@ split_bin(Config) when is_list(Config) -> filename:split(<<"a:\\msdev\\include">>), [<<"a:">>,<<"msdev">>,<<"include">>] = filename:split(<<"a:msdev\\include">>), + [<<"//">>,<<"foo">>] = + filename:split(<<"\\\\foo">>), + [<<"//">>,<<"foo">>] = + filename:split(<<"//foo">>), + [<<"//">>,<<"foo">>,<<"bar">>] = + filename:split(<<"\\\\foo\\\\bar">>), + [<<"//">>,<<"foo">>,<<"baz">>] = + filename:split(<<"\\\\foo\\baz">>), + [<<"//">>,<<"foo">>,<<"baz">>] = + filename:split(<<"//foo\\baz">>), ok; _ -> ok diff --git a/lib/stdlib/test/property_test/README b/lib/stdlib/test/property_test/README new file mode 100644 index 0000000000..57602bf719 --- /dev/null +++ b/lib/stdlib/test/property_test/README @@ -0,0 +1,12 @@ + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% %%% +%%% WARNING %%% +%%% %%% +%%% This is experimental code which may be changed or removed %%% +%%% anytime without any warning. %%% +%%% %%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +The test in this directory are written assuming that the user has a QuickCheck license. They are to be run manually. Some may be possible to be run with other tools, e.g. PropEr. + diff --git a/lib/stdlib/test/property_test/uri_string_recompose.erl b/lib/stdlib/test/property_test/uri_string_recompose.erl new file mode 100644 index 0000000000..e51a671172 --- /dev/null +++ b/lib/stdlib/test/property_test/uri_string_recompose.erl @@ -0,0 +1,361 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(uri_string_recompose). + +-compile(export_all). + +-proptest(eqc). +-proptest([triq,proper]). + +-ifndef(EQC). +-ifndef(PROPER). +-ifndef(TRIQ). +-define(EQC,true). +-endif. +-endif. +-endif. + +-ifdef(EQC). +-include_lib("eqc/include/eqc.hrl"). +-define(MOD_eqc,eqc). + +-else. +-ifdef(PROPER). +-include_lib("proper/include/proper.hrl"). +-define(MOD_eqc,proper). + +-else. +-ifdef(TRIQ). +-define(MOD_eqc,triq). +-include_lib("triq/include/triq.hrl"). + +-endif. +-endif. +-endif. + + +-define(STRING_REST(MatchStr, Rest), <<MatchStr/utf8, Rest/binary>>). + +-define(SCHEME, {scheme, scheme()}). +-define(USER, {userinfo, unicode()}). +-define(HOST, {host, host_map()}). +-define(PORT, {port, port()}). +-define(PATH_ABE, {path, path_abempty_map()}). +-define(PATH_ABS, {path, path_absolute_map()}). +-define(PATH_NOS, {path, path_noscheme_map()}). +-define(PATH_ROO, {path, path_rootless_map()}). +-define(PATH_EMP, {path, path_empty_map()}). +-define(QUERY, {query, query_map()}). +-define(FRAGMENT, {fragment, fragment_map()}). + + +%%%======================================================================== +%%% Properties +%%%======================================================================== + +prop_recompose() -> + ?FORALL(Map, map(), + Map =:= uri_string:parse(uri_string:recompose(Map)) + ). + +%% Stats +prop_map_key_length_collect() -> + ?FORALL(List, map(), + collect(length(maps:keys(List)), true)). + +prop_map_collect() -> + ?FORALL(List, map(), + collect(lists:sort(maps:keys(List)), true)). + +prop_scheme_collect() -> + ?FORALL(List, scheme(), + collect(length(List), true)). + + +%%%======================================================================== +%%% Generators +%%%======================================================================== + +map() -> + ?LET(Gen, comp_proplist(), proplist_to_map(Gen)). + +comp_proplist() -> + frequency([ + {2, [?SCHEME,?PATH_ABS]}, + {2, [?SCHEME,?PATH_ROO]}, + {2, [?SCHEME,?PATH_EMP]}, + {2, [?SCHEME,?HOST,?PATH_ABE]}, + {2, [?SCHEME,?USER,?HOST,?PATH_ABE]}, + {2, [?SCHEME,?HOST,?PORT,?PATH_ABE]}, + {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE]}, + + {2, [?PATH_ABS]}, + {2, [?PATH_NOS]}, + {2, [?PATH_EMP]}, + {2, [?HOST,?PATH_ABE]}, + {2, [?USER,?HOST,?PATH_ABE]}, + {2, [?HOST,?PORT,?PATH_ABE]}, + {2, [?USER,?HOST,?PORT,?PATH_ABE]}, + + + {2, [?SCHEME,?PATH_ABS,?QUERY]}, + {2, [?SCHEME,?PATH_ROO,?QUERY]}, + {2, [?SCHEME,?PATH_EMP,?QUERY]}, + {2, [?SCHEME,?HOST,?PATH_ABE,?QUERY]}, + {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?QUERY]}, + {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?QUERY]}, + {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?QUERY]}, + + {2, [?PATH_ABS,?QUERY]}, + {2, [?PATH_NOS,?QUERY]}, + {2, [?PATH_EMP,?QUERY]}, + {2, [?HOST,?PATH_ABE,?QUERY]}, + {2, [?USER,?HOST,?PATH_ABE,?QUERY]}, + {2, [?HOST,?PORT,?PATH_ABE,?QUERY]}, + {2, [?USER,?HOST,?PORT,?PATH_ABE,?QUERY]}, + + + {2, [?SCHEME,?PATH_ABS,?FRAGMENT]}, + {2, [?SCHEME,?PATH_ROO,?FRAGMENT]}, + {2, [?SCHEME,?PATH_EMP,?FRAGMENT]}, + {2, [?SCHEME,?HOST,?PATH_ABE,?FRAGMENT]}, + {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?FRAGMENT]}, + {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?FRAGMENT]}, + {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?FRAGMENT]}, + + {2, [?PATH_ABS,?FRAGMENT]}, + {2, [?PATH_NOS,?FRAGMENT]}, + {2, [?PATH_EMP,?FRAGMENT]}, + {2, [?HOST,?PATH_ABE,?FRAGMENT]}, + {2, [?USER,?HOST,?PATH_ABE,?FRAGMENT]}, + {2, [?HOST,?PORT,?PATH_ABE,?FRAGMENT]}, + {2, [?USER,?HOST,?PORT,?PATH_ABE,?FRAGMENT]}, + + + {2, [?SCHEME,?PATH_ABS,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?PATH_ROO,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?PATH_EMP,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]}, + + {2, [?PATH_ABS,?QUERY,?FRAGMENT]}, + {2, [?PATH_NOS,?QUERY,?FRAGMENT]}, + {2, [?PATH_EMP,?QUERY,?FRAGMENT]}, + {2, [?HOST,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?USER,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?USER,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]} + ]). + + +%%------------------------------------------------------------------------- +%% Path +%%------------------------------------------------------------------------- +path_abempty_map() -> + frequency([{90, path_abe_map()}, + {10, path_empty_map()}]). + +path_abe_map() -> + ?SIZED(Length, path_abe_map(Length, [])). +%% +path_abe_map(0, Segments) -> + ?LET(Gen, Segments, lists:append(Gen)); +path_abe_map(N, Segments) -> + path_abe_map(N-1, [slash(),segment()|Segments]). + + +path_absolute_map() -> + ?SIZED(Length, path_absolute_map(Length, [])). +%% +path_absolute_map(0, Segments) -> + ?LET(Gen, [slash(),segment_nz()|Segments], lists:append(Gen)); +path_absolute_map(N, Segments) -> + path_absolute_map(N-1, [slash(),segment()|Segments]). + + +path_noscheme_map() -> + ?SIZED(Length, path_noscheme_map(Length, [])). +%% +path_noscheme_map(0, Segments) -> + ?LET(Gen, [segment_nz_nc()|Segments], lists:append(Gen)); +path_noscheme_map(N, Segments) -> + path_noscheme_map(N-1, [slash(),segment()|Segments]). + +path_rootless_map() -> + ?SIZED(Length, path_rootless_map(Length, [])). +%% +path_rootless_map(0, Segments) -> + ?LET(Gen, [segment_nz()|Segments], lists:append(Gen)); +path_rootless_map(N, Segments) -> + path_rootless_map(N-1, [slash(),segment()|Segments]). + + +segment_nz() -> + non_empty(segment()). + +segment_nz_nc() -> + non_empty(list(frequency([{30, unreserved()}, + {10, sub_delims()}, + {10, unicode_char()}, + {5, oneof([$@])} + ]))). + + +segment() -> + list(frequency([{30, unreserved()}, + {10, sub_delims()}, + {10, unicode_char()}, + {5, oneof([$:, $@])} + ])). + +slash() -> + "/". + +path_empty_map() -> + "". + + +%%------------------------------------------------------------------------- +%% Path +%%------------------------------------------------------------------------- +host_map() -> + frequency([{30, reg_name()}, + {30, ip_address()} + ]). + + +reg_name() -> + list(frequency([{30, alpha()}, + {10, sub_delims()}, + {10, unicode_char()} + ])). + +ip_address() -> + oneof(["127.0.0.1", "::127.0.0.1", + "2001:0db8:0000:0000:0000:0000:1428:07ab", + "2001:0db8:0000:0000:0000::1428:07ab", + "2001:0db8:0:0:0:0:1428:07ab", + "2001:0db8:0::0:1428:07ab"]). + +%% Generating only reg-names +host_uri() -> + non_empty(list(frequency([{30, unreserved()}, + {10, sub_delims()}, + {10, pct_encoded()} + ]))). + +%%------------------------------------------------------------------------- +%% Port, Query, Fragment +%%------------------------------------------------------------------------- +port() -> + frequency([{10, undefined}, + {10, range(1,65535)} + ]). + +query_map() -> + unicode(). + + +query_uri() -> + [$?| non_empty(list(frequency([{20, pchar()}, + {5, oneof([$/, $?])} % punctuation + ])))]. + +fragment_map() -> + unicode(). + +fragment_uri() -> + [$?| non_empty(list(frequency([{20, pchar()}, + {5, oneof([$/, $?])} % punctuation + ])))]. + + +%%------------------------------------------------------------------------- +%% Scheme +%%------------------------------------------------------------------------- +scheme() -> + ?SIZED(Length, scheme_start(Length, [])). +%% +scheme_start(0, L) -> + ?LET(Gen, L, lists:reverse(Gen)); +scheme_start(N, L) -> + scheme(N-1,[alpha()|L]). + +scheme(0, L) -> + ?LET(Gen, L, lists:reverse(Gen)); +scheme(N, L) -> + scheme(N-1, [scheme_char()|L]). + + +%%------------------------------------------------------------------------- +%% Misc +%%------------------------------------------------------------------------- +unicode() -> + list(frequency([{20, alpha()}, % alpha + {10, digit()}, % digit + {10, unicode_char()} % unicode + ])). + +scheme_char() -> + frequency([{20, alpha()}, % alpha + {20, digit()}, % digit + {5, oneof([$+, $-, $.])} % punctuation + ]). + +sub_delims() -> + oneof([$!, $$, $&, $', $(, $), + $*, $+, $,,$;, $=]). + +pchar() -> + frequency([{20, unreserved()}, + {5, pct_encoded()}, + {5, sub_delims()}, + {1, oneof([$:, $@])} % punctuation + ]). + +unreserved() -> + frequency([{20, alpha()}, + {5, digit()}, + {1, oneof([$-, $., $_, $~])} % punctuation + ]). + +unicode_char() -> + range(913, 1023). + +alpha() -> + frequency([{20, range($a, $z)}, % letters + {20, range($A, $Z)}]). % letters + +digit() -> + range($0, $9). % numbers + +pct_encoded() -> + oneof(["%C3%A4", "%C3%A5", "%C3%B6"]). + + +%%%======================================================================== +%%% Helpers +%%%======================================================================== +proplist_to_map(L) -> + lists:foldl(fun({K,V},M) -> M#{K => V}; + (_,M) -> M + end, #{}, L). diff --git a/lib/stdlib/test/qlc_SUITE.erl b/lib/stdlib/test/qlc_SUITE.erl index 5e9e03e410..949142ec77 100644 --- a/lib/stdlib/test/qlc_SUITE.erl +++ b/lib/stdlib/test/qlc_SUITE.erl @@ -7871,7 +7871,7 @@ run_test(Config, Extra, {cres, Body, Opts, ExpectedCompileReturn}) -> {module, _} = code:load_abs(AbsFile, Mod), Ms0 = erlang:process_info(self(),messages), - Before = {{get(), lists:sort(ets:all()), Ms0}, pps()}, + Before = {{lget(), lists:sort(ets:all()), Ms0}, pps()}, %% Prepare the check that the qlc module does not call qlc_pt. _ = [unload_pt() || {file, Name} <- [code:is_loaded(qlc_pt)], @@ -7903,7 +7903,7 @@ run_test(Config, Extra, Body) -> wait_for_expected(R, {Strict0,PPS0}=Before, SourceFile, Wait) -> Ms = erlang:process_info(self(),messages), - After = {_,PPS1} = {{get(), lists:sort(ets:all()), Ms}, pps()}, + After = {_,PPS1} = {{lget(), lists:sort(ets:all()), Ms}, pps()}, case {R, After} of {ok, Before} -> ok; @@ -7931,6 +7931,18 @@ wait_for_expected(R, {Strict0,PPS0}=Before, SourceFile, Wait) -> expected({ok,Before}, {R,After}, SourceFile) end. +%% The qlc modules uses the process dictionary for storing names of files. +lget() -> + lists:sort([T || {K, _} = T <- get(), is_qlc_key(K)]). + +%% Copied from the qlc module. +-define(LCACHE_FILE(Ref), {Ref, '$_qlc_cache_tmpfiles_'}). +-define(MERGE_JOIN_FILE, '$_qlc_merge_join_tmpfiles_'). + +is_qlc_key(?LCACHE_FILE(_)) -> true; +is_qlc_key(?MERGE_JOIN_FILE) -> true; +is_qlc_key(_) -> false. + unload_pt() -> erlang:garbage_collect(), % get rid of references to qlc_pt... _ = code:purge(qlc_pt), diff --git a/lib/stdlib/test/rand_SUITE.erl b/lib/stdlib/test/rand_SUITE.erl index f69d42551e..ef4f9faad9 100644 --- a/lib/stdlib/test/rand_SUITE.erl +++ b/lib/stdlib/test/rand_SUITE.erl @@ -29,11 +29,14 @@ basic_stats_uniform_1/1, basic_stats_uniform_2/1, basic_stats_standard_normal/1, basic_stats_normal/1, + uniform_real_conv/1, plugin/1, measure/1, reference_jump_state/1, reference_jump_procdict/1]). -export([test/0, gen/1]). +-export([uniform_real_gen/1, uniform_gen/2]). + -include_lib("common_test/include/ct.hrl"). -define(LOOP, 1000000). @@ -46,7 +49,7 @@ all() -> [seed, interval_int, interval_float, api_eq, reference, - {group, basic_stats}, + {group, basic_stats}, uniform_real_conv, plugin, measure, {group, reference_jump} ]. @@ -101,7 +104,7 @@ seed_1(Alg) -> _ = rand:uniform(), S00 = get(rand_seed), erase(), - _ = rand:uniform(), + _ = rand:uniform_real(), false = S00 =:= get(rand_seed), %% hopefully %% Choosing algo and seed @@ -228,11 +231,13 @@ interval_float(Config) when is_list(Config) -> interval_float_1(0) -> ok; interval_float_1(N) -> X = rand:uniform(), + Y = rand:uniform_real(), if - 0.0 =< X, X < 1.0 -> + 0.0 =< X, X < 1.0, 0.0 < Y, Y < 1.0 -> ok; true -> - io:format("X=~p 0=<~p<1.0~n", [X,X]), + io:format("X=~p 0.0=<~p<1.0~n", [X,X]), + io:format("Y=~p 0.0<~p<1.0~n", [Y,Y]), exit({X, rand:export_seed()}) end, interval_float_1(N-1). @@ -334,7 +339,13 @@ basic_stats_normal(Config) when is_list(Config) -> IntendedMeanVariancePairs). basic_uniform_1(N, S0, Sum, A0) when N > 0 -> - {X,S} = rand:uniform_s(S0), + {X,S} = + case N band 1 of + 0 -> + rand:uniform_s(S0); + 1 -> + rand:uniform_real_s(S0) + end, I = trunc(X*100), A = array:set(I, 1+array:get(I,A0), A0), basic_uniform_1(N-1, S, Sum+X, A); @@ -400,6 +411,137 @@ normal_s(Mean, Variance, State0) -> rand:normal_s(Mean, Variance, State0). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% White box test of the conversion to float + +uniform_real_conv(Config) when is_list(Config) -> + [begin +%% ct:pal("~13.16.0bx~3.16.0b: ~p~n", [M,E,Gen]), + uniform_real_conv_check(M, E, Gen) + end || {M, E, Gen} <- uniform_real_conv_data()], + uniform_real_scan(0), + uniform_real_scan(3). + +uniform_real_conv_data() -> + [{16#fffffffffffff, -1, [16#3ffffffffffffff]}, + {16#fffffffffffff, -1, [16#3ffffffffffffe0]}, + {16#ffffffffffffe, -1, [16#3ffffffffffffdf]}, + %% + {16#0000000000000, -1, [16#200000000000000]}, + {16#fffffffffffff, -2, [16#1ffffffffffffff]}, + {16#fffffffffffff, -2, [16#1fffffffffffff0]}, + {16#ffffffffffffe, -2, [16#1ffffffffffffef]}, + %% + {16#0000000000000, -2, [16#100000000000000]}, + {16#fffffffffffff, -3, [16#0ffffffffffffff]}, + {16#fffffffffffff, -3, [16#0fffffffffffff8]}, + {16#ffffffffffffe, -3, [16#0fffffffffffff7]}, + %% + {16#0000000000000, -3, [16#080000000000000]}, + {16#fffffffffffff, -4, [16#07fffffffffffff]}, + {16#fffffffffffff, -4, [16#07ffffffffffffc]}, + {16#ffffffffffffe, -4, [16#07ffffffffffffb]}, + %% + {16#0000000000000, -4, [16#040000000000000]}, + {16#fffffffffffff, -5, [16#03fffffffffffff,16#3ffffffffffffff]}, + {16#fffffffffffff, -5, [16#03ffffffffffffe,16#200000000000000]}, + {16#ffffffffffffe, -5, [16#03fffffffffffff,16#1ffffffffffffff]}, + {16#ffffffffffffe, -5, [16#03fffffffffffff,16#100000000000000]}, + %% + {16#0000000000001, -56, [16#000000000000007,16#00000000000007f]}, + {16#0000000000001, -56, [16#000000000000004,16#000000000000040]}, + {16#0000000000000, -57, [16#000000000000003,16#20000000000001f]}, + {16#0000000000000, -57, [16#000000000000000,16#200000000000000]}, + {16#fffffffffffff, -58, [16#000000000000003,16#1ffffffffffffff]}, + {16#fffffffffffff, -58, [16#000000000000000,16#1fffffffffffff0]}, + {16#ffffffffffffe, -58, [16#000000000000000,16#1ffffffffffffef]}, + {16#ffffffffffffe, -58, [16#000000000000000,16#1ffffffffffffe0]}, + %% + {16#0000000000000, -58, [16#000000000000000,16#10000000000000f]}, + {16#0000000000000, -58, [16#000000000000000,16#100000000000000]}, + {2#11001100000000000000000000000000000000000011000000011, % 53 bits + -1022, + [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, % 18 zeros + 2#1100110000000000000000000000000000000000001 bsl 2, % 43 bits + 2#1000000011 bsl (56-10+2)]}, % 10 bits + {0, -1, % 0.5 after retry + [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, % 18 zeros + 2#111111111111111111111111111111111111111111 bsl 2, % 42 bits - retry + 16#200000000000003]}]. % 0.5 + +-define(UNIFORM_REAL_SCAN_PATTERN, (16#19000000000009)). % 53 bits +-define(UNIFORM_REAL_SCAN_NUMBER, (1021)). + +uniform_real_scan_template(K) -> + <<0:?UNIFORM_REAL_SCAN_NUMBER, + ?UNIFORM_REAL_SCAN_PATTERN:53,K:2,0:1>>. + +uniform_real_scan(K) -> + Templ = uniform_real_scan_template(K), + N = ?UNIFORM_REAL_SCAN_NUMBER, + uniform_real_scan(Templ, N, K). + +uniform_real_scan(Templ, N, K) when 0 =< N -> + <<_:N/bits,T/bits>> = Templ, + Data = uniform_real_scan_data(T, K), + uniform_real_conv_check( + ?UNIFORM_REAL_SCAN_PATTERN, N - 1 - ?UNIFORM_REAL_SCAN_NUMBER, Data), + uniform_real_scan(Templ, N - 1, K); +uniform_real_scan(_, _, _) -> + ok. + +uniform_real_scan_data(Templ, K) -> + case Templ of + <<X:56, T/bits>> -> + B = rand:bc64(X), + [(X bsl 2) bor K | + if + 53 =< B -> + []; + true -> + uniform_real_scan_data(T, K) + end]; + _ -> + <<X:56, _/bits>> = <<Templ/bits, 0:56>>, + [(X bsl 2) bor K] + end. + +uniform_real_conv_check(M, E, Gen) -> + <<F/float>> = <<0:1, (E + 16#3ff):11, M:52>>, + try uniform_real_gen(Gen) of + F -> F; + FF -> + ct:pal( + "~s =/= ~s: ~s~n", + [rand:float2str(FF), rand:float2str(F), + [["16#",integer_to_list(G,16),$\s]||G<-Gen]]), + ct:fail({neq, FF, F}) + catch + Error:Reason -> + ct:pal( + "~w:~p ~s: ~s~n", + [Error, Reason, rand:float2str(F), + [["16#",integer_to_list(G,16),$\s]||G<-Gen]]), + ct:fail({Error, Reason, F, erlang:get_stacktrace()}) + end. + + +uniform_real_gen(Gen) -> + State = rand_state(Gen), + {F, {#{type := rand_SUITE_list},[]}} = rand:uniform_real_s(State), + F. + +uniform_gen(Range, Gen) -> + State = rand_state(Gen), + {N, {#{type := rand_SUITE_list},[]}} = rand:uniform_s(Range, State), + N. + +%% Loaded dice for white box tests +rand_state(Gen) -> + {#{type => rand_SUITE_list, bits => 58, weak_low_bits => 1, + next => fun ([H|T]) -> {H, T} end}, + Gen}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Test that the user can write algorithms. plugin(Config) when is_list(Config) -> @@ -520,6 +662,21 @@ do_measure(_Config) -> end, Algs), %% + ct:pal("~nRNG uniform integer half range performance~n",[]), + _ = + measure_1( + fun (State) -> half_range(State) end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), + %% ct:pal("~nRNG uniform integer half range + 1 performance~n",[]), _ = measure_1( @@ -630,7 +787,8 @@ do_measure(_Config) -> Algs), %% ct:pal("~nRNG uniform float performance~n",[]), - _ = measure_1( + _ = + measure_1( fun (_) -> 0 end, fun (State, _, Mod) -> measure_loop( @@ -641,8 +799,22 @@ do_measure(_Config) -> end, Algs), %% + ct:pal("~nRNG uniform_real float performance~n",[]), + _ = + measure_1( + fun (_) -> 0 end, + fun (State, _, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM(Mod:uniform_real_s(St0), X, St) + end, + State) + end, + Algs), + %% ct:pal("~nRNG normal float performance~n",[]), - _ = measure_1( + [TMarkNormalFloat|_] = + measure_1( fun (_) -> 0 end, fun (State, _, Mod) -> measure_loop( @@ -652,10 +824,36 @@ do_measure(_Config) -> State) end, Algs), + %% Just for fun try an implementation of the Box-Muller + %% transformation for creating normal distribution floats + %% to compare with our Ziggurat implementation. + %% Generates two numbers per call that we add so they + %% will not be optimized away. Hence the benchmark time + %% is twice what it should be. + TwoPi = 2 * math:pi(), + _ = + measure_1( + fun (_) -> 0 end, + fun (State, _, Mod) -> + measure_loop( + fun (State0) -> + {U1, State1} = Mod:uniform_real_s(State0), + {U2, State2} = Mod:uniform_s(State1), + R = math:sqrt(-2.0 * math:log(U1)), + T = TwoPi * U2, + Z0 = R * math:cos(T), + Z1 = R * math:sin(T), + ?CHECK_NORMAL({Z0 + Z1, State2}, X, State3) + end, + State) + end, + exrop, TMarkNormalFloat), ok. +-define(LOOP_MEASURE, (?LOOP div 5)). + measure_loop(Fun, State) -> - measure_loop(Fun, State, ?LOOP). + measure_loop(Fun, State, ?LOOP_MEASURE). %% measure_loop(Fun, State, N) when 0 < N -> measure_loop(Fun, Fun(State), N-1); @@ -693,7 +891,8 @@ measure_1(RangeFun, Fun, Alg, TMark) -> end, io:format( "~.12w: ~p ns ~p% [16#~.16b]~n", - [Alg, (Time * 1000 + 500) div ?LOOP, Percent, Range]), + [Alg, (Time * 1000 + 500) div ?LOOP_MEASURE, + Percent, Range]), Parent ! {self(), Time}, normal end), diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl new file mode 100644 index 0000000000..c625da56c6 --- /dev/null +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -0,0 +1,844 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(uri_string_SUITE). + +-include_lib("common_test/include/ct.hrl"). + +-export([all/0, suite/0,groups/0, + normalize/1, + parse_binary_fragment/1, parse_binary_host/1, parse_binary_host_ipv4/1, + parse_binary_host_ipv6/1, + parse_binary_path/1, parse_binary_pct_encoded_fragment/1, parse_binary_pct_encoded_query/1, + parse_binary_pct_encoded_userinfo/1, parse_binary_port/1, + parse_binary_query/1, parse_binary_scheme/1, parse_binary_userinfo/1, + parse_fragment/1, parse_host/1, parse_host_ipv4/1, parse_host_ipv6/1, + parse_path/1, parse_pct_encoded_fragment/1, parse_pct_encoded_query/1, + parse_pct_encoded_userinfo/1, parse_port/1, + parse_query/1, parse_scheme/1, parse_userinfo/1, + parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1, + parse_special/1, parse_special2/1, parse_negative/1, + recompose_fragment/1, recompose_parse_fragment/1, + recompose_query/1, recompose_parse_query/1, + recompose_path/1, recompose_parse_path/1, + recompose_autogen/1, parse_recompose_autogen/1, + transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1 + ]). + + +-define(SCHEME, "foo"). +-define(USERINFO, "åsa"). +-define(USERINFO_ENC, "%C3%A5sa"). +-define(HOST, "älvsjö"). +-define(HOST_ENC, "%C3%A4lvsj%C3%B6"). +-define(IPV6, "::127.0.0.1"). +-define(IPV6_ENC, "[::127.0.0.1]"). +-define(PORT, 8042). +-define(PORT_ENC, ":8042"). +-define(PATH, "/där"). +-define(PATH_ENC, "/d%C3%A4r"). +-define(QUERY, "name=örn"). +-define(QUERY_ENC, "?name=%C3%B6rn"). +-define(FRAGMENT, "näsa"). +-define(FRAGMENT_ENC, "#n%C3%A4sa"). + + +suite() -> + [{timetrap,{minutes,1}}]. + +all() -> + [ + normalize, + parse_binary_scheme, + parse_binary_userinfo, + parse_binary_pct_encoded_userinfo, + parse_binary_host, + parse_binary_host_ipv4, + parse_binary_host_ipv6, + parse_binary_port, + parse_binary_path, + parse_binary_query, + parse_binary_pct_encoded_query, + parse_binary_fragment, + parse_binary_pct_encoded_fragment, + parse_scheme, + parse_userinfo, + parse_pct_encoded_userinfo, + parse_host, + parse_host_ipv4, + parse_host_ipv6, + parse_port, + parse_path, + parse_query, + parse_pct_encoded_query, + parse_fragment, + parse_pct_encoded_fragment, + parse_list, + parse_binary, + parse_mixed, + parse_relative, + parse_special, + parse_special2, + parse_negative, + recompose_fragment, + recompose_parse_fragment, + recompose_query, + recompose_parse_query, + recompose_path, + recompose_parse_path, + recompose_autogen, + parse_recompose_autogen, + transcode_basic, + transcode_options, + transcode_mixed, + transcode_negative + ]. + +groups() -> + []. + + +%%------------------------------------------------------------------------- +%% Helper functions +%%------------------------------------------------------------------------- +uri_combinations() -> + [[Sch,Usr,Hst,Prt,Pat,Qry,Frg] || + Sch <- [fun update_scheme/1, fun update_scheme_binary/1, none], + Usr <- [fun update_userinfo/1, fun update_userinfo_binary/1, none], + Hst <- [fun update_host/1, fun update_host_binary/1, + fun update_ipv6/1, fun update_ipv6_binary/1, none], + Prt <- [fun update_port/1, none], + Pat <- [fun update_path/1, fun update_path_binary/1], + Qry <- [fun update_query/1,fun update_query_binary/1, none], + Frg <- [fun update_fragment/1, fun update_fragment_binary/1, none], + not (Usr =:= none andalso Hst =:= none andalso Prt =/= none), + not (Usr =/= none andalso Hst =:= none andalso Prt =:= none), + not (Usr =/= none andalso Hst =:= none andalso Prt =/= none)]. + + +generate_test_vector(Comb) -> + Fun = fun (F, {Map, URI}) when is_function(F) -> F({Map, URI}); + (_, Map) -> Map + end, + lists:foldl(Fun, {#{}, empty}, Comb). + +generate_test_vectors(L) -> + lists:map(fun generate_test_vector/1, L). + +update_fragment({In, empty}) -> + {In#{fragment => ?FRAGMENT}, ?FRAGMENT_ENC}; +update_fragment({In, Out}) when is_list(Out) -> + {In#{fragment => ?FRAGMENT}, Out ++ ?FRAGMENT_ENC}; +update_fragment({In, Out}) when is_binary(Out) -> + {In#{fragment => ?FRAGMENT}, binary_to_list(Out) ++ ?FRAGMENT_ENC}. + +update_fragment_binary({In, empty}) -> + {In#{fragment => <<?FRAGMENT/utf8>>}, <<?FRAGMENT_ENC>>}; +update_fragment_binary({In, Out}) when is_list(Out) -> + {In#{fragment => <<?FRAGMENT/utf8>>}, Out ++ ?FRAGMENT_ENC}; +update_fragment_binary({In, Out}) when is_binary(Out) -> + {In#{fragment => <<?FRAGMENT/utf8>>}, <<Out/binary,?FRAGMENT_ENC>>}. + + +update_query({In, empty}) -> + {In#{query => ?QUERY}, ?QUERY_ENC}; +update_query({In, Out}) when is_list(Out) -> + {In#{query => ?QUERY}, Out ++ ?QUERY_ENC}; +update_query({In, Out}) when is_binary(Out) -> + {In#{query => ?QUERY}, binary_to_list(Out) ++ ?QUERY_ENC}. + +update_query_binary({In, empty}) -> + {In#{query => <<?QUERY/utf8>>}, <<?QUERY_ENC>>}; +update_query_binary({In, Out}) when is_list(Out) -> + {In#{query => <<?QUERY/utf8>>}, Out ++ ?QUERY_ENC}; +update_query_binary({In, Out}) when is_binary(Out) -> + {In#{query => <<?QUERY/utf8>>}, <<Out/binary,?QUERY_ENC>>}. + +update_path({In, empty}) -> + {In#{path => ?PATH}, ?PATH_ENC}; +update_path({In, Out}) when is_list(Out) -> + {In#{path => ?PATH}, Out ++ ?PATH_ENC}; +update_path({In, Out}) when is_binary(Out) -> + {In#{path => ?PATH}, binary_to_list(Out) ++ ?PATH_ENC}. + +update_path_binary({In, empty}) -> + {In#{path => <<?PATH/utf8>>}, <<?PATH_ENC>>}; +update_path_binary({In, Out}) when is_list(Out) -> + {In#{path => <<?PATH/utf8>>}, Out ++ ?PATH_ENC}; +update_path_binary({In, Out}) when is_binary(Out) -> + {In#{path => <<?PATH/utf8>>}, <<Out/binary,?PATH_ENC>>}. + +update_port({In, Out}) when is_list(Out) -> + {In#{port => ?PORT}, Out ++ ?PORT_ENC}; +update_port({In, Out}) when is_binary(Out) -> + {In#{port => ?PORT}, <<Out/binary,?PORT_ENC>>}. + +update_host({In, empty}) -> + {In#{host => ?HOST}, "//" ++ ?HOST_ENC}; +update_host({In, Out}) when is_list(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => ?HOST}, Out ++ [$@|?HOST_ENC]}; + false -> {In#{host => ?HOST}, Out ++ [$/,$/|?HOST_ENC]} + end; +update_host({In, Out}) when is_binary(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => ?HOST}, binary_to_list(Out) ++ [$@|?HOST_ENC]}; + false -> {In#{host => ?HOST}, binary_to_list(Out) ++ [$/,$/|?HOST_ENC]} + end. + +update_host_binary({In, empty}) -> + {In#{host => <<?HOST/utf8>>}, <<"//",?HOST_ENC>>}; +update_host_binary({In, Out}) when is_list(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => <<?HOST/utf8>>}, Out ++ [$@|?HOST_ENC]}; + false -> {In#{host => <<?HOST/utf8>>}, Out ++ [$/,$/|?HOST_ENC]} + end; +update_host_binary({In, Out}) when is_binary(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => <<?HOST/utf8>>}, <<Out/binary,$@,?HOST_ENC>>}; + false-> {In#{host => <<?HOST/utf8>>}, <<Out/binary,"//",?HOST_ENC>>} + end. + +update_ipv6({In, empty}) -> + {In#{host => ?IPV6}, "//" ++ ?IPV6_ENC}; +update_ipv6({In, Out}) when is_list(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => ?IPV6}, Out ++ [$@|?IPV6_ENC]}; + false -> {In#{host => ?IPV6}, Out ++ [$/,$/|?IPV6_ENC]} + end; +update_ipv6({In, Out}) when is_binary(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => ?IPV6}, binary_to_list(Out) ++ [$@|?IPV6_ENC]}; + false -> {In#{host => ?IPV6}, binary_to_list(Out) ++ [$/,$/|?IPV6_ENC]} + end. + +update_ipv6_binary({In, empty}) -> + {In#{host => <<?IPV6/utf8>>}, <<"//",?IPV6_ENC>>}; +update_ipv6_binary({In, Out}) when is_list(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => <<?IPV6/utf8>>}, Out ++ [$@|?IPV6_ENC]}; + false -> {In#{host => <<?IPV6/utf8>>}, Out ++ [$/,$/|?IPV6_ENC]} + end; +update_ipv6_binary({In, Out}) when is_binary(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => <<?IPV6/utf8>>}, <<Out/binary,$@,?IPV6_ENC>>}; + false-> {In#{host => <<?IPV6/utf8>>}, <<Out/binary,"//",?IPV6_ENC>>} + end. + +update_userinfo({In, empty}) -> + {In#{userinfo => ?USERINFO}, "//" ++ ?USERINFO_ENC}; +update_userinfo({In, Out}) when is_list(Out) -> + {In#{userinfo => ?USERINFO}, Out ++ "//" ++ ?USERINFO_ENC}; +update_userinfo({In, Out}) when is_binary(Out) -> + {In#{userinfo => ?USERINFO}, binary_to_list(Out) ++ "//" ++ ?USERINFO_ENC}. + +update_userinfo_binary({In, empty}) -> + {In#{userinfo => <<?USERINFO/utf8>>}, <<"//",?USERINFO_ENC>>}; +update_userinfo_binary({In, Out}) when is_list(Out) -> + {In#{userinfo => <<?USERINFO/utf8>>}, Out ++ "//" ++ ?USERINFO_ENC}; +update_userinfo_binary({In, Out}) when is_binary(Out) -> + {In#{userinfo => <<?USERINFO/utf8>>}, <<Out/binary,"//",?USERINFO_ENC>>}. + +update_scheme({In, empty}) -> + {In#{scheme => ?SCHEME}, ?SCHEME ++ ":"}. + +update_scheme_binary({In, empty}) -> + {In#{scheme => <<?SCHEME/utf8>>}, <<?SCHEME,$:>>}. + + +%% Test recompose on a generated test vector +run_test_recompose({#{}, empty}) -> + try "" = uri_string:recompose(#{}) of + _ -> ok + catch + _:_ -> error({test_failed, #{}, ""}) + end; +run_test_recompose({Map, URI}) -> + try URI = uri_string:recompose(Map) of + URI -> ok + catch + _:_ -> error({test_failed, Map, URI}) + end. + +%% Test parse - recompose on a generated test vector +run_test_parse_recompose({#{}, empty}) -> + try "" = uri_string:recompose(uri_string:parse("")) of + _ -> ok + catch + _:_ -> error({test_failed, #{}, ""}) + end; +run_test_parse_recompose({Map, URI}) -> + try URI = uri_string:recompose(uri_string:parse(URI)) of + URI -> ok + catch + _:_ -> error({test_failed, Map, URI}) + end. + + +%%------------------------------------------------------------------------- +%% Parse tests +%%------------------------------------------------------------------------- + +parse_binary_scheme(_Config) -> + #{} = uri_string:parse(<<>>), + #{path := <<"foo">>} = uri_string:parse(<<"foo">>), + #{scheme := <<"foo">>} = uri_string:parse(<<"foo:">>), + #{scheme := <<"foo">>, path := <<"bar:nisse">>} = uri_string:parse(<<"foo:bar:nisse">>), + #{scheme := <<"foo">>, host := <<"">>} = uri_string:parse(<<"foo://">>), + #{scheme := <<"foo">>, host := <<"">>, path := <<"/">>} = uri_string:parse(<<"foo:///">>), + #{scheme := <<"foo">>, host := <<"">>, path := <<"//">>} = uri_string:parse(<<"foo:////">>), + + #{path := <<"/">>} = uri_string:parse(<<"/">>), + #{host := <<>>} = uri_string:parse(<<"//">>), + #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>). + +parse_binary_userinfo(_Config) -> + #{scheme := <<"user">>, path := <<"password@localhost">>} = + uri_string:parse(<<"user:password@localhost">>), + #{path := <<"user@">>} = uri_string:parse(<<"user@">>), + #{path := <<"/user@">>} = uri_string:parse(<<"/user@">>), + #{path := <<"user@localhost">>} = uri_string:parse(<<"user@localhost">>), + #{userinfo := <<"user">>, host := <<"localhost">>} = uri_string:parse(<<"//user@localhost">>), + #{userinfo := <<"user:password">>, host := <<"localhost">>} = + uri_string:parse(<<"//user:password@localhost">>), + #{scheme := <<"foo">>, path := <<"/user@">>} = + uri_string:parse(<<"foo:/user@">>), + #{scheme := <<"foo">>, userinfo := <<"user">>, host := <<"localhost">>} = + uri_string:parse(<<"foo://user@localhost">>), + #{scheme := <<"foo">>, userinfo := <<"user:password">>, host := <<"localhost">>} = + uri_string:parse(<<"foo://user:password@localhost">>). + +parse_binary_pct_encoded_userinfo(_Config) -> + #{scheme := <<"user">>, path := <<"合@気道"/utf8>>} = + uri_string:parse(<<"user:%E5%90%88@%E6%B0%97%E9%81%93">>), + #{path := <<"合気道@"/utf8>>} = uri_string:parse(<<"%E5%90%88%E6%B0%97%E9%81%93@">>), + #{path := <<"/合気道@"/utf8>>} = uri_string:parse(<<"/%E5%90%88%E6%B0%97%E9%81%93@">>), + #{path := <<"合@気道"/utf8>>} = uri_string:parse(<<"%E5%90%88@%E6%B0%97%E9%81%93">>), + #{userinfo := <<"合"/utf8>>, host := <<"気道"/utf8>>} = + uri_string:parse(<<"//%E5%90%88@%E6%B0%97%E9%81%93">>), + #{userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} = + uri_string:parse(<<"//%E5%90%88:%E6%B0%97@%E9%81%93">>), + #{scheme := <<"foo">>, path := <<"/合気道@"/utf8>>} = + uri_string:parse(<<"foo:/%E5%90%88%E6%B0%97%E9%81%93@">>), + #{scheme := <<"foo">>, userinfo := <<"合"/utf8>>, host := <<"気道"/utf8>>} = + uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93">>), + #{scheme := <<"foo">>, userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} = + uri_string:parse(<<"foo://%E5%90%88:%E6%B0%97@%E9%81%93">>), + {error,invalid_uri,"@"} = uri_string:parse(<<"//%E5%90%88@%E6%B0%97%E9%81%93@">>), + {error,invalid_uri,":"} = uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93@">>). + +parse_binary_host(_Config) -> + #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>), + #{host := <<"hostname">>,scheme := <<"foo">>} = uri_string:parse(<<"foo://hostname">>), + #{host := <<"hostname">>,scheme := <<"foo">>, userinfo := <<"user">>} = + uri_string:parse(<<"foo://user@hostname">>). + +parse_binary_host_ipv4(_Config) -> + #{host := <<"127.0.0.1">>} = uri_string:parse(<<"//127.0.0.1">>), + #{host := <<"127.0.0.1">>, path := <<"/over/there">>} = + uri_string:parse(<<"//127.0.0.1/over/there">>), + #{host := <<"127.0.0.1">>, query := <<"name=ferret">>} = + uri_string:parse(<<"//127.0.0.1?name=ferret">>), + #{host := <<"127.0.0.1">>, fragment := <<"nose">>} = uri_string:parse(<<"//127.0.0.1#nose">>), + {error,invalid_uri,"x"} = uri_string:parse(<<"//127.0.0.x">>), + {error,invalid_uri,"1227.0.0.1"} = uri_string:parse(<<"//1227.0.0.1">>). + +parse_binary_host_ipv6(_Config) -> + #{host := <<"::127.0.0.1">>} = uri_string:parse(<<"//[::127.0.0.1]">>), + #{host := <<"2001:0db8:0000:0000:0000:0000:1428:07ab">>} = + uri_string:parse(<<"//[2001:0db8:0000:0000:0000:0000:1428:07ab]">>), + #{host := <<"::127.0.0.1">>, path := <<"/over/there">>} = + uri_string:parse(<<"//[::127.0.0.1]/over/there">>), + #{host := <<"::127.0.0.1">>, query := <<"name=ferret">>} = + uri_string:parse(<<"//[::127.0.0.1]?name=ferret">>), + #{host := <<"::127.0.0.1">>, fragment := <<"nose">>} = + uri_string:parse(<<"//[::127.0.0.1]#nose">>), + {error,invalid_uri,"x"} = uri_string:parse(<<"//[::127.0.0.x]">>), + {error,invalid_uri,"::1227.0.0.1"} = uri_string:parse(<<"//[::1227.0.0.1]">>), + {error,invalid_uri,"G"} = uri_string:parse(<<"//[2001:0db8:0000:0000:0000:0000:1428:G7ab]">>). + +parse_binary_port(_Config) -> + #{path:= <<"/:8042">>} = + uri_string:parse(<<"/:8042">>), + #{host:= <<>>, port := 8042} = + uri_string:parse(<<"//:8042">>), + #{host := <<"example.com">>, port:= 8042} = + uri_string:parse(<<"//example.com:8042">>), + #{scheme := <<"foo">>, path := <<"/:8042">>} = + uri_string:parse(<<"foo:/:8042">>), + #{scheme := <<"foo">>, host := <<>>, port := 8042} = + uri_string:parse(<<"foo://:8042">>), + #{scheme := <<"foo">>, host := <<"example.com">>, port := 8042} = + uri_string:parse(<<"foo://example.com:8042">>), + {error,invalid_uri,":"} = uri_string:parse(":600"), + {error,invalid_uri,"x"} = uri_string:parse("//:8042x"). + +parse_binary_path(_Config) -> + #{path := <<"over/there">>} = uri_string:parse(<<"over/there">>), + #{path := <<"/over/there">>} = uri_string:parse(<<"/over/there">>), + #{scheme := <<"foo">>, path := <<"/over/there">>} = + uri_string:parse(<<"foo:/over/there">>), + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/over/there">>} = + uri_string:parse(<<"foo://example.com/over/there">>), + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/over/there">>, port := 8042} = + uri_string:parse(<<"foo://example.com:8042/over/there">>). + +parse_binary_query(_Config) -> + #{scheme := <<"foo">>, query := <<"name=ferret">>} = + uri_string:parse(<<"foo:?name=ferret">>), + #{scheme := <<"foo">>, path:= <<"over/there">>, query := <<"name=ferret">>} = + uri_string:parse(<<"foo:over/there?name=ferret">>), + #{scheme := <<"foo">>, path:= <<"/over/there">>, query := <<"name=ferret">>} = + uri_string:parse(<<"foo:/over/there?name=ferret">>), + #{scheme := <<"foo">>, host := <<"example.com">>, query := <<"name=ferret">>} = + uri_string:parse(<<"foo://example.com?name=ferret">>), + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>, query := <<"name=ferret">>} = + uri_string:parse(<<"foo://example.com/?name=ferret">>), + + #{path := <<>>, query := <<"name=ferret">>} = + uri_string:parse(<<"?name=ferret">>), + #{path := <<"over/there">>, query := <<"name=ferret">>} = + uri_string:parse(<<"over/there?name=ferret">>), + #{path := <<"/">>, query := <<"name=ferret">>} = + uri_string:parse(<<"/?name=ferret">>), + #{path := <<"/over/there">>, query := <<"name=ferret">>} = + uri_string:parse(<<"/over/there?name=ferret">>), + #{host := <<"example.com">>, query := <<"name=ferret">>} = + uri_string:parse(<<"//example.com?name=ferret">>), + #{host := <<"example.com">>, path := <<"/">>, query := <<"name=ferret">>} = + uri_string:parse(<<"//example.com/?name=ferret">>). + +parse_binary_pct_encoded_query(_Config) -> + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>, + query := <<"name=合気道"/utf8>>} = + uri_string:parse(<<"foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>), + #{host := <<"example.com">>, path := <<"/">>, query := <<"name=合気道"/utf8>>} = + uri_string:parse(<<"//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>). + +parse_binary_fragment(_Config) -> + #{scheme := <<"foo">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo:#nose">>), + #{scheme := <<"foo">>, path:= <<"over/there">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo:over/there#nose">>), + #{scheme := <<"foo">>, path:= <<"/over/there">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo:/over/there#nose">>), + #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://example.com#nose">>), + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://example.com/#nose">>), + #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://example.com#nose">>), + + #{fragment := <<"nose">>} = + uri_string:parse(<<"#nose">>), + #{path := <<"over/there">>, fragment := <<"nose">>} = + uri_string:parse(<<"over/there#nose">>), + #{path := <<"/">>, fragment := <<"nose">>} = + uri_string:parse(<<"/#nose">>), + #{path := <<"/over/there">>, fragment := <<"nose">>} = + uri_string:parse(<<"/over/there#nose">>), + #{host := <<"example.com">>, fragment := <<"nose">>} = + uri_string:parse(<<"//example.com#nose">>), + #{host := <<"example.com">>, path := <<"/">>, fragment := <<"nose">>} = + uri_string:parse(<<"//example.com/#nose">>). + +parse_binary_pct_encoded_fragment(_Config) -> + #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"合気道"/utf8>>} = + uri_string:parse(<<"foo://example.com#%E5%90%88%E6%B0%97%E9%81%93">>), + #{host := <<"example.com">>, path := <<"/">>, fragment := <<"合気道"/utf8>>} = + uri_string:parse(<<"//example.com/#%E5%90%88%E6%B0%97%E9%81%93">>). + +parse_scheme(_Config) -> + #{} = uri_string:parse(""), + #{path := "foo"} = uri_string:parse("foo"), + #{scheme := "foo"} = uri_string:parse("foo:"), + #{scheme := "foo", path := "bar:nisse"} = uri_string:parse("foo:bar:nisse"), + #{scheme := "foo", host := ""} = uri_string:parse("foo://"), + #{scheme := "foo", host := "", path := "/"} = uri_string:parse("foo:///"), + #{scheme := "foo", host := "", path := "//"} = uri_string:parse("foo:////"), + + #{path := "/"} = uri_string:parse("/"), + #{host := ""} = uri_string:parse("//"), + #{host := "", path := "/"} = uri_string:parse("///"). + +parse_userinfo(_Config) -> + #{scheme := "user", path := "password@localhost"} = uri_string:parse("user:password@localhost"), + #{path := "user@"} = uri_string:parse("user@"), + #{path := "/user@"} = uri_string:parse("/user@"), + #{path := "user@localhost"} = uri_string:parse("user@localhost"), + #{userinfo := "user", host := "localhost"} = uri_string:parse("//user@localhost"), + #{userinfo := "user:password", host := "localhost"} = + uri_string:parse("//user:password@localhost"), + #{scheme := "foo", path := "/user@"} = + uri_string:parse("foo:/user@"), + #{scheme := "foo", userinfo := "user", host := "localhost"} = + uri_string:parse("foo://user@localhost"), + #{scheme := "foo", userinfo := "user:password", host := "localhost"} = + uri_string:parse("foo://user:password@localhost"). + +parse_pct_encoded_userinfo(_Config) -> + #{scheme := "user", path := "合@気道"} = + uri_string:parse("user:%E5%90%88@%E6%B0%97%E9%81%93"), + #{path := "合気道@"} = uri_string:parse("%E5%90%88%E6%B0%97%E9%81%93@"), + #{path := "/合気道@"} = uri_string:parse("/%E5%90%88%E6%B0%97%E9%81%93@"), + #{path := "合@気道"} = uri_string:parse("%E5%90%88@%E6%B0%97%E9%81%93"), + #{userinfo := "合", host := "気道"} = + uri_string:parse("//%E5%90%88@%E6%B0%97%E9%81%93"), + #{userinfo := "合:気", host := "道"} = + uri_string:parse("//%E5%90%88:%E6%B0%97@%E9%81%93"), + #{scheme := "foo", path := "/合気道@"} = + uri_string:parse("foo:/%E5%90%88%E6%B0%97%E9%81%93@"), + #{scheme := "foo", userinfo := "合", host := "気道"} = + uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93"), + #{scheme := "foo", userinfo := "合:気", host := "道"} = + uri_string:parse("foo://%E5%90%88:%E6%B0%97@%E9%81%93"), + {error,invalid_uri,"@"} = uri_string:parse("//%E5%90%88@%E6%B0%97%E9%81%93@"), + {error,invalid_uri,":"} = uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93@"). + + +parse_host(_Config) -> + #{host := "hostname"} = uri_string:parse("//hostname"), + #{host := "hostname",scheme := "foo"} = uri_string:parse("foo://hostname"), + #{host := "hostname",scheme := "foo", userinfo := "user"} = + uri_string:parse("foo://user@hostname"). + +parse_host_ipv4(_Config) -> + #{host := "127.0.0.1"} = uri_string:parse("//127.0.0.1"), + #{host := "2001:0db8:0000:0000:0000:0000:1428:07ab"} = + uri_string:parse("//[2001:0db8:0000:0000:0000:0000:1428:07ab]"), + #{host := "127.0.0.1", path := "/over/there"} = uri_string:parse("//127.0.0.1/over/there"), + #{host := "127.0.0.1", query := "name=ferret"} = uri_string:parse("//127.0.0.1?name=ferret"), + #{host := "127.0.0.1", fragment := "nose"} = uri_string:parse("//127.0.0.1#nose"), + {error,invalid_uri,"x"} = uri_string:parse("//127.0.0.x"), + {error,invalid_uri,"1227.0.0.1"} = uri_string:parse("//1227.0.0.1"). + +parse_host_ipv6(_Config) -> + #{host := "::127.0.0.1"} = uri_string:parse("//[::127.0.0.1]"), + #{host := "::127.0.0.1", path := "/over/there"} = uri_string:parse("//[::127.0.0.1]/over/there"), + #{host := "::127.0.0.1", query := "name=ferret"} = + uri_string:parse("//[::127.0.0.1]?name=ferret"), + #{host := "::127.0.0.1", fragment := "nose"} = uri_string:parse("//[::127.0.0.1]#nose"), + {error,invalid_uri,"x"} = uri_string:parse("//[::127.0.0.x]"), + {error,invalid_uri,"::1227.0.0.1"} = uri_string:parse("//[::1227.0.0.1]"), + {error,invalid_uri,"G"} = uri_string:parse("//[2001:0db8:0000:0000:0000:0000:1428:G7ab]"). + +parse_port(_Config) -> + #{path:= "/:8042"} = + uri_string:parse("/:8042"), + #{host:= "", port := 8042} = + uri_string:parse("//:8042"), + #{host := "example.com", port:= 8042} = + uri_string:parse("//example.com:8042"), + #{scheme := "foo", path := "/:8042"} = + uri_string:parse("foo:/:8042"), + #{scheme := "foo", host := "", port := 8042} = + uri_string:parse("foo://:8042"), + #{scheme := "foo", host := "example.com", port := 8042} = + uri_string:parse("foo://example.com:8042"). + +parse_path(_Config) -> + #{path := "over/there"} = uri_string:parse("over/there"), + #{path := "/over/there"} = uri_string:parse("/over/there"), + #{scheme := "foo", path := "/over/there"} = + uri_string:parse("foo:/over/there"), + #{scheme := "foo", host := "example.com", path := "/over/there"} = + uri_string:parse("foo://example.com/over/there"), + #{scheme := "foo", host := "example.com", path := "/over/there", port := 8042} = + uri_string:parse("foo://example.com:8042/over/there"). + +parse_query(_Config) -> + #{scheme := "foo", query := "name=ferret"} = + uri_string:parse("foo:?name=ferret"), + #{scheme := "foo", path:= "over/there", query := "name=ferret"} = + uri_string:parse("foo:over/there?name=ferret"), + #{scheme := "foo", path:= "/over/there", query := "name=ferret"} = + uri_string:parse("foo:/over/there?name=ferret"), + #{scheme := "foo", host := "example.com", query := "name=ferret"} = + uri_string:parse("foo://example.com?name=ferret"), + #{scheme := "foo", host := "example.com", path := "/", query := "name=ferret"} = + uri_string:parse("foo://example.com/?name=ferret"), + + #{path := "", query := "name=ferret"} = + uri_string:parse("?name=ferret"), + #{path := "over/there", query := "name=ferret"} = + uri_string:parse("over/there?name=ferret"), + #{path := "/", query := "name=ferret"} = + uri_string:parse("/?name=ferret"), + #{path := "/over/there", query := "name=ferret"} = + uri_string:parse("/over/there?name=ferret"), + #{host := "example.com", query := "name=ferret"} = + uri_string:parse("//example.com?name=ferret"), + #{host := "example.com", path := "/", query := "name=ferret"} = + uri_string:parse("//example.com/?name=ferret"). + +parse_pct_encoded_query(_Config) -> + #{scheme := "foo", host := "example.com", path := "/", + query := "name=合気道"} = + uri_string:parse("foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93"), + #{host := "example.com", path := "/", query := "name=合気道"} = + uri_string:parse("//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93"). + +parse_fragment(_Config) -> + #{scheme := "foo", fragment := "nose"} = + uri_string:parse("foo:#nose"), + #{scheme := "foo", path:= "over/there", fragment := "nose"} = + uri_string:parse("foo:over/there#nose"), + #{scheme := "foo", path:= "/over/there", fragment := "nose"} = + uri_string:parse("foo:/over/there#nose"), + #{scheme := "foo", host := "example.com", fragment := "nose"} = + uri_string:parse("foo://example.com#nose"), + #{scheme := "foo", host := "example.com", path := "/", fragment := "nose"} = + uri_string:parse("foo://example.com/#nose"), + #{scheme := "foo", host := "example.com", fragment := "nose"} = + uri_string:parse("foo://example.com#nose"), + + #{fragment := "nose"} = + uri_string:parse("#nose"), + #{path := "over/there", fragment := "nose"} = + uri_string:parse("over/there#nose"), + #{path := "/", fragment := "nose"} = + uri_string:parse("/#nose"), + #{path := "/over/there", fragment := "nose"} = + uri_string:parse("/over/there#nose"), + #{host := "example.com", fragment := "nose"} = + uri_string:parse("//example.com#nose"), + #{host := "example.com", path := "/", fragment := "nose"} = + uri_string:parse("//example.com/#nose"). + +parse_pct_encoded_fragment(_Config) -> + #{scheme := "foo", host := "example.com", fragment := "合気道"} = + uri_string:parse("foo://example.com#%E5%90%88%E6%B0%97%E9%81%93"), + #{host := "example.com", path := "/", fragment := "合気道"} = + uri_string:parse("//example.com/#%E5%90%88%E6%B0%97%E9%81%93"). + +parse_list(_Config) -> + #{scheme := "foo", path := "bar:nisse"} = uri_string:parse("foo:bar:nisse"), + #{scheme := "foo", host := "example.com", port := 8042, + path := "/over/there", query := "name=ferret", fragment := "nose"} = + uri_string:parse("foo://example.com:8042/over/there?name=ferret#nose"), + #{scheme := "foo", userinfo := "admin:admin", host := "example.com", port := 8042, + path := "/over/there", query := "name=ferret", fragment := "nose"} = + uri_string:parse("foo://admin:[email protected]:8042/over/there?name=ferret#nose"). + +parse_binary(_Config) -> + #{scheme := <<"foo">>, path := <<"bar:nisse">>} = uri_string:parse(<<"foo:bar:nisse">>), + #{scheme := <<"foo">>, host := <<"example.com">>, port := 8042, + path := <<"/over/there">>, query := <<"name=ferret">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://example.com:8042/over/there?name=ferret#nose">>), + #{scheme := <<"foo">>, userinfo := <<"admin:admin">>, host := <<"example.com">>, port := 8042, + path := <<"/over/there">>, query := <<"name=ferret">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://admin:[email protected]:8042/over/there?name=ferret#nose">>). + + +parse_mixed(_Config) -> + #{scheme := "foo", path := "bar"} = + uri_string:parse(lists:append("fo",<<"o:bar">>)), + #{scheme := "foo", path := "bar"} = + uri_string:parse(lists:append("foo:b",<<"ar">>)), + #{scheme := "foo", path := "bar:bar"} = + uri_string:parse([[102],[111,111],<<":bar">>,58,98,97,114]). + +parse_relative(_Config) -> + #{path := "/path"} = + uri_string:parse(lists:append("/pa",<<"th">>)), + #{path := "foo"} = + uri_string:parse(lists:append("fo",<<"o">>)). + +parse_special(_Config) -> + #{host := [],query := []} = uri_string:parse("//?"), + #{fragment := [],host := []} = uri_string:parse("//#"), + #{host := [],query := [],scheme := "foo"} = uri_string:parse("foo://?"), + #{fragment := [],host := [],scheme := "foo"} = uri_string:parse("foo://#"), + #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>), + #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>), + #{host := <<>>, path := <<"/hostname">>} = uri_string:parse(<<"///hostname">>), + #{host := [],path := "/",query := []} = uri_string:parse("///?"), + #{fragment := [],host := [],path := "/"} = uri_string:parse("///#"), + #{host := "foo",query := []} = uri_string:parse("//foo?"), + #{fragment := [],host := "foo"} = uri_string:parse("//foo#"), + #{host := "foo",path := "/"} = uri_string:parse("//foo/"), + #{host := "foo",query := [],scheme := "http"} = uri_string:parse("http://foo?"), + #{fragment := [],host := "foo",scheme := "http"} = uri_string:parse("http://foo#"), + #{host := "foo",path := "/",scheme := "http"} = uri_string:parse("http://foo/"), + #{fragment := [],host := "host",port := 80,scheme := "http"} = uri_string:parse("http://host:80#"), + #{host := "host",port := 80,query := [],scheme := "http"} = uri_string:parse("http://host:80?"), + #{path := [],query := []} = uri_string:parse("?"), + #{path := [],query := "?"} = uri_string:parse("??"), + #{path := [],query := "??"} = uri_string:parse("???"). + +parse_special2(_Config) -> + #{host := [],path := "/",port := 1,scheme := "a"} = uri_string:parse("a://:1/"), + #{path := "/a/",scheme := "a"} = uri_string:parse("a:/a/"), + #{host := [],path := [],userinfo := []} = uri_string:parse("//@"), + #{host := [],path := [],scheme := "foo",userinfo := []} = uri_string:parse("foo://@"), + #{host := [],path := "/",userinfo := []} = uri_string:parse("//@/"), + #{host := [],path := "/",scheme := "foo",userinfo := []} = uri_string:parse("foo://@/"), + #{host := "localhost",path := "/",port := undefined} = uri_string:parse("//localhost:/"), + #{host := [],path := [],port := undefined} = uri_string:parse("//:"). + +parse_negative(_Config) -> + {error,invalid_uri,"å"} = uri_string:parse("å"), + {error,invalid_uri,"å"} = uri_string:parse("aå:/foo"), + {error,invalid_uri,":"} = uri_string:parse("foo://usär@host"), + {error,invalid_uri,"ö"} = uri_string:parse("//host/path?foö=bar"), + {error,invalid_uri,"ö"} = uri_string:parse("//host/path#foö"), + {error,invalid_uri,"127.256.0.1"} = uri_string:parse("//127.256.0.1"), + {error,invalid_uri,":::127.0.0.1"} = uri_string:parse("//[:::127.0.0.1]"), + {error,invalid_utf8,<<0,0,0,246>>} = uri_string:parse("//%00%00%00%F6"), + {error,invalid_uri,"A"} = uri_string:parse("//localhost:A8"). + + +%%------------------------------------------------------------------------- +%% Recompose tests +%%------------------------------------------------------------------------- +recompose_fragment(_Config) -> + <<?FRAGMENT_ENC>> = uri_string:recompose(#{fragment => <<?FRAGMENT/utf8>>, path => <<>>}), + ?FRAGMENT_ENC = uri_string:recompose(#{fragment => ?FRAGMENT, path => ""}). + +recompose_parse_fragment(_Config) -> + <<?FRAGMENT_ENC>> = uri_string:recompose(uri_string:parse(<<?FRAGMENT_ENC>>)), + ?FRAGMENT_ENC = uri_string:recompose(uri_string:parse(?FRAGMENT_ENC)). + +recompose_query(_Config) -> + <<?QUERY_ENC>> = + uri_string:recompose(#{query => <<?QUERY/utf8>>, path => <<>>}), + <<?QUERY_ENC?FRAGMENT_ENC>> = + uri_string:recompose(#{query => <<?QUERY/utf8>>, + fragment => <<?FRAGMENT/utf8>>, + path => <<>>}), + "?name=%C3%B6rn" = + uri_string:recompose(#{query => "name=örn", path => ""}), + "?name=%C3%B6rn#n%C3%A4sa" = + uri_string:recompose(#{query => "name=örn", + fragment => "näsa", + path => ""}). + +recompose_parse_query(_Config) -> + <<"?name=%C3%B6rn">> = uri_string:recompose(uri_string:parse(<<"?name=%C3%B6rn">>)), + <<"?name=%C3%B6rn#n%C3%A4sa">> = + uri_string:recompose(uri_string:parse(<<"?name=%C3%B6rn#n%C3%A4sa">>)), + "?name=%C3%B6rn" = uri_string:recompose(uri_string:parse("?name=%C3%B6rn")), + "?name=%C3%B6rn#n%C3%A4sa" = uri_string:recompose(uri_string:parse("?name=%C3%B6rn#n%C3%A4sa")). + +recompose_path(_Config) -> + <<"/d%C3%A4r">> = + uri_string:recompose(#{path => <<"/där"/utf8>>}), + <<"/d%C3%A4r#n%C3%A4sa">> = + uri_string:recompose(#{path => <<"/där"/utf8>>, + fragment => <<"näsa"/utf8>>}), + <<"/d%C3%A4r?name=%C3%B6rn">> = + uri_string:recompose(#{path => <<"/där"/utf8>>, + query => <<"name=örn"/utf8>>}), + <<"/d%C3%A4r?name=%C3%B6rn#n%C3%A4sa">> = + uri_string:recompose(#{path => <<"/där"/utf8>>, + query => <<"name=örn"/utf8>>, + fragment => <<"näsa"/utf8>>}), + + + "/d%C3%A4r" = + uri_string:recompose(#{path => "/där"}), + "/d%C3%A4r#n%C3%A4sa" = + uri_string:recompose(#{path => "/där", + fragment => "näsa"}), + "/d%C3%A4r?name=%C3%B6rn" = + uri_string:recompose(#{path => "/där", + query => "name=örn"}), + "/d%C3%A4r?name=%C3%B6rn#n%C3%A4sa" = + uri_string:recompose(#{path => "/där", + query => "name=örn", + fragment => "näsa"}). + + +recompose_parse_path(_Config) -> + <<"/d%C3%A4r">> = + uri_string:recompose(uri_string:parse(<<"/d%C3%A4r">>)), + <<"/d%C3%A4r#n%C3%A4sa">> = + uri_string:recompose(uri_string:parse(<<"/d%C3%A4r#n%C3%A4sa">>)), + <<"/d%C3%A4r?name=%C3%B6rn">> = + uri_string:recompose(uri_string:parse(<<"/d%C3%A4r?name=%C3%B6rn">>)), + + "/d%C3%A4r" = + uri_string:recompose(uri_string:parse("/d%C3%A4r")), + "/d%C3%A4r#n%C3%A4sa" = + uri_string:recompose(uri_string:parse("/d%C3%A4r#n%C3%A4sa")), + "/d%C3%A4r?name=%C3%B6rn" = + uri_string:recompose(uri_string:parse("/d%C3%A4r?name=%C3%B6rn")). + + +recompose_autogen(_Config) -> + Tests = generate_test_vectors(uri_combinations()), + lists:map(fun run_test_recompose/1, Tests). + +parse_recompose_autogen(_Config) -> + Tests = generate_test_vectors(uri_combinations()), + lists:map(fun run_test_parse_recompose/1, Tests). + +transcode_basic(_Config) -> + <<"foo%C3%B6bar"/utf8>> = + uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>, [{in_encoding, utf32},{out_encoding, utf8}]), + "foo%C3%B6bar" = + uri_string:transcode("foo%00%00%00%F6bar", [{in_encoding, utf32},{out_encoding, utf8}]), + <<"foo%00%00%00%F6bar"/utf32>> = + uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]), + "foo%00%00%00%F6bar" = + uri_string:transcode("foo%C3%B6bar", [{in_encoding, utf8},{out_encoding, utf32}]), + "foo%C3%B6bar" = + uri_string:transcode("foo%F6bar", [{in_encoding, latin1},{out_encoding, utf8}]). + +transcode_options(_Config) -> + <<"foo%C3%B6bar"/utf8>> = + uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, []), + <<"foo%C3%B6bar"/utf8>> = + uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>, [{in_encoding, utf32}]), + <<"foo%00%00%00%F6bar"/utf32>> = + uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, [{out_encoding, utf32}]). + +transcode_mixed(_Config) -> + "foo%00%00%00%F6bar" = + uri_string:transcode(["foo",<<"%C3%B6"/utf8>>,<<"ba"/utf8>>,"r"], [{out_encoding, utf32}]), + "foo%00%00%00%F6bar" = + uri_string:transcode(["foo",<<"%C3%"/utf8>>,<<"B6ba"/utf8>>,"r"], [{out_encoding, utf32}]), + "foo%C3%B6bar" = + uri_string:transcode(["foo%00", <<"%00%0"/utf32>>,<<"0%F"/utf32>>,"6bar"], [{in_encoding, utf32},{out_encoding, utf8}]). + +transcode_negative(_Config) -> + {error,invalid_percent_encoding,"%BXbar"} = + uri_string:transcode(<<"foo%C3%BXbar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]), + {error,invalid_input,<<"ö">>} = + uri_string:transcode("foo%F6bar", [{in_encoding, utf8},{out_encoding, utf8}]). + +normalize(_Config) -> + "/a/g" = uri_string:normalize("/a/b/c/./../../g"), + <<"mid/6">> = uri_string:normalize(<<"mid/content=5/../6">>), + "http://localhost-%C3%B6rebro/a/g" = + uri_string:normalize("http://localhos%74-%c3%b6rebro:80/a/b/c/./../../g"), + <<"http://localhost-%C3%B6rebro/a/g">> = + uri_string:normalize(<<"http://localhos%74-%c3%b6rebro:80/a/b/c/./../../g">>), + <<"https://localhost/">> = + uri_string:normalize(<<"https://localhost:443">>), + <<"https://localhost:445/">> = + uri_string:normalize(<<"https://localhost:445">>), + <<"ftp://localhost">> = + uri_string:normalize(<<"ftp://localhost:21">>), + <<"ssh://localhost">> = + uri_string:normalize(<<"ssh://localhost:22">>), + <<"sftp://localhost">> = + uri_string:normalize(<<"sftp://localhost:22">>), + <<"tftp://localhost">> = + uri_string:normalize(<<"tftp://localhost:69">>). diff --git a/lib/stdlib/test/uri_string_property_test_SUITE.erl b/lib/stdlib/test/uri_string_property_test_SUITE.erl new file mode 100644 index 0000000000..ae2c61c7aa --- /dev/null +++ b/lib/stdlib/test/uri_string_property_test_SUITE.erl @@ -0,0 +1,39 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(uri_string_property_test_SUITE). + +-include_lib("common_test/include/ct.hrl"). +-compile(export_all). + +all() -> [recompose]. + +init_per_suite(Config) -> + ct_property_test:init_per_suite(Config). + +end_per_suite(Config) -> + Config. + +%%%======================================================================== +%%% Test suites +%%%======================================================================== +recompose(Config) -> + ct_property_test:quickcheck( + uri_string_recompose:prop_recompose(), + Config). |