aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib
diff options
context:
space:
mode:
Diffstat (limited to 'lib/stdlib')
-rw-r--r--lib/stdlib/doc/src/Makefile1
-rw-r--r--lib/stdlib/doc/src/c.xml9
-rw-r--r--lib/stdlib/doc/src/filelib.xml29
-rw-r--r--lib/stdlib/doc/src/filename.xml30
-rw-r--r--lib/stdlib/doc/src/gen_server.xml52
-rw-r--r--lib/stdlib/doc/src/gen_statem.xml12
-rw-r--r--lib/stdlib/doc/src/rand.xml118
-rw-r--r--lib/stdlib/doc/src/ref_man.xml1
-rw-r--r--lib/stdlib/doc/src/specs.xml1
-rw-r--r--lib/stdlib/doc/src/string.xml396
-rw-r--r--lib/stdlib/doc/src/unicode_usage.xml8
-rw-r--r--lib/stdlib/doc/src/uri_string.xml230
-rw-r--r--lib/stdlib/src/Makefile1
-rw-r--r--lib/stdlib/src/base64.erl108
-rw-r--r--lib/stdlib/src/c.erl18
-rw-r--r--lib/stdlib/src/epp.erl5
-rw-r--r--lib/stdlib/src/erl_lint.erl7
-rw-r--r--lib/stdlib/src/erl_pp.erl40
-rw-r--r--lib/stdlib/src/erl_scan.erl6
-rw-r--r--lib/stdlib/src/escript.erl12
-rw-r--r--lib/stdlib/src/ets.erl4
-rw-r--r--lib/stdlib/src/filelib.erl42
-rw-r--r--lib/stdlib/src/filename.erl106
-rw-r--r--lib/stdlib/src/gen_server.erl31
-rw-r--r--lib/stdlib/src/gen_statem.erl4
-rw-r--r--lib/stdlib/src/io_lib_format.erl10
-rw-r--r--lib/stdlib/src/lib.erl2
-rw-r--r--lib/stdlib/src/otp_internal.erl46
-rw-r--r--lib/stdlib/src/pool.erl6
-rw-r--r--lib/stdlib/src/rand.erl261
-rw-r--r--lib/stdlib/src/slave.erl2
-rw-r--r--lib/stdlib/src/stdlib.app.src3
-rw-r--r--lib/stdlib/src/stdlib.appup.src6
-rw-r--r--lib/stdlib/src/string.erl10
-rw-r--r--lib/stdlib/src/uri_string.erl1842
-rw-r--r--lib/stdlib/test/Makefile4
-rw-r--r--lib/stdlib/test/erl_internal_SUITE.erl4
-rw-r--r--lib/stdlib/test/ets_SUITE.erl34
-rw-r--r--lib/stdlib/test/filelib_SUITE.erl33
-rw-r--r--lib/stdlib/test/gen_server_SUITE.erl111
-rw-r--r--lib/stdlib/test/gen_server_SUITE_data/oc_server.erl12
-rw-r--r--lib/stdlib/test/property_test/README12
-rw-r--r--lib/stdlib/test/property_test/uri_string_recompose.erl361
-rw-r--r--lib/stdlib/test/rand_SUITE.erl568
-rw-r--r--lib/stdlib/test/stdlib.spec1
-rw-r--r--lib/stdlib/test/stdlib_bench.spec7
-rw-r--r--lib/stdlib/test/stdlib_bench_SUITE.erl107
-rw-r--r--lib/stdlib/test/string_SUITE.erl14
-rw-r--r--lib/stdlib/test/unicode_util_SUITE.erl16
-rw-r--r--lib/stdlib/test/unicode_util_SUITE_data/GraphemeBreakTest.txt175
-rw-r--r--lib/stdlib/test/unicode_util_SUITE_data/LineBreakTest.txt23
-rw-r--r--lib/stdlib/test/unicode_util_SUITE_data/NormalizationTest.txt30
-rw-r--r--lib/stdlib/test/uri_string_SUITE.erl844
-rw-r--r--lib/stdlib/test/uri_string_property_test_SUITE.erl39
-rw-r--r--lib/stdlib/uc_spec/CaseFolding.txt8
-rw-r--r--lib/stdlib/uc_spec/CompositionExclusions.txt6
-rw-r--r--lib/stdlib/uc_spec/GraphemeBreakProperty.txt78
-rw-r--r--lib/stdlib/uc_spec/PropList.txt83
-rw-r--r--lib/stdlib/uc_spec/SpecialCasing.txt8
-rw-r--r--lib/stdlib/uc_spec/UnicodeData.txt1028
-rwxr-xr-xlib/stdlib/uc_spec/gen_unicode_mod.escript26
61 files changed, 6157 insertions, 934 deletions
diff --git a/lib/stdlib/doc/src/Makefile b/lib/stdlib/doc/src/Makefile
index 93eac8220d..aeed79408b 100644
--- a/lib/stdlib/doc/src/Makefile
+++ b/lib/stdlib/doc/src/Makefile
@@ -98,6 +98,7 @@ XML_REF3_FILES = \
sys.xml \
timer.xml \
unicode.xml \
+ uri_string.xml \
win32reg.xml \
zip.xml
diff --git a/lib/stdlib/doc/src/c.xml b/lib/stdlib/doc/src/c.xml
index 7666699183..697e1715e7 100644
--- a/lib/stdlib/doc/src/c.xml
+++ b/lib/stdlib/doc/src/c.xml
@@ -94,6 +94,15 @@
</func>
<func>
+ <name name="erlangrc" arity="1"/>
+ <fsummary>Load an erlang resource file.</fsummary>
+ <desc>
+ <p>Search <c>PathList</c> and load <c>.erlang</c> resource file if
+ found.</p>
+ </desc>
+ </func>
+
+ <func>
<name name="flush" arity="0"/>
<fsummary>Flush any messages sent to the shell.</fsummary>
<desc>
diff --git a/lib/stdlib/doc/src/filelib.xml b/lib/stdlib/doc/src/filelib.xml
index 80c4acffdb..5e631aac21 100644
--- a/lib/stdlib/doc/src/filelib.xml
+++ b/lib/stdlib/doc/src/filelib.xml
@@ -45,6 +45,30 @@
<p>For more information about raw filenames, see the
<seealso marker="kernel:file"><c>file</c></seealso> module.</p>
+
+ <note>
+ <p>
+ Functionality in this module generally assumes valid input and
+ does not necessarily fail on input that does not use a valid
+ encoding, but may instead very likely produce invalid output.
+ </p>
+ <p>
+ File operations used to accept filenames containing
+ null characters (integer value zero). This caused
+ the name to be truncated and in some cases arguments
+ to primitive operations to be mixed up. Filenames
+ containing null characters inside the filename
+ are now <em>rejected</em> and will cause primitive
+ file operations to fail.
+ </p>
+ </note>
+ <warning><p>
+ Currently null characters at the end of the filename
+ will be accepted by primitive file operations. Such
+ filenames are however still documented as invalid. The
+ implementation will also change in the future and
+ reject such filenames.
+ </p></warning>
</description>
<datatypes>
@@ -193,6 +217,11 @@
<p>Other characters represent themselves. Only filenames that
have exactly the same character in the same position match.
Matching is case-sensitive, for example, "a" does not match "A".</p>
+ <p>Directory separators must always be written as <c>/</c>, even on
+ Windows.</p>
+ <p>A character preceded by <c>\</c> loses its special meaning. Note
+ that <c>\</c> must be written as <c>\\</c> in a string literal.
+ For example, "\\?*" will match any filename starting with <c>?</c>.</p>
<p>Notice that multiple "*" characters are allowed
(as in Unix wildcards, but opposed to Windows/DOS wildcards).</p>
<p><em>Examples:</em></p>
diff --git a/lib/stdlib/doc/src/filename.xml b/lib/stdlib/doc/src/filename.xml
index 14fd5ef787..d2608ad542 100644
--- a/lib/stdlib/doc/src/filename.xml
+++ b/lib/stdlib/doc/src/filename.xml
@@ -46,7 +46,10 @@
filename by removing redundant directory separators, use
<seealso marker="#join/1"><c>join/1</c></seealso>.</p>
- <p>The module supports raw filenames in the way that if a binary is
+ <p>
+ The module supports
+ <seealso marker="unicode_usage#notes-about-raw-filenames">raw
+ filenames</seealso> in the way that if a binary is
present, or the filename cannot be interpreted according to the return
value of <seealso marker="kernel:file#native_name_encoding/0">
<c>file:native_name_encoding/0</c></seealso>, a raw filename is also
@@ -56,6 +59,30 @@
(the join operation is performed of course). For more information
about raw filenames, see the
<seealso marker="kernel:file"><c>file</c></seealso> module.</p>
+
+ <note>
+ <p>
+ Functionality in this module generally assumes valid input and
+ does not necessarily fail on input that does not use a valid
+ encoding, but may instead very likely produce invalid output.
+ </p>
+ <p>
+ File operations used to accept filenames containing
+ null characters (integer value zero). This caused
+ the name to be truncated and in some cases arguments
+ to primitive operations to be mixed up. Filenames
+ containing null characters inside the filename
+ are now <em>rejected</em> and will cause primitive
+ file operations to fail.
+ </p>
+ </note>
+ <warning><p>
+ Currently null characters at the end of the filename
+ will be accepted by primitive file operations. Such
+ filenames are however still documented as invalid. The
+ implementation will also change in the future and
+ reject such filenames.
+ </p></warning>
</description>
<datatypes>
<datatype>
@@ -555,6 +582,7 @@ unsafe</pre>
["a:/","msdev","include"]</pre>
</desc>
</func>
+
</funcs>
</erlref>
diff --git a/lib/stdlib/doc/src/gen_server.xml b/lib/stdlib/doc/src/gen_server.xml
index 7d137fc772..da74e793e6 100644
--- a/lib/stdlib/doc/src/gen_server.xml
+++ b/lib/stdlib/doc/src/gen_server.xml
@@ -60,6 +60,8 @@ gen_server:abcast -----> Module:handle_cast/2
- -----> Module:handle_info/2
+- -----> Module:handle_continue/2
+
- -----> Module:terminate/2
- -----> Module:code_change/3</pre>
@@ -88,6 +90,13 @@ gen_server:abcast -----> Module:handle_cast/2
implies at least two garbage collections (when hibernating and
shortly after waking up) and is not something you want to do
between each call to a busy server.</p>
+
+ <p>If the <c>gen_server</c> process needs to perform an action
+ immediately after initialization or to break the execution of a
+ callback into multiple steps, it can return <c>{continue,Continue}</c>
+ in place of the time-out or hibernation value, which will immediately
+ invoke the <c>handle_continue/2</c> callback.</p>
+
</description>
<funcs>
@@ -610,12 +619,15 @@ gen_server:abcast -----> Module:handle_cast/2
<v>State = term()</v>
<v>Result = {reply,Reply,NewState} | {reply,Reply,NewState,Timeout}</v>
<v>&nbsp;&nbsp;| {reply,Reply,NewState,hibernate}</v>
+ <v>&nbsp;&nbsp;| {reply,Reply,NewState,{continue,Continue}}</v>
<v>&nbsp;&nbsp;| {noreply,NewState} | {noreply,NewState,Timeout}</v>
<v>&nbsp;&nbsp;| {noreply,NewState,hibernate}</v>
+ <v>&nbsp;&nbsp;| {noreply,NewState,{continue,Continue}}</v>
<v>&nbsp;&nbsp;| {stop,Reason,Reply,NewState} | {stop,Reason,NewState}</v>
<v>&nbsp;Reply = term()</v>
<v>&nbsp;NewState = term()</v>
<v>&nbsp;Timeout = int()>=0 | infinity</v>
+ <v>&nbsp;Continue = term()</v>
<v>&nbsp;Reason = term()</v>
</type>
<desc>
@@ -673,9 +685,11 @@ gen_server:abcast -----> Module:handle_cast/2
<v>State = term()</v>
<v>Result = {noreply,NewState} | {noreply,NewState,Timeout}</v>
<v>&nbsp;&nbsp;| {noreply,NewState,hibernate}</v>
+ <v>&nbsp;&nbsp;| {noreply,NewState,{continue,Continue}}</v>
<v>&nbsp;&nbsp;| {stop,Reason,NewState}</v>
<v>&nbsp;NewState = term()</v>
<v>&nbsp;Timeout = int()>=0 | infinity</v>
+ <v>&nbsp;Continue = term()</v>
<v>&nbsp;Reason = term()</v>
</type>
<desc>
@@ -690,6 +704,41 @@ gen_server:abcast -----> Module:handle_cast/2
</func>
<func>
+ <name>Module:handle_continue(Continue, State) -> Result</name>
+ <fsummary>Handle a continue instruction.</fsummary>
+ <type>
+ <v>Continue = term()</v>
+ <v>State = term()</v>
+ <v>Result = {noreply,NewState} | {noreply,NewState,Timeout}</v>
+ <v>&nbsp;&nbsp;| {noreply,NewState,hibernate}</v>
+ <v>&nbsp;&nbsp;| {noreply,NewState,{continue,Continue}}</v>
+ <v>&nbsp;&nbsp;| {stop,Reason,NewState}</v>
+ <v>&nbsp;NewState = term()</v>
+ <v>&nbsp;Timeout = int()>=0 | infinity</v>
+ <v>&nbsp;Continue = term()</v>
+ <v>&nbsp;Reason = normal | term()</v>
+ </type>
+ <desc>
+ <note>
+ <p>This callback is optional, so callback modules need to
+ export it only if they return <c>{continue,Continue}</c>
+ from another callback. If continue is used and the callback
+ is not implemented, the process will exit with <c>undef</c>
+ error.</p>
+ </note>
+ <p>This function is called by a <c>gen_server</c> process whenever
+ a previous callback returns <c>{continue, Continue}</c>.
+ <c>handle_continue/2</c> is invoked immediately after the previous
+ callback, which makes it useful for performing work after
+ initialization or for splitting the work in a callback in
+ multiple steps, updating the process state along the way.</p>
+ <p>For a description of the other arguments and possible return values,
+ see <seealso marker="#Module:handle_call/3">
+ <c>Module:handle_call/3</c></seealso>.</p>
+ </desc>
+ </func>
+
+ <func>
<name>Module:handle_info(Info, State) -> Result</name>
<fsummary>Handle an incoming message.</fsummary>
<type>
@@ -697,6 +746,7 @@ gen_server:abcast -----> Module:handle_cast/2
<v>State = term()</v>
<v>Result = {noreply,NewState} | {noreply,NewState,Timeout}</v>
<v>&nbsp;&nbsp;| {noreply,NewState,hibernate}</v>
+ <v>&nbsp;&nbsp;| {noreply,NewState,{continue,Continue}}</v>
<v>&nbsp;&nbsp;| {stop,Reason,NewState}</v>
<v>&nbsp;NewState = term()</v>
<v>&nbsp;Timeout = int()>=0 | infinity</v>
@@ -726,7 +776,7 @@ gen_server:abcast -----> Module:handle_cast/2
<type>
<v>Args = term()</v>
<v>Result = {ok,State} | {ok,State,Timeout} | {ok,State,hibernate}</v>
- <v>&nbsp;| {stop,Reason} | ignore</v>
+ <v>&nbsp;| {ok,State,{continue,Continue}} | {stop,Reason} | ignore</v>
<v>&nbsp;State = term()</v>
<v>&nbsp;Timeout = int()>=0 | infinity</v>
<v>&nbsp;Reason = term()</v>
diff --git a/lib/stdlib/doc/src/gen_statem.xml b/lib/stdlib/doc/src/gen_statem.xml
index a7caa71dcb..4a824f073e 100644
--- a/lib/stdlib/doc/src/gen_statem.xml
+++ b/lib/stdlib/doc/src/gen_statem.xml
@@ -1329,7 +1329,7 @@ handle_event(_, _, State, Data) ->
<c><anno>T</anno></c> is the time-out time.
<c>{clean_timeout,<anno>T</anno>}</c> works like
just <c>T</c> described in the note above
- and uses a proxy process for <c>T &lt; infinity</c>,
+ and uses a proxy process
while <c>{dirty_timeout,<anno>T</anno>}</c>
bypasses the proxy process which is more lightweight.
</p>
@@ -1339,8 +1339,12 @@ handle_event(_, _, State, Data) ->
with <c>{dirty_timeout,<anno>T</anno>}</c>
to avoid that the calling process dies when the call
times out, you will have to be prepared to handle
- a late reply.
- So why not just let the calling process die?
+ a late reply. Note that there is an odd chance
+ to get a late reply even with
+ <c>{dirty_timeout,infinity}</c> or <c>infinity</c>
+ for example in the event of network problems.
+ So why not just let the calling process die
+ by not catching the exception?
</p>
</note>
<p>
@@ -1851,7 +1855,7 @@ handle_event(_, _, State, Data) ->
</p>
<note>
<p>
- Note that if the <c>gen_statem</c> is started trough
+ Note that if the <c>gen_statem</c> is started through
<seealso marker="proc_lib"><c>proc_lib</c></seealso>
and
<seealso marker="#enter_loop/4"><c>enter_loop/4-6</c></seealso>,
diff --git a/lib/stdlib/doc/src/rand.xml b/lib/stdlib/doc/src/rand.xml
index 89fb858823..21f680a0ee 100644
--- a/lib/stdlib/doc/src/rand.xml
+++ b/lib/stdlib/doc/src/rand.xml
@@ -133,8 +133,9 @@
variable <c>rand_seed</c> to remember the current state.</p>
<p>If a process calls
- <seealso marker="#uniform-0"><c>uniform/0</c></seealso> or
- <seealso marker="#uniform-1"><c>uniform/1</c></seealso> without
+ <seealso marker="#uniform-0"><c>uniform/0</c></seealso>,
+ <seealso marker="#uniform-1"><c>uniform/1</c></seealso> or
+ <seealso marker="#uniform_real-0"><c>uniform_real/0</c></seealso> without
setting a seed first, <seealso marker="#seed-1"><c>seed/1</c></seealso>
is called automatically with the default algorithm and creates a
non-constant seed.</p>
@@ -168,10 +169,17 @@ R3 = rand:uniform(),</pre>
S0 = rand:seed_s(exrop),
{R4, S1} = rand:uniform_s(S0),</pre>
+ <p>Textbook basic form Box-Muller standard normal deviate</p>
+
+ <pre>
+R5 = rand:uniform_real(),
+R6 = rand:uniform(),
+SND0 = math:sqrt(-2 * math:log(R5)) * math:cos(math:pi() * R6)</pre>
+
<p>Create a standard normal deviate:</p>
<pre>
-{SND0, S2} = rand:normal_s(S1),</pre>
+{SND1, S2} = rand:normal_s(S1),</pre>
<p>Create a normal deviate with mean -3 and variance 0.5:</p>
@@ -414,7 +422,8 @@ tests. We suggest to use a sign test to extract a random Boolean value.</pre>
This function may return exactly <c>0.0</c> which can be
fatal for certain applications. If that is undesired
you can use <c>(1.0 - rand:uniform())</c> to get the
- interval <c>0.0 &lt; <anno>X</anno> =&lt; 1.0</c>.
+ interval <c>0.0 &lt; <anno>X</anno> =&lt; 1.0</c>, or instead use
+ <seealso marker="#uniform_real-0"><c>uniform_real/0</c></seealso>.
</p>
<p>
If neither endpoint is desired you can test and re-try
@@ -432,6 +441,42 @@ end.</pre>
</func>
<func>
+ <name name="uniform_real" arity="0"/>
+ <fsummary>Return a random float.</fsummary>
+ <desc><marker id="uniform_real-0"/>
+ <p>
+ Returns a random float
+ uniformly distributed in the value range
+ <c>DBL_MIN =&lt; <anno>X</anno> &lt; 1.0</c>
+ and updates the state in the process dictionary.
+ </p>
+ <p>
+ Conceptually, a random real number <c>R</c> is generated
+ from the interval <c>0 =&lt; R &lt; 1</c> and then the
+ closest rounded down normalized number
+ in the IEEE 754 Double precision format
+ is returned.
+ </p>
+ <note>
+ <p>
+ The generated numbers from this function has got better
+ granularity for small numbers than the regular
+ <seealso marker="#uniform-0"><c>uniform/0</c></seealso>
+ because all bits in the mantissa are random.
+ This property, in combination with the fact that exactly zero
+ is never returned is useful for algoritms doing for example
+ <c>1.0 / <anno>X</anno></c> or <c>math:log(<anno>X</anno>)</c>.
+ </p>
+ </note>
+ <p>
+ See
+ <seealso marker="#uniform_real_s-1"><c>uniform_real_s/1</c></seealso>
+ for more explanation.
+ </p>
+ </desc>
+ </func>
+
+ <func>
<name name="uniform" arity="1"/>
<fsummary>Return a random integer.</fsummary>
<desc><marker id="uniform-1"/>
@@ -460,7 +505,8 @@ end.</pre>
This function may return exactly <c>0.0</c> which can be
fatal for certain applications. If that is undesired
you can use <c>(1.0 - rand:uniform(State))</c> to get the
- interval <c>0.0 &lt; <anno>X</anno> =&lt; 1.0</c>.
+ interval <c>0.0 &lt; <anno>X</anno> =&lt; 1.0</c>, or instead use
+ <seealso marker="#uniform_real_s-1"><c>uniform_real_s/1</c></seealso>.
</p>
<p>
If neither endpoint is desired you can test and re-try
@@ -478,6 +524,68 @@ end.</pre>
</func>
<func>
+ <name name="uniform_real_s" arity="1"/>
+ <fsummary>Return a random float.</fsummary>
+ <desc>
+ <p>
+ Returns, for a specified state, a random float
+ uniformly distributed in the value range
+ <c>DBL_MIN =&lt; <anno>X</anno> &lt; 1.0</c>
+ and updates the state in the process dictionary.
+ </p>
+ <p>
+ Conceptually, a random real number <c>R</c> is generated
+ from the interval <c>0 =&lt; R &lt; 1</c> and then the
+ closest rounded down normalized number
+ in the IEEE 754 Double precision format
+ is returned.
+ </p>
+ <note>
+ <p>
+ The generated numbers from this function has got better
+ granularity for small numbers than the regular
+ <seealso marker="#uniform_s-1"><c>uniform_s/1</c></seealso>
+ because all bits in the mantissa are random.
+ This property, in combination with the fact that exactly zero
+ is never returned is useful for algoritms doing for example
+ <c>1.0 / <anno>X</anno></c> or <c>math:log(<anno>X</anno>)</c>.
+ </p>
+ </note>
+ <p>
+ The concept implicates that the probability to get
+ exactly zero is extremely low; so low that this function
+ is in fact guaranteed to never return zero. The smallest
+ number that it might return is <c>DBL_MIN</c>, which is
+ 2.0^(-1022).
+ </p>
+ <p>
+ The value range stated at the top of this function
+ description is technically correct, but
+ <c>0.0 =&lt; <anno>X</anno> &lt; 1.0</c>
+ is a better description of the generated numbers'
+ statistical distribution. Except that exactly 0.0
+ is never returned, which is not possible to observe
+ statistically.
+ </p>
+ <p>
+ For example; for all sub ranges
+ <c>N*2.0^(-53) =&lt; X &lt; (N+1)*2.0^(-53)</c>
+ where
+ <c>0 =&lt; integer(N) &lt; 2.0^53</c>
+ the probability is the same.
+ Compare that with the form of the numbers generated by
+ <seealso marker="#uniform_s-1"><c>uniform_s/1</c></seealso>.
+ </p>
+ <p>
+ Having to generate extra random bits for
+ small numbers costs a little performance.
+ This function is about 20% slower than the regular
+ <seealso marker="#uniform_s-1"><c>uniform_s/1</c></seealso>
+ </p>
+ </desc>
+ </func>
+
+ <func>
<name name="uniform_s" arity="2"/>
<fsummary>Return a random integer.</fsummary>
<desc>
diff --git a/lib/stdlib/doc/src/ref_man.xml b/lib/stdlib/doc/src/ref_man.xml
index 878a3babc5..68bfddbc71 100644
--- a/lib/stdlib/doc/src/ref_man.xml
+++ b/lib/stdlib/doc/src/ref_man.xml
@@ -93,6 +93,7 @@
<xi:include href="sys.xml"/>
<xi:include href="timer.xml"/>
<xi:include href="unicode.xml"/>
+ <xi:include href="uri_string.xml"/>
<xi:include href="win32reg.xml"/>
<xi:include href="zip.xml"/>
</application>
diff --git a/lib/stdlib/doc/src/specs.xml b/lib/stdlib/doc/src/specs.xml
index 45b207b13d..d559adf9b6 100644
--- a/lib/stdlib/doc/src/specs.xml
+++ b/lib/stdlib/doc/src/specs.xml
@@ -60,6 +60,7 @@
<xi:include href="../specs/specs_sys.xml"/>
<xi:include href="../specs/specs_timer.xml"/>
<xi:include href="../specs/specs_unicode.xml"/>
+ <xi:include href="../specs/specs_uri_string.xml"/>
<xi:include href="../specs/specs_win32reg.xml"/>
<xi:include href="../specs/specs_zip.xml"/>
</specs>
diff --git a/lib/stdlib/doc/src/string.xml b/lib/stdlib/doc/src/string.xml
index 9d5edd9ecf..130fc74a28 100644
--- a/lib/stdlib/doc/src/string.xml
+++ b/lib/stdlib/doc/src/string.xml
@@ -109,10 +109,8 @@
<p>This module has been reworked in Erlang/OTP 20 to
handle <seealso marker="unicode#type-chardata">
<c>unicode:chardata()</c></seealso> and operate on grapheme
- clusters. The <seealso marker="#oldapi"> <c>old
- functions</c></seealso> that only work on Latin-1 lists as input
- are still available but should not be
- used. They will be deprecated in Erlang/OTP 21.
+ clusters. The <c>old functions</c> that only work on Latin-1 lists as input
+ are kept for backwards compatibility reasons but should not be used.
</p>
</description>
@@ -594,7 +592,7 @@ ÖÄÅ</pre>
or <c>both</c>, indicates from which direction characters
are to be removed.
</p>
- <p> Default <c><anno>Characters</anno></c> are the set of
+ <p> Default <c><anno>Characters</anno></c> is the set of
nonbreakable whitespace codepoints, defined as
Pattern_White_Space in
<url href="http://unicode.org/reports/tr31/">Unicode Standard Annex #31</url>.
@@ -631,393 +629,5 @@ ÖÄÅ</pre>
</func>
</funcs>
-
- <section>
- <marker id="oldapi"/>
- <title>Obsolete API functions</title>
- <p>Here follows the function of the old API.
- These functions only work on a list of Latin-1 characters.
- </p>
- <note><p>
- The functions are kept for backward compatibility, but are
- not recommended.
- They will be deprecated in Erlang/OTP 21.
- </p>
- <p>Any undocumented functions in <c>string</c> are not to be used.</p>
- </note>
- </section>
-
- <funcs>
- <func>
- <name name="centre" arity="2"/>
- <name name="centre" arity="3"/>
- <fsummary>Center a string.</fsummary>
- <desc>
- <p>Returns a string, where <c><anno>String</anno></c> is centered in the
- string and surrounded by blanks or <c><anno>Character</anno></c>.
- The resulting string has length <c><anno>Number</anno></c>.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="#pad/3"><c>pad/3</c></seealso>.
- </p>
- </desc>
- </func>
-
- <func>
- <name name="chars" arity="2"/>
- <name name="chars" arity="3"/>
- <fsummary>Return a string consisting of numbers of characters.</fsummary>
- <desc>
- <p>Returns a string consisting of <c><anno>Number</anno></c> characters
- <c><anno>Character</anno></c>. Optionally, the string can end with
- string <c><anno>Tail</anno></c>.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="lists#duplicate/2"><c>lists:duplicate/2</c></seealso>.</p>
- </desc>
- </func>
-
- <func>
- <name name="chr" arity="2"/>
- <fsummary>Return the index of the first occurrence of
- a character in a string.</fsummary>
- <desc>
- <p>Returns the index of the first occurrence of
- <c><anno>Character</anno></c> in <c><anno>String</anno></c>. Returns
- <c>0</c> if <c><anno>Character</anno></c> does not occur.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="#find/2"><c>find/2</c></seealso>.</p>
- </desc>
- </func>
-
- <func>
- <name name="concat" arity="2"/>
- <fsummary>Concatenate two strings.</fsummary>
- <desc>
- <p>Concatenates <c><anno>String1</anno></c> and
- <c><anno>String2</anno></c> to form a new string
- <c><anno>String3</anno></c>, which is returned.</p>
- <p>
- This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use <c>[<anno>String1</anno>, <anno>String2</anno>]</c> as
- <c>Data</c> argument, and call
- <seealso marker="unicode#characters_to_list/2">
- <c>unicode:characters_to_list/2</c></seealso> or
- <seealso marker="unicode#characters_to_binary/2">
- <c>unicode:characters_to_binary/2</c></seealso>
- to flatten the output.
- </p>
- </desc>
- </func>
-
- <func>
- <name name="copies" arity="2"/>
- <fsummary>Copy a string.</fsummary>
- <desc>
- <p>Returns a string containing <c><anno>String</anno></c> repeated
- <c><anno>Number</anno></c> times.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="lists#duplicate/2"><c>lists:duplicate/2</c></seealso>.</p>
- </desc>
- </func>
-
- <func>
- <name name="cspan" arity="2"/>
- <fsummary>Span characters at start of a string.</fsummary>
- <desc>
- <p>Returns the length of the maximum initial segment of
- <c><anno>String</anno></c>, which consists entirely of characters
- not from <c><anno>Chars</anno></c>.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="#take/3"><c>take/3</c></seealso>.</p>
- <p><em>Example:</em></p>
- <code type="none">
-> string:cspan("\t abcdef", " \t").
-0</code>
- </desc>
- </func>
-
- <func>
- <name name="join" arity="2"/>
- <fsummary>Join a list of strings with separator.</fsummary>
- <desc>
- <p>Returns a string with the elements of <c><anno>StringList</anno></c>
- separated by the string in <c><anno>Separator</anno></c>.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="lists#join/2"><c>lists:join/2</c></seealso>.</p>
- <p><em>Example:</em></p>
- <code type="none">
-> join(["one", "two", "three"], ", ").
-"one, two, three"</code>
- </desc>
- </func>
-
- <func>
- <name name="left" arity="2"/>
- <name name="left" arity="3"/>
- <fsummary>Adjust left end of a string.</fsummary>
- <desc>
- <p>Returns <c><anno>String</anno></c> with the length adjusted in
- accordance with <c><anno>Number</anno></c>. The left margin is
- fixed. If <c>length(<anno>String</anno>)</c> &lt;
- <c><anno>Number</anno></c>, then <c><anno>String</anno></c> is padded
- with blanks or <c><anno>Character</anno></c>s.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="#pad/2"><c>pad/2</c></seealso> or
- <seealso marker="#pad/3"><c>pad/3</c></seealso>.</p>
- <p><em>Example:</em></p>
- <code type="none">
-> string:left("Hello",10,$.).
-"Hello....."</code>
- </desc>
- </func>
-
- <func>
- <name name="len" arity="1"/>
- <fsummary>Return the length of a string.</fsummary>
- <desc>
- <p>Returns the number of characters in <c><anno>String</anno></c>.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="#length/1"><c>length/1</c></seealso>.</p>
- </desc>
- </func>
-
- <func>
- <name name="rchr" arity="2"/>
- <fsummary>Return the index of the last occurrence of
- a character in a string.</fsummary>
- <desc>
- <p>Returns the index of the last occurrence of
- <c><anno>Character</anno></c> in <c><anno>String</anno></c>. Returns
- <c>0</c> if <c><anno>Character</anno></c> does not occur.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="#find/3"><c>find/3</c></seealso>.</p>
- </desc>
- </func>
-
- <func>
- <name name="right" arity="2"/>
- <name name="right" arity="3"/>
- <fsummary>Adjust right end of a string.</fsummary>
- <desc>
- <p>Returns <c><anno>String</anno></c> with the length adjusted in
- accordance with <c><anno>Number</anno></c>. The right margin is
- fixed. If the length of <c>(<anno>String</anno>)</c> &lt;
- <c><anno>Number</anno></c>, then <c><anno>String</anno></c> is padded
- with blanks or <c><anno>Character</anno></c>s.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="#pad/3"><c>pad/3</c></seealso>.</p>
- <p><em>Example:</em></p>
- <code type="none">
-> string:right("Hello", 10, $.).
-".....Hello"</code>
- </desc>
- </func>
-
- <func>
- <name name="rstr" arity="2"/>
- <fsummary>Find the index of a substring.</fsummary>
- <desc>
- <p>Returns the position where the last occurrence of
- <c><anno>SubString</anno></c> begins in <c><anno>String</anno></c>.
- Returns <c>0</c> if <c><anno>SubString</anno></c>
- does not exist in <c><anno>String</anno></c>.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="#find/3"><c>find/3</c></seealso>.</p>
- <p><em>Example:</em></p>
- <code type="none">
-> string:rstr(" Hello Hello World World ", "Hello World").
-8</code>
- </desc>
- </func>
-
- <func>
- <name name="span" arity="2"/>
- <fsummary>Span characters at start of a string.</fsummary>
- <desc>
- <p>Returns the length of the maximum initial segment of
- <c><anno>String</anno></c>, which consists entirely of characters
- from <c><anno>Chars</anno></c>.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="#take/2"><c>take/2</c></seealso>.</p>
- <p><em>Example:</em></p>
- <code type="none">
-> string:span("\t abcdef", " \t").
-5</code>
- </desc>
- </func>
-
- <func>
- <name name="str" arity="2"/>
- <fsummary>Find the index of a substring.</fsummary>
- <desc>
- <p>Returns the position where the first occurrence of
- <c><anno>SubString</anno></c> begins in <c><anno>String</anno></c>.
- Returns <c>0</c> if <c><anno>SubString</anno></c>
- does not exist in <c><anno>String</anno></c>.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="#find/2"><c>find/2</c></seealso>.</p>
- <p><em>Example:</em></p>
- <code type="none">
-> string:str(" Hello Hello World World ", "Hello World").
-8</code>
- </desc>
- </func>
-
- <func>
- <name name="strip" arity="1"/>
- <name name="strip" arity="2"/>
- <name name="strip" arity="3"/>
- <fsummary>Strip leading or trailing characters.</fsummary>
- <desc>
- <p>Returns a string, where leading or trailing, or both, blanks or a
- number of <c><anno>Character</anno></c> have been removed.
- <c><anno>Direction</anno></c>, which can be <c>left</c>, <c>right</c>,
- or <c>both</c>, indicates from which direction blanks are to be
- removed. <c>strip/1</c> is equivalent to
- <c>strip(String, both)</c>.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="#trim/3"><c>trim/3</c></seealso>.</p>
- <p><em>Example:</em></p>
- <code type="none">
-> string:strip("...Hello.....", both, $.).
-"Hello"</code>
- </desc>
- </func>
-
- <func>
- <name name="sub_string" arity="2"/>
- <name name="sub_string" arity="3"/>
- <fsummary>Extract a substring.</fsummary>
- <desc>
- <p>Returns a substring of <c><anno>String</anno></c>, starting at
- position <c><anno>Start</anno></c> to the end of the string, or to
- and including position <c><anno>Stop</anno></c>.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="#slice/3"><c>slice/3</c></seealso>.</p>
- <p><em>Example:</em></p>
- <code type="none">
-sub_string("Hello World", 4, 8).
-"lo Wo"</code>
- </desc>
- </func>
-
- <func>
- <name name="substr" arity="2"/>
- <name name="substr" arity="3"/>
- <fsummary>Return a substring of a string.</fsummary>
- <desc>
- <p>Returns a substring of <c><anno>String</anno></c>, starting at
- position <c><anno>Start</anno></c>, and ending at the end of the
- string or at length <c><anno>Length</anno></c>.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="#slice/3"><c>slice/3</c></seealso>.</p>
- <p><em>Example:</em></p>
- <code type="none">
-> substr("Hello World", 4, 5).
-"lo Wo"</code>
- </desc>
- </func>
-
- <func>
- <name name="sub_word" arity="2"/>
- <name name="sub_word" arity="3"/>
- <fsummary>Extract subword.</fsummary>
- <desc>
- <p>Returns the word in position <c><anno>Number</anno></c> of
- <c><anno>String</anno></c>. Words are separated by blanks or
- <c><anno>Character</anno></c>s.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="#nth_lexeme/3"><c>nth_lexeme/3</c></seealso>.</p>
- <p><em>Example:</em></p>
- <code type="none">
-> string:sub_word(" Hello old boy !",3,$o).
-"ld b"</code>
- </desc>
- </func>
-
- <func>
- <name name="to_lower" arity="1" clause_i="1"/>
- <name name="to_lower" arity="1" clause_i="2"/>
- <name name="to_upper" arity="1" clause_i="1"/>
- <name name="to_upper" arity="1" clause_i="2"/>
- <fsummary>Convert case of string (ISO/IEC 8859-1).</fsummary>
- <type variable="String" name_i="1"/>
- <type variable="Result" name_i="1"/>
- <type variable="Char"/>
- <type variable="CharResult"/>
- <desc>
- <p>The specified string or character is case-converted. Notice that
- the supported character set is ISO/IEC 8859-1 (also called Latin 1);
- all values outside this set are unchanged</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso> use
- <seealso marker="#lowercase/1"><c>lowercase/1</c></seealso>,
- <seealso marker="#uppercase/1"><c>uppercase/1</c></seealso>,
- <seealso marker="#titlecase/1"><c>titlecase/1</c></seealso> or
- <seealso marker="#casefold/1"><c>casefold/1</c></seealso>.</p>
- </desc>
- </func>
-
- <func>
- <name name="tokens" arity="2"/>
- <fsummary>Split string into tokens.</fsummary>
- <desc>
- <p>Returns a list of tokens in <c><anno>String</anno></c>, separated
- by the characters in <c><anno>SeparatorList</anno></c>.</p>
- <p><em>Example:</em></p>
- <code type="none">
-> tokens("abc defxxghix jkl", "x ").
-["abc", "def", "ghi", "jkl"]</code>
- <p>Notice that, as shown in this example, two or more
- adjacent separator characters in <c><anno>String</anno></c>
- are treated as one. That is, there are no empty
- strings in the resulting list of tokens.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="#lexemes/2"><c>lexemes/2</c></seealso>.</p>
- </desc>
- </func>
-
- <func>
- <name name="words" arity="1"/>
- <name name="words" arity="2"/>
- <fsummary>Count blank separated words.</fsummary>
- <desc>
- <p>Returns the number of words in <c><anno>String</anno></c>, separated
- by blanks or <c><anno>Character</anno></c>.</p>
- <p>This function is <seealso marker="#oldapi">obsolete</seealso>.
- Use
- <seealso marker="#lexemes/2"><c>lexemes/2</c></seealso>.</p>
- <p><em>Example:</em></p>
- <code type="none">
-> words(" Hello old boy!", $o).
-4</code>
- </desc>
- </func>
- </funcs>
-
- <section>
- <title>Notes</title>
- <p>Some of the general string functions can seem to overlap each
- other. The reason is that this string package is the
- combination of two earlier packages and all functions of
- both packages have been retained.</p>
- </section>
-
</erlref>
diff --git a/lib/stdlib/doc/src/unicode_usage.xml b/lib/stdlib/doc/src/unicode_usage.xml
index 26dc46719e..789e063c12 100644
--- a/lib/stdlib/doc/src/unicode_usage.xml
+++ b/lib/stdlib/doc/src/unicode_usage.xml
@@ -719,8 +719,8 @@ Eshell V5.10.1 (abort with ^G)
</section>
<section>
- <title>Unicode Filenames</title>
<marker id="unicode_file_names"/>
+ <title>Unicode Filenames</title>
<p>Most modern operating systems support Unicode filenames in some way.
There are many different ways to do this and Erlang by default treats the
different approaches differently:</p>
@@ -855,8 +855,12 @@ Eshell V5.10.1 (abort with ^G)
</note>
<section>
- <title>Notes About Raw Filenames</title>
<marker id="notes-about-raw-filenames"/>
+ <title>Notes About Raw Filenames</title>
+ <note><p>
+ Note that raw filenames <em>not</em> necessarily are encoded the
+ same way as on the OS level.
+ </p></note>
<p>Raw filenames were introduced together with Unicode filename support
in ERTS 5.8.2 (Erlang/OTP R14B01). The reason &quot;raw
filenames&quot; were introduced in the system was
diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml
new file mode 100644
index 0000000000..9ace2b0a05
--- /dev/null
+++ b/lib/stdlib/doc/src/uri_string.xml
@@ -0,0 +1,230 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!DOCTYPE erlref SYSTEM "erlref.dtd">
+
+<erlref>
+ <header>
+ <copyright>
+ <year>2017</year><year>2017</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ </legalnotice>
+
+ <title>uri_string</title>
+ <prepared>Péter Dimitrov</prepared>
+ <docno>1</docno>
+ <date>2017-10-24</date>
+ <rev>A</rev>
+ </header>
+ <module>uri_string</module>
+ <modulesummary>URI processing functions.</modulesummary>
+ <description>
+ <p>This module contains functions for parsing and handling URIs
+ (<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>).
+ </p>
+ <p>A URI is an identifier consisting of a sequence of characters matching the syntax
+ rule named <em>URI</em> in <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>.
+ </p>
+ <p> The generic URI syntax consists of a hierarchical sequence of components referred
+ to as the scheme, authority, path, query, and fragment:</p>
+ <pre>
+ URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ hier-part = "//" authority path-abempty
+ / path-absolute
+ / path-rootless
+ / path-empty
+ scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ authority = [ userinfo "@" ] host [ ":" port ]
+ userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+
+ reserved = gen-delims / sub-delims
+ gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")"
+ / "*" / "+" / "," / ";" / "="
+
+ unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ </pre><br></br>
+ <p>The interpretation of a URI depends only on the characters used and not on how those
+ characters are represented in a network protocol.</p>
+ <p>The functions implemented by this module cover the following use cases:</p>
+ <list type="bulleted">
+ <item>Parsing URIs into its components and returing a map<br></br>
+ <seealso marker="#parse/1"><c>parse/1</c></seealso>
+ </item>
+ <item>Recomposing a map of URI components into a URI string<br></br>
+ <seealso marker="#recompose/1"><c>recompose/1</c></seealso>
+ </item>
+ <item>Changing inbound binary and percent-encoding of URIs<br></br>
+ <seealso marker="#transcode/2"><c>transcode/2</c></seealso>
+ </item>
+ <item>Transforming URIs into a normalized form<br></br>
+ <seealso marker="#normalize/1"><c>normalize/1</c></seealso>
+ </item>
+ </list>
+ <p>There are four different encodings present during the handling of URIs:</p>
+ <list type="bulleted">
+ <item>Inbound binary encoding in binaries</item>
+ <item>Inbound percent-encoding in lists and binaries</item>
+ <item>Outbound binary encoding in binaries</item>
+ <item>Outbound percent-encoding in lists and binaries</item>
+ </list>
+ <p>Functions with <c>uri_string()</c> argument accept lists, binaries and
+ mixed lists (lists with binary elements) as input type. All of the functions but
+ <c>transcode/2</c> expects input as lists of unicode codepoints, UTF-8 encoded binaries
+ and UTF-8 percent-encoded URI parts ("%C3%B6" corresponds to the unicode character "ö").</p>
+ <p>Unless otherwise specified the return value type and encoding are the same as the input
+ type and encoding. That is, binary input returns binary output, list input returns a list
+ output but mixed input returns list output.</p>
+ <p>In case of lists there is only percent-encoding. In binaries, however, both binary encoding
+ and percent-encoding shall be considered. <c>transcode/2</c> provides the means to convert
+ between the supported encodings, it takes a <c>uri_string()</c> and a list of options
+ specifying inbound and outbound encodings.</p>
+ <p><url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> does not mandate any specific
+ character encoding and it is usually defined by the protocol or surrounding text. This library
+ takes the same assumption, binary and percent-encoding are handled as one configuration unit,
+ they cannot be set to different values.</p>
+ </description>
+
+ <datatypes>
+ <datatype>
+ <name name="error"/>
+ <desc>
+ <p>Error tuple indicating the type of error. Possible values of the second component:</p>
+ <list type="bulleted">
+ <item><c>invalid_input</c></item>
+ <item><c>invalid_map</c></item>
+ <item><c>invalid_percent_encoding</c></item>
+ <item><c>invalid_scheme</c></item>
+ <item><c>invalid_uri</c></item>
+ <item><c>invalid_utf8</c></item>
+ </list>
+ <p>The third component is a term providing additional information about the
+ cause of the error.</p>
+ </desc>
+ </datatype>
+ <datatype>
+ <name name="uri_map"/>
+ <desc>
+ <p>Map holding the main components of a URI.</p>
+ </desc>
+ </datatype>
+ <datatype>
+ <name name="uri_string"/>
+ <desc>
+ <p>List of unicode codepoints, a UTF-8 encoded binary, or a mix of the two,
+ representing an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>
+ compliant URI (<em>percent-encoded form</em>).
+ A URI is a sequence of characters from a very limited set: the letters of
+ the basic Latin alphabet, digits, and a few special characters.</p>
+ </desc>
+ </datatype>
+ </datatypes>
+
+ <funcs>
+
+ <func>
+ <name name="normalize" arity="1"/>
+ <fsummary>Syntax-based normalization.</fsummary>
+ <desc>
+ <p>Transforms <c><anno>URIString</anno></c> into a normalized form
+ using Syntax-Based Normalization as defined by
+ <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>.</p>
+ <p>This function implements case normalization, percent-encoding
+ normalization, path segment normalization and scheme based normalization
+ for HTTP(S) with basic support for FTP, SSH, SFTP and TFTP.</p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input>uri_string:normalize("/a/b/c/./../../g").</input>
+"/a/g"
+2> <![CDATA[uri_string:normalize(<<"mid/content=5/../6">>).]]>
+<![CDATA[<<"mid/6">>]]>
+3> uri_string:normalize("http://localhost:80").
+"https://localhost/"
+ </pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="parse" arity="1"/>
+ <fsummary>Parse URI into a map.</fsummary>
+ <desc>
+ <p>Parses an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>
+ compliant <c>uri_string()</c> into a <c>uri_map()</c>, that holds the parsed
+ components of the <c>URI</c>.
+ If parsing fails, an error tuple is returned.</p>
+ <p>See also the opposite operation <seealso marker="#recompose/1">
+ <c>recompose/1</c></seealso>.</p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input>uri_string:parse("foo://[email protected]:8042/over/there?name=ferret#nose").</input>
+#{fragment => "nose",host => "example.com",
+ path => "/over/there",port => 8042,query => "name=ferret",
+ scheme => foo,userinfo => "user"}
+2> <![CDATA[uri_string:parse(<<"foo://[email protected]:8042/over/there?name=ferret">>).]]>
+<![CDATA[#{host => <<"example.com">>,path => <<"/over/there">>,
+ port => 8042,query => <<"name=ferret">>,scheme => <<"foo">>,
+ userinfo => <<"user">>}]]>
+ </pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="recompose" arity="1"/>
+ <fsummary>Recompose URI.</fsummary>
+ <desc>
+ <p>Creates an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> compliant
+ <c><anno>URIString</anno></c> (percent-encoded), based on the components of
+ <c><anno>URIMap</anno></c>.
+ If the <c><anno>URIMap</anno></c> is invalid, an error tuple is returned.</p>
+ <p>See also the opposite operation <seealso marker="#parse/1">
+ <c>parse/1</c></seealso>.</p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input>URIMap = #{fragment => "nose", host => "example.com", path => "/over/there",</input>
+1> port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}.
+#{fragment => "top",host => "example.com",
+ path => "/over/there",port => 8042,query => "?name=ferret",
+ scheme => foo,userinfo => "user"}
+
+2> <input>uri_string:recompose(URIMap).</input>
+"foo://example.com:8042/over/there?name=ferret#nose"</pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="transcode" arity="2"/>
+ <fsummary>Transcode URI.</fsummary>
+ <desc>
+ <p>Transcodes an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>
+ compliant <c><anno>URIString</anno></c>,
+ where <c><anno>Options</anno></c> is a list of tagged tuples, specifying the inbound
+ (<c>in_encoding</c>) and outbound (<c>out_encoding</c>) encodings. <c>in_encoding</c>
+ and <c>out_encoding</c> specifies both binary encoding and percent-encoding for the
+ input and output data. Mixed encoding, where binary encoding is not the same as
+ percent-encoding, is not supported.
+ If an argument is invalid, an error tuple is returned.</p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input><![CDATA[uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>,]]></input>
+1> [{in_encoding, utf32},{out_encoding, utf8}]).
+<![CDATA[<<"foo%C3%B6bar"/utf8>>]]>
+2> uri_string:transcode("foo%F6bar", [{in_encoding, latin1},
+2> {out_encoding, utf8}]).
+"foo%C3%B6bar"
+ </pre>
+ </desc>
+ </func>
+
+ </funcs>
+</erlref>
diff --git a/lib/stdlib/src/Makefile b/lib/stdlib/src/Makefile
index bf836203ec..8b156929d7 100644
--- a/lib/stdlib/src/Makefile
+++ b/lib/stdlib/src/Makefile
@@ -121,6 +121,7 @@ MODULES= \
timer \
unicode \
unicode_util \
+ uri_string \
win32reg \
zip
diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl
index 5885745fb1..c8cf6fdffe 100644
--- a/lib/stdlib/src/base64.erl
+++ b/lib/stdlib/src/base64.erl
@@ -113,9 +113,9 @@ encode_binary(Bin) ->
Data :: ascii_binary().
decode(Bin) when is_binary(Bin) ->
- decode_binary(<<>>, Bin);
+ decode_binary(Bin, <<>>);
decode(List) when is_list(List) ->
- list_to_binary(decode_l(List)).
+ decode_list(List, <<>>).
-spec mime_decode(Base64) -> Data when
Base64 :: ascii_string() | ascii_binary(),
@@ -186,31 +186,41 @@ mime_decode_to_string(List) when is_list(List) ->
bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}).
-decode_binary(Result0, <<C:8,T0/bits>>) ->
- case element(C, ?DECODE_MAP) of
- bad ->
- erlang:error({badarg,C});
- ws ->
- decode_binary(Result0, T0);
- eq ->
- case strip_ws(T0) of
- <<$=:8,T/binary>> ->
- <<>> = strip_ws(T),
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:4>> = Result0,
- Result;
- T ->
- <<>> = strip_ws(T),
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:2>> = Result0,
- Result
- end;
- Bits ->
- decode_binary(<<Result0/bits,Bits:6>>, T0)
+decode_binary(<<C1:8, Cs/bits>>, A) ->
+ case element(C1, ?DECODE_MAP) of
+ ws -> decode_binary(Cs, A);
+ B1 -> decode_binary(Cs, A, B1)
end;
-decode_binary(Result, <<>>) ->
- true = is_binary(Result),
- Result.
+decode_binary(<<>>, A) ->
+ A.
+
+decode_binary(<<C2:8, Cs/bits>>, A, B1) ->
+ case element(C2, ?DECODE_MAP) of
+ ws -> decode_binary(Cs, A, B1);
+ B2 -> decode_binary(Cs, A, B1, B2)
+ end.
+
+decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) ->
+ case element(C3, ?DECODE_MAP) of
+ ws -> decode_binary(Cs, A, B1, B2);
+ B3 -> decode_binary(Cs, A, B1, B2, B3)
+ end.
+
+decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) ->
+ case element(C4, ?DECODE_MAP) of
+ ws -> decode_binary(Cs, A, B1, B2, B3);
+ eq when B3 =:= eq -> only_ws_binary(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>);
+ eq -> only_ws_binary(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>);
+ B4 -> decode_binary(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>)
+ end.
+
+only_ws_binary(<<>>, A) ->
+ A;
+only_ws_binary(<<C:8, Cs/bits>>, A) ->
+ case element(C, ?DECODE_MAP) of
+ ws -> only_ws_binary(Cs, A);
+ _ -> erlang:error(function_clause)
+ end.
%% Skipping pad character if not at end of string. Also liberal about
%% excess padding and skipping of other illegal (non-base64 alphabet)
@@ -262,6 +272,42 @@ mime_decode_binary_after_eq(Result0, <<>>, Eq) ->
Result
end.
+decode_list([C1 | Cs], A) ->
+ case element(C1, ?DECODE_MAP) of
+ ws -> decode_list(Cs, A);
+ B1 -> decode_list(Cs, A, B1)
+ end;
+decode_list([], A) ->
+ A.
+
+decode_list([C2 | Cs], A, B1) ->
+ case element(C2, ?DECODE_MAP) of
+ ws -> decode_list(Cs, A, B1);
+ B2 -> decode_list(Cs, A, B1, B2)
+ end.
+
+decode_list([C3 | Cs], A, B1, B2) ->
+ case element(C3, ?DECODE_MAP) of
+ ws -> decode_list(Cs, A, B1, B2);
+ B3 -> decode_list(Cs, A, B1, B2, B3)
+ end.
+
+decode_list([C4 | Cs], A, B1, B2, B3) ->
+ case element(C4, ?DECODE_MAP) of
+ ws -> decode_list(Cs, A, B1, B2, B3);
+ eq when B3 =:= eq -> only_ws(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>);
+ eq -> only_ws(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>);
+ B4 -> decode_list(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>)
+ end.
+
+only_ws([], A) ->
+ A;
+only_ws([C | Cs], A) ->
+ case element(C, ?DECODE_MAP) of
+ ws -> only_ws(Cs, A);
+ _ -> erlang:error(function_clause)
+ end.
+
decode([], A) -> A;
decode([$=,$=,C2,C1|Cs], A) ->
Bits2x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12),
@@ -292,16 +338,6 @@ strip_spaces([$\r|Cs], A) -> strip_spaces(Cs, A);
strip_spaces([$\n|Cs], A) -> strip_spaces(Cs, A);
strip_spaces([C|Cs], A) -> strip_spaces(Cs, [C | A]).
-strip_ws(<<$\t,T/binary>>) ->
- strip_ws(T);
-strip_ws(<<$\n,T/binary>>) ->
- strip_ws(T);
-strip_ws(<<$\r,T/binary>>) ->
- strip_ws(T);
-strip_ws(<<$\s,T/binary>>) ->
- strip_ws(T);
-strip_ws(T) -> T.
-
%% Skipping pad character if not at end of string. Also liberal about
%% excess padding and skipping of other illegal (non-base64 alphabet)
%% characters. See section 3.3 of RFC4648
diff --git a/lib/stdlib/src/c.erl b/lib/stdlib/src/c.erl
index c04a201ce1..9a447af5b7 100644
--- a/lib/stdlib/src/c.erl
+++ b/lib/stdlib/src/c.erl
@@ -668,19 +668,23 @@ lm() ->
[l(M) || M <- mm()].
%% erlangrc(Home)
-%% Try to run a ".erlang" file, first in the current directory
-%% else in home directory.
+%% Try to run a ".erlang" file in home directory.
+
+-spec erlangrc() -> {ok, file:filename()} | {error, term()}.
erlangrc() ->
case init:get_argument(home) of
{ok,[[Home]]} ->
erlangrc([Home]);
_ ->
- f_p_e(["."], ".erlang")
+ {error, enoent}
end.
-erlangrc([Home]) ->
- f_p_e([".",Home], ".erlang").
+-spec erlangrc(PathList) -> {ok, file:filename()} | {error, term()}
+ when PathList :: [Dir :: file:name()].
+
+erlangrc([Home|_]=Paths) when is_list(Home) ->
+ f_p_e(Paths, ".erlang").
error(Fmt, Args) ->
error_logger:error_msg(Fmt, Args).
@@ -692,11 +696,11 @@ f_p_e(P, F) ->
{error, E={Line, _Mod, _Term}} ->
error("file:path_eval(~tp,~tp): error on line ~p: ~ts~n",
[P, F, Line, file:format_error(E)]),
- ok;
+ {error, E};
{error, E} ->
error("file:path_eval(~tp,~tp): ~ts~n",
[P, F, file:format_error(E)]),
- ok;
+ {error, E};
Other ->
Other
end.
diff --git a/lib/stdlib/src/epp.erl b/lib/stdlib/src/epp.erl
index 31d0d499e3..00e6a10d8a 100644
--- a/lib/stdlib/src/epp.erl
+++ b/lib/stdlib/src/epp.erl
@@ -479,7 +479,7 @@ com_enc(_B, _Fun, _N, L, Ps) ->
com_enc_end([L | Ps]).
com_enc_end(Ps0) ->
- Ps = lists:reverse([lists:reverse(string:to_lower(P)) || P <- Ps0]),
+ Ps = lists:reverse([lists:reverse(lowercase(P)) || P <- Ps0]),
com_encoding(Ps).
com_encoding(["latin","1"|_]) ->
@@ -489,6 +489,9 @@ com_encoding(["utf","8"|_]) ->
com_encoding(_) ->
throw(no). % Don't try any further
+lowercase(S) ->
+ unicode:characters_to_list(string:lowercase(S)).
+
normalize_typed_record_fields([]) ->
{typed, []};
normalize_typed_record_fields(Fields) ->
diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl
index 9cd4727dc3..f58cb35cea 100644
--- a/lib/stdlib/src/erl_lint.erl
+++ b/lib/stdlib/src/erl_lint.erl
@@ -3910,10 +3910,9 @@ check_format_string(Fmt) ->
extract_sequences(Fmt, []).
extract_sequences(Fmt, Need0) ->
- case string:chr(Fmt, $~) of
- 0 -> {ok,lists:reverse(Need0)}; %That's it
- Pos ->
- Fmt1 = string:substr(Fmt, Pos+1), %Skip ~
+ case string:find(Fmt, [$~]) of
+ nomatch -> {ok,lists:reverse(Need0)}; %That's it
+ [$~|Fmt1] ->
case extract_sequence(1, Fmt1, Need0) of
{ok,Need1,Rest} -> extract_sequences(Rest, Need1);
Error -> Error
diff --git a/lib/stdlib/src/erl_pp.erl b/lib/stdlib/src/erl_pp.erl
index ee5e7a11bf..367dbefb82 100644
--- a/lib/stdlib/src/erl_pp.erl
+++ b/lib/stdlib/src/erl_pp.erl
@@ -237,13 +237,20 @@ lform({attribute,Line,Name,Arg}, Opts) ->
lform({function,Line,Name,Arity,Clauses}, Opts) ->
lfunction({function,Line,Name,Arity,Clauses}, Opts);
%% These are specials to make it easier for the compiler.
-lform({error,E}, _Opts) ->
- leaf(format("~p\n", [{error,E}]));
-lform({warning,W}, _Opts) ->
- leaf(format("~p\n", [{warning,W}]));
+lform({error,_}=E, Opts) ->
+ message(E, Opts);
+lform({warning,_}=W, Opts) ->
+ message(W, Opts);
lform({eof,_Line}, _Opts) ->
$\n.
+message(M, #options{encoding = Encoding}) ->
+ F = case Encoding of
+ latin1 -> "~p\n";
+ unicode -> "~tp\n"
+ end,
+ leaf(format(F, [M])).
+
lattribute({attribute,_Line,type,Type}, Opts) ->
[typeattr(type, Type, Opts),leaf(".\n")];
lattribute({attribute,_Line,opaque,Type}, Opts) ->
@@ -598,8 +605,6 @@ lexpr({'fun',_,{clauses,Cs},Extra}, _Prec, Opts) ->
lexpr({named_fun,_,Name,Cs,Extra}, _Prec, Opts) ->
{force_nl,fun_info(Extra),
{list,[{first,['fun', " "],fun_clauses(Cs, Opts, {named, Name})},'end']}};
-lexpr({'query',_,Lc}, _Prec, Opts) ->
- {list,[{step,leaf("query"),lexpr(Lc, 0, Opts)},'end']};
lexpr({call,_,{remote,_,{atom,_,M},{atom,_,F}=N}=Name,Args}, Prec, Opts) ->
case erl_internal:bif(M, F, length(Args)) of
true ->
@@ -904,7 +909,7 @@ maybe_paren(_P, _Prec, Expr) ->
Expr.
leaf(S) ->
- {leaf,chars_size(S),S}.
+ {leaf,string:length(S),S}.
%%% Do the formatting. Currently nothing fancy. Could probably have
%%% done it in one single pass.
@@ -964,7 +969,7 @@ f({seq,Before,After,Sep,LItems}, I0, ST, WT, PP) ->
Sizes = BSizeL ++ SizeL,
NSepChars = if
is_list(Sep), Sep =/= [] ->
- erlang:max(0, length(CharsL)-1);
+ erlang:max(0, length(CharsL)-1); % not string:length
true ->
0
end,
@@ -1120,7 +1125,7 @@ incr(I, Incr) ->
I+Incr.
indentation(E, I) when I < 0 ->
- chars_size(E);
+ string:length(E);
indentation(E, I0) ->
I = io_lib_format:indentation(E, I0),
case has_nl(E) of
@@ -1157,19 +1162,19 @@ write_a_string(S, I, PP) ->
write_a_string([], _N, _Len, _PP) ->
[];
write_a_string(S, N, Len, PP) ->
- SS = string:sub_string(S, 1, N),
+ SS = string:slice(S, 0, N),
Sl = write_string(SS, PP),
- case (chars_size(Sl) > Len) and (N > ?MIN_SUBSTRING) of
+ case (string:length(Sl) > Len) and (N > ?MIN_SUBSTRING) of
true ->
write_a_string(S, N-1, Len, PP);
false ->
[flat_leaf(Sl) |
- write_a_string(lists:nthtail(length(SS), S), Len, Len, PP)]
+ write_a_string(string:slice(S, string:length(SS)), Len, Len, PP)]
end.
flat_leaf(S) ->
L = lists:flatten(S),
- {leaf,length(L),L}.
+ {leaf,string:length(L),L}.
write_value(V, PP) ->
(PP#pp.value_fun)(V).
@@ -1190,15 +1195,6 @@ write_char(C, PP) ->
a0() ->
erl_anno:new(0).
-chars_size([C | Es]) when is_integer(C) ->
- 1 + chars_size(Es);
-chars_size([E | Es]) ->
- chars_size(E) + chars_size(Es);
-chars_size([]) ->
- 0;
-chars_size(B) when is_binary(B) ->
- byte_size(B).
-
-define(N_SPACES, 30).
spacetab() ->
diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl
index 47223b129c..4774c4bf19 100644
--- a/lib/stdlib/src/erl_scan.erl
+++ b/lib/stdlib/src/erl_scan.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1996-2015. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -752,7 +752,7 @@ scan_string(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) ->
{char_error,Ncs,Error,Nline,Ncol,EndCol} ->
scan_error(Error, Nline, Ncol, Nline, EndCol, Ncs);
{error,Nline,Ncol,Nwcs,Ncs} ->
- Estr = string:substr(Nwcs, 1, 16), % Expanded escape chars.
+ Estr = string:slice(Nwcs, 0, 16), % Expanded escape chars.
scan_error({string,$\",Estr}, Line0, Col0, Nline, Ncol, Ncs); %"
{Ncs,Nline,Ncol,Nstr,Nwcs} ->
Anno = anno(Line0, Col0, St, Nstr),
@@ -767,7 +767,7 @@ scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) ->
{char_error,Ncs,Error,Nline,Ncol,EndCol} ->
scan_error(Error, Nline, Ncol, Nline, EndCol, Ncs);
{error,Nline,Ncol,Nwcs,Ncs} ->
- Estr = string:substr(Nwcs, 1, 16), % Expanded escape chars.
+ Estr = string:slice(Nwcs, 0, 16), % Expanded escape chars.
scan_error({string,$\',Estr}, Line0, Col0, Nline, Ncol, Ncs); %'
{Ncs,Nline,Ncol,Nstr,Nwcs} ->
case catch list_to_atom(Nwcs) of
diff --git a/lib/stdlib/src/escript.erl b/lib/stdlib/src/escript.erl
index 2b9d8ff65b..132f8efbbe 100644
--- a/lib/stdlib/src/escript.erl
+++ b/lib/stdlib/src/escript.erl
@@ -224,8 +224,8 @@ return_sections(S, Bin) ->
normalize_section(Name, undefined) ->
{Name, undefined};
normalize_section(shebang, "#!" ++ Chars) ->
- Chopped = string:strip(Chars, right, $\n),
- Stripped = string:strip(Chopped, both),
+ Chopped = string:trim(Chars, trailing, "$\n"),
+ Stripped = string:trim(Chopped, both),
if
Stripped =:= ?SHEBANG ->
{shebang, default};
@@ -233,8 +233,8 @@ normalize_section(shebang, "#!" ++ Chars) ->
{shebang, Stripped}
end;
normalize_section(comment, Chars) ->
- Chopped = string:strip(Chars, right, $\n),
- Stripped = string:strip(string:strip(Chopped, left, $%), both),
+ Chopped = string:trim(Chars, trailing, "$\n"),
+ Stripped = string:trim(string:trim(Chopped, leading, "$%"), both),
if
Stripped =:= ?COMMENT ->
{comment, default};
@@ -242,8 +242,8 @@ normalize_section(comment, Chars) ->
{comment, Stripped}
end;
normalize_section(emu_args, "%%!" ++ Chars) ->
- Chopped = string:strip(Chars, right, $\n),
- Stripped = string:strip(Chopped, both),
+ Chopped = string:trim(Chars, trailing, "$\n"),
+ Stripped = string:trim(Chopped, both),
{emu_args, Stripped};
normalize_section(Name, Chars) ->
{Name, Chars}.
diff --git a/lib/stdlib/src/ets.erl b/lib/stdlib/src/ets.erl
index 1db004c91e..b6548626f3 100644
--- a/lib/stdlib/src/ets.erl
+++ b/lib/stdlib/src/ets.erl
@@ -1719,7 +1719,7 @@ get_line(P, Default) ->
line_string(Binary) when is_binary(Binary) -> unicode:characters_to_list(Binary);
line_string(Other) -> Other.
-nonl(S) -> string:strip(S, right, $\n).
+nonl(S) -> string:trim(S, trailing, "$\n").
print_number(Tab, Key, Num) ->
Os = ets:lookup(Tab, Key),
@@ -1748,7 +1748,7 @@ do_display_item(_Height, Width, I, Opos) ->
L = to_string(I),
L2 = if
length(L) > Width - 8 ->
- string:substr(L, 1, Width-13) ++ " ...";
+ string:slice(L, 0, Width-13) ++ " ...";
true ->
L
end,
diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl
index d7c313f214..0f90b3fc33 100644
--- a/lib/stdlib/src/filelib.erl
+++ b/lib/stdlib/src/filelib.erl
@@ -365,11 +365,18 @@ do_list_dir(Dir, Mod) -> eval_list_dir(Dir, Mod).
%%% Compiling a wildcard.
+
+%% Define characters used for escaping a \.
+-define(ESCAPE_PREFIX, $@).
+-define(ESCAPE_CHARACTER, [?ESCAPE_PREFIX,$e]).
+-define(ESCAPED_ESCAPE_PREFIX, [?ESCAPE_PREFIX,?ESCAPE_PREFIX]).
+
%% Only for debugging.
compile_wildcard(Pattern) when is_list(Pattern) ->
{compiled_wildcard,?HANDLE_ERROR(compile_wildcard(Pattern, "."))}.
-compile_wildcard(Pattern, Cwd0) ->
+compile_wildcard(Pattern0, Cwd0) ->
+ Pattern = convert_escapes(Pattern0),
[Root|Rest] = filename:split(Pattern),
case filename:pathtype(Root) of
relative ->
@@ -409,7 +416,8 @@ compile_join({cwd,Cwd}, File0) ->
compile_join({root,PrefixLen,Root}, File) ->
{root,PrefixLen,filename:join(Root, File)}.
-compile_part(Part) ->
+compile_part(Part0) ->
+ Part = wrap_escapes(Part0),
compile_part(Part, false, []).
compile_part_to_sep(Part) ->
@@ -445,6 +453,8 @@ compile_part([${|Rest], Upto, Result) ->
error ->
compile_part(Rest, Upto, [${|Result])
end;
+compile_part([{escaped,X}|Rest], Upto, Result) ->
+ compile_part(Rest, Upto, [X|Result]);
compile_part([X|Rest], Upto, Result) ->
compile_part(Rest, Upto, [X|Result]);
compile_part([], _Upto, Result) ->
@@ -461,6 +471,8 @@ compile_charset1([Lower, $-, Upper|Rest], Ordset) when Lower =< Upper ->
compile_charset1(Rest, compile_range(Lower, Upper, Ordset));
compile_charset1([$]|Rest], Ordset) ->
{ok, {one_of, gb_sets:from_ordset(Ordset)}, Rest};
+compile_charset1([{escaped,X}|Rest], Ordset) ->
+ compile_charset1(Rest, ordsets:add_element(X, Ordset));
compile_charset1([X|Rest], Ordset) ->
compile_charset1(Rest, ordsets:add_element(X, Ordset));
compile_charset1([], _Ordset) ->
@@ -486,6 +498,32 @@ compile_alt(Pattern, Result) ->
error
end.
+%% Convert backslashes to an illegal Unicode character to
+%% protect in from filename:split/1.
+
+convert_escapes([?ESCAPE_PREFIX|T]) ->
+ ?ESCAPED_ESCAPE_PREFIX ++ convert_escapes(T);
+convert_escapes([$\\|T]) ->
+ ?ESCAPE_CHARACTER ++ convert_escapes(T);
+convert_escapes([H|T]) ->
+ [H|convert_escapes(T)];
+convert_escapes([]) ->
+ [].
+
+%% Wrap each escape in a tuple to remove the special meaning for
+%% the character that follows.
+
+wrap_escapes(?ESCAPED_ESCAPE_PREFIX ++ T) ->
+ [?ESCAPE_PREFIX|wrap_escapes(T)];
+wrap_escapes(?ESCAPE_CHARACTER ++ [C|T]) ->
+ [{escaped,C}|wrap_escapes(T)];
+wrap_escapes(?ESCAPE_CHARACTER) ->
+ [];
+wrap_escapes([H|T]) ->
+ [H|wrap_escapes(T)];
+wrap_escapes([]) ->
+ [].
+
badpattern(Reason) ->
error({badpattern,Reason}).
diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl
index ee807dfd09..a322bd002d 100644
--- a/lib/stdlib/src/filename.erl
+++ b/lib/stdlib/src/filename.erl
@@ -34,6 +34,38 @@
%% we flatten the arguments immediately on function entry as that makes
%% it easier to ensure that the code works.
+%%
+%% *** Requirements on Raw Filename Format ***
+%%
+%% These requirements are due to the 'filename' module
+%% in stdlib. This since it is documented that it
+%% should be able to operate on raw filenames as well
+%% as ordinary filenames.
+%%
+%% A raw filename *must* be a byte sequence where:
+%% 1. Codepoints 0-127 (7-bit ascii) *must* be encoded
+%% as a byte with the corresponding value. That is,
+%% the most significant bit in the byte encoding the
+%% codepoint is never set.
+%% 2. Codepoints greater than 127 *must* be encoded
+%% with the most significant bit set in *every* byte
+%% encoding it.
+%%
+%% Latin1 and UTF-8 meet these requirements while
+%% UTF-16 and UTF-32 don't.
+%%
+%% On Windows filenames are natively stored as malformed
+%% UTF-16LE (lonely surrogates may appear). A more correct
+%% description than UTF-16 would be an array of 16-bit
+%% words... In order to meet the requirements of the
+%% raw file format we convert the malformed UTF-16LE to
+%% malformed UTF-8 which meet the requirements.
+%%
+%% Note that these requirements are today only OTP
+%% internal (erts-stdlib internal) requirements that
+%% could be changed.
+%%
+
-export([absname/1, absname/2, absname_join/2,
basename/1, basename/2, dirname/1,
extension/1, join/1, join/2, pathtype/1,
@@ -41,6 +73,7 @@
safe_relative_path/1]).
-export([find_src/1, find_src/2]). % deprecated
-export([basedir/2, basedir/3]).
+-export([validate/1]).
%% Undocumented and unsupported exports.
-export([append/2]).
@@ -1053,10 +1086,10 @@ basedir_linux(Type) ->
user_log -> getenv("XDG_CACHE_HOME", ?basedir_linux_user_log, true);
site_data ->
Base = getenv("XDG_DATA_DIRS",?basedir_linux_site_data,false),
- string:tokens(Base,":");
+ string:lexemes(Base, ":");
site_config ->
Base = getenv("XDG_CONFIG_DIRS",?basedir_linux_site_config,false),
- string:tokens(Base,":")
+ string:lexemes(Base, ":")
end.
-define(basedir_darwin_user_data, "Library/Application Support").
@@ -1152,3 +1185,72 @@ basedir_os_type() ->
{win32,_} -> windows;
_ -> linux
end.
+
+%%
+%% validate/1
+%%
+
+-spec validate(FileName) -> boolean() when
+ FileName :: file:name_all().
+
+validate(FileName) when is_binary(FileName) ->
+ %% Raw filename...
+ validate_bin(FileName);
+validate(FileName) when is_list(FileName);
+ is_atom(FileName) ->
+ validate_list(FileName,
+ file:native_name_encoding(),
+ os:type()).
+
+validate_list(FileName, Enc, Os) ->
+ try
+ true = validate_list(FileName, Enc, Os, 0) > 0
+ catch
+ _ : _ -> false
+ end.
+
+validate_list([], _Enc, _Os, Chars) ->
+ Chars;
+validate_list(C, Enc, Os, Chars) when is_integer(C) ->
+ validate_char(C, Enc, Os),
+ Chars+1;
+validate_list(A, Enc, Os, Chars) when is_atom(A) ->
+ validate_list(atom_to_list(A), Enc, Os, Chars);
+validate_list([H|T], Enc, Os, Chars) ->
+ NewChars = validate_list(H, Enc, Os, Chars),
+ validate_list(T, Enc, Os, NewChars).
+
+%% C is always an integer...
+% validate_char(C, _, _) when not is_integer(C) ->
+% throw(invalid);
+validate_char(C, _, _) when C < 1 ->
+ throw(invalid); %% No negative or null characters...
+validate_char(C, latin1, _) when C > 255 ->
+ throw(invalid);
+validate_char(C, utf8, _) when C >= 16#110000 ->
+ throw(invalid);
+validate_char(C, utf8, {win32, _}) when C > 16#ffff ->
+ throw(invalid); %% invalid win wchar...
+validate_char(_C, utf8, {win32, _}) ->
+ ok; %% Range below is accepted on windows...
+validate_char(C, utf8, _) when 16#D800 =< C, C =< 16#DFFF ->
+ throw(invalid); %% invalid unicode range...
+validate_char(_, _, _) ->
+ ok.
+
+validate_bin(Bin) ->
+ %% Raw filename. That is, we do not interpret
+ %% the encoding, but we still do not accept
+ %% null characters...
+ try
+ true = validate_bin(Bin, 0) > 0
+ catch
+ _ : _ -> false
+ end.
+
+validate_bin(<<>>, Bs) ->
+ Bs;
+validate_bin(<<0, _Rest/binary>>, _Bs) ->
+ throw(invalid); %% No null characters allowed...
+validate_bin(<<_B, Rest/binary>>, Bs) ->
+ validate_bin(Rest, Bs+1).
diff --git a/lib/stdlib/src/gen_server.erl b/lib/stdlib/src/gen_server.erl
index 7daa7a9fe4..ac172325b5 100644
--- a/lib/stdlib/src/gen_server.erl
+++ b/lib/stdlib/src/gen_server.erl
@@ -116,23 +116,27 @@
%%%=========================================================================
-callback init(Args :: term()) ->
- {ok, State :: term()} | {ok, State :: term(), timeout() | hibernate} |
+ {ok, State :: term()} | {ok, State :: term(), timeout() | hibernate | {continue, term()}} |
{stop, Reason :: term()} | ignore.
-callback handle_call(Request :: term(), From :: {pid(), Tag :: term()},
State :: term()) ->
{reply, Reply :: term(), NewState :: term()} |
- {reply, Reply :: term(), NewState :: term(), timeout() | hibernate} |
+ {reply, Reply :: term(), NewState :: term(), timeout() | hibernate | {continue, term()}} |
{noreply, NewState :: term()} |
- {noreply, NewState :: term(), timeout() | hibernate} |
+ {noreply, NewState :: term(), timeout() | hibernate | {continue, term()}} |
{stop, Reason :: term(), Reply :: term(), NewState :: term()} |
{stop, Reason :: term(), NewState :: term()}.
-callback handle_cast(Request :: term(), State :: term()) ->
{noreply, NewState :: term()} |
- {noreply, NewState :: term(), timeout() | hibernate} |
+ {noreply, NewState :: term(), timeout() | hibernate | {continue, term()}} |
{stop, Reason :: term(), NewState :: term()}.
-callback handle_info(Info :: timeout | term(), State :: term()) ->
{noreply, NewState :: term()} |
- {noreply, NewState :: term(), timeout() | hibernate} |
+ {noreply, NewState :: term(), timeout() | hibernate | {continue, term()}} |
+ {stop, Reason :: term(), NewState :: term()}.
+-callback handle_continue(Info :: term(), State :: term()) ->
+ {noreply, NewState :: term()} |
+ {noreply, NewState :: term(), timeout() | hibernate | {continue, term()}} |
{stop, Reason :: term(), NewState :: term()}.
-callback terminate(Reason :: (normal | shutdown | {shutdown, term()} |
term()),
@@ -149,7 +153,7 @@
Status :: term().
-optional_callbacks(
- [handle_info/2, terminate/2, code_change/3, format_status/2]).
+ [handle_info/2, handle_continue/2, terminate/2, code_change/3, format_status/2]).
%%% -----------------------------------------------------------------
%%% Starts a generic server.
@@ -309,7 +313,7 @@ enter_loop(Mod, Options, State, ServerName, Timeout) ->
Name = gen:get_proc_name(ServerName),
Parent = gen:get_parent(),
Debug = gen:debug_options(Name, Options),
- HibernateAfterTimeout = gen:hibernate_after(Options),
+ HibernateAfterTimeout = gen:hibernate_after(Options),
loop(Parent, Name, State, Mod, Timeout, HibernateAfterTimeout, Debug).
%%%========================================================================
@@ -374,6 +378,19 @@ init_it(Mod, Args) ->
%%% ---------------------------------------------------
%%% The MAIN loop.
%%% ---------------------------------------------------
+
+loop(Parent, Name, State, Mod, {continue, Continue} = Msg, HibernateAfterTimeout, Debug) ->
+ Reply = try_dispatch(Mod, handle_continue, Continue, State),
+ case Debug of
+ [] ->
+ handle_common_reply(Reply, Parent, Name, undefined, Msg, Mod,
+ HibernateAfterTimeout, State);
+ _ ->
+ Debug1 = sys:handle_debug(Debug, fun print_event/3, Name, Msg),
+ handle_common_reply(Reply, Parent, Name, undefined, Msg, Mod,
+ HibernateAfterTimeout, State, Debug1)
+ end;
+
loop(Parent, Name, State, Mod, hibernate, HibernateAfterTimeout, Debug) ->
proc_lib:hibernate(?MODULE,wake_hib,[Parent, Name, State, Mod, HibernateAfterTimeout, Debug]);
diff --git a/lib/stdlib/src/gen_statem.erl b/lib/stdlib/src/gen_statem.erl
index 1110d18af6..cd6312855d 100644
--- a/lib/stdlib/src/gen_statem.erl
+++ b/lib/stdlib/src/gen_statem.erl
@@ -296,7 +296,7 @@
(Reason :: term()).
%% Format the callback module state in some sensible that is
-%% often condensed way. For StatusOption =:= 'normal' the perferred
+%% often condensed way. For StatusOption =:= 'normal' the preferred
%% return term is [{data,[{"State",FormattedState}]}], and for
%% StatusOption =:= 'terminate' it is just FormattedState.
-callback format_status(
@@ -510,8 +510,6 @@ call(ServerRef, Request, Timeout) ->
parse_timeout(Timeout) ->
case Timeout of
- {clean_timeout,infinity} ->
- {dirty_timeout,infinity};
{clean_timeout,_} ->
Timeout;
{dirty_timeout,_} ->
diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl
index 4b2d15c8b3..e345810ca0 100644
--- a/lib/stdlib/src/io_lib_format.erl
+++ b/lib/stdlib/src/io_lib_format.erl
@@ -380,7 +380,7 @@ float_e(_Fl, {Ds,E}, P) ->
{Fs,false} -> [Fs|float_exp(E-1)]
end.
-%% float_man([Digit], Icount, Dcount) -> {[Chars],CarryFlag}.
+%% float_man([Digit], Icount, Dcount) -> {[Char],CarryFlag}.
%% Generate the characters in the mantissa from the digits with Icount
%% characters before the '.' and Dcount decimals. Handle carry and let
%% caller decide what to do at top.
@@ -395,7 +395,7 @@ float_man([D|Ds], I, Dc) ->
{Cs,false} -> {[D|Cs],false}
end;
float_man([], I, Dc) -> %Pad with 0's
- {string:chars($0, I, [$.|string:chars($0, Dc)]),false}.
+ {lists:duplicate(I, $0) ++ [$.|lists:duplicate(Dc, $0)],false}.
float_man([D|_], 0) when D >= $5 -> {[],true};
float_man([_|_], 0) -> {[],false};
@@ -405,7 +405,7 @@ float_man([D|Ds], Dc) ->
{Cs,true} -> {[D+1|Cs],false};
{Cs,false} -> {[D|Cs],false}
end;
-float_man([], Dc) -> {string:chars($0, Dc),false}. %Pad with 0's
+float_man([], Dc) -> {lists:duplicate(Dc, $0),false}. %Pad with 0's
%% float_exp(Exponent) -> [Char].
%% Generate the exponent of a floating point number. Always include sign.
@@ -429,7 +429,7 @@ fwrite_f(Fl, F, Adj, P, Pad) when P >= 1 ->
float_f(Fl, Fd, P) when Fl < 0.0 ->
[$-|float_f(-Fl, Fd, P)];
float_f(Fl, {Ds,E}, P) when E =< 0 ->
- float_f(Fl, {string:chars($0, -E+1, Ds),1}, P); %Prepend enough 0's
+ float_f(Fl, {lists:duplicate(-E+1, $0)++Ds,1}, P); %Prepend enough 0's
float_f(_Fl, {Ds,E}, P) ->
case float_man(Ds, E, P) of
{Fs,true} -> "1" ++ Fs; %Handle carry
@@ -751,7 +751,7 @@ adjust(Data, Pad, right) -> [Pad|Data].
flat_trunc(List, N) when is_integer(N), N >= 0 ->
string:slice(List, 0, N).
-%% A deep version of string:chars/2,3
+%% A deep version of lists:duplicate/2
chars(_C, 0) ->
[];
diff --git a/lib/stdlib/src/lib.erl b/lib/stdlib/src/lib.erl
index c6eb0d7915..a7980cc294 100644
--- a/lib/stdlib/src/lib.erl
+++ b/lib/stdlib/src/lib.erl
@@ -646,7 +646,7 @@ pp_arguments(PF, As, I, Enc) ->
Ll = length(L),
A = list_to_atom(lists:duplicate(Ll, $a)),
S0 = unicode:characters_to_list(PF([A | T], I+1), Enc),
- brackets_to_parens([$[,L,string:sub_string(S0, 2+Ll)], Enc);
+ brackets_to_parens([$[,L,string:slice(S0, 1+Ll)], Enc);
_ ->
brackets_to_parens(PF(As, I+1), Enc)
end.
diff --git a/lib/stdlib/src/otp_internal.erl b/lib/stdlib/src/otp_internal.erl
index 5b488cc677..122b476ddb 100644
--- a/lib/stdlib/src/otp_internal.erl
+++ b/lib/stdlib/src/otp_internal.erl
@@ -609,6 +609,52 @@ obsolete_1(filename, find_src, 2) ->
obsolete_1(erlang, hash, 2) ->
{removed, {erlang, phash2, 2}, "20.0"};
+%% Added in OTP-21
+obsolete_1(string, len, 1) ->
+ {deprecated, "deprecated; use string:length/3 instead"};
+obsolete_1(string, concat, 2) ->
+ {deprecated, "deprecated; use [Str1,Str2] instead"};
+obsolete_1(string, str, 2) ->
+ {deprecated, "deprecated; use string:find/2 instead"};
+obsolete_1(string, rstr, 2) ->
+ {deprecated, "deprecated; use string:find/3 instead"};
+obsolete_1(string, chr, 2) ->
+ {deprecated, "deprecated; use string:find/2 instead"};
+obsolete_1(string, rchr, 2) ->
+ {deprecated, "deprecated; use string:find/3 instead"};
+obsolete_1(string, span, 2) ->
+ {deprecated, "deprecated; use string:take/2 instead"};
+obsolete_1(string, cspan, 2) ->
+ {deprecated, "deprecated; use string:take/3 instead"};
+obsolete_1(string, substr, _) ->
+ {deprecated, "deprecated; use string:slice/3 instead"};
+obsolete_1(string, tokens, 2) ->
+ {deprecated, "deprecated; use string:lexemes/2 instead"};
+obsolete_1(string, chars, _) ->
+ {deprecated, "deprecated; use lists:duplicate/2 instead"};
+obsolete_1(string, copies, _) ->
+ {deprecated, "deprecated; use lists:duplicate/2 instead"};
+obsolete_1(string, words, _) ->
+ {deprecated, "deprecated; use string:lexemes/2 instead"};
+obsolete_1(string, strip, _) ->
+ {deprecated, "deprecated; use string:trim/3 instead"};
+obsolete_1(string, sub_word, _) ->
+ {deprecated, "deprecated; use string:nth_lexeme/3 instead"};
+obsolete_1(string, sub_string, _) ->
+ {deprecated, "deprecated; use string:slice/3 instead"};
+obsolete_1(string, left, _) ->
+ {deprecated, "deprecated; use string:pad/3 instead"};
+obsolete_1(string, right, _) ->
+ {deprecated, "deprecated; use string:pad/3 instead"};
+obsolete_1(string, centre, _) ->
+ {deprecated, "deprecated; use string:pad/3 instead"};
+obsolete_1(string, join, _) ->
+ {deprecated, "deprecated; use lists:join/2 instead"};
+obsolete_1(string, to_upper, _) ->
+ {deprecated, "deprecated; use string:uppercase/1 or string:titlecase/1 instead"};
+obsolete_1(string, to_lower, _) ->
+ {deprecated, "deprecated; use string:lowercase/1 or string:casefold/1 instead"};
+
%% not obsolete
obsolete_1(_, _, _) ->
diff --git a/lib/stdlib/src/pool.erl b/lib/stdlib/src/pool.erl
index 05950a1d7c..b12ff205b1 100644
--- a/lib/stdlib/src/pool.erl
+++ b/lib/stdlib/src/pool.erl
@@ -25,7 +25,7 @@
%% with the least load !!!!
%% This function is callable from any node including the master
%% That is part of the pool
-%% nodes are scheduled on a per usgae basis and per load basis,
+%% nodes are scheduled on a per usage basis and per load basis,
%% Whenever we use a node, we put at the end of the queue, and whenever
%% a node report a change in load, we insert it accordingly
@@ -197,7 +197,7 @@ pure_insert({Load,Node},[{L,N}|Tail]) when Load < L ->
pure_insert(L,[H|T]) -> [H|pure_insert(L,T)].
%% Really should not measure the contributions from
-%% the back ground processes here .... which we do :-(
+%% the background processes here .... which we do :-(
%% We don't have to monitor the master, since we're slaves anyway
statistic_collector() ->
@@ -213,7 +213,7 @@ statistic_collector(I) ->
stat_loop(M, 999999)
end.
-%% Do not tell the master about our load if it has not changed
+%% Do not tell the master about our load if it has not changed
stat_loop(M, Old) ->
sleep(2000),
diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl
index 7a8a5e6d4a..362e98006e 100644
--- a/lib/stdlib/src/rand.erl
+++ b/lib/stdlib/src/rand.erl
@@ -21,8 +21,8 @@
%% Multiple PRNG module for Erlang/OTP
%% Copyright (c) 2015-2016 Kenji Rikitake
%%
-%% exrop (xoroshiro116+) added and statistical distribution
-%% improvements by the Erlang/OTP team 2017
+%% exrop (xoroshiro116+) added, statistical distribution
+%% improvements and uniform_real added by the Erlang/OTP team 2017
%% =====================================================================
-module(rand).
@@ -30,10 +30,14 @@
-export([seed_s/1, seed_s/2, seed/1, seed/2,
export_seed/0, export_seed_s/1,
uniform/0, uniform/1, uniform_s/1, uniform_s/2,
+ uniform_real/0, uniform_real_s/1,
jump/0, jump/1,
normal/0, normal/2, normal_s/1, normal_s/3
]).
+%% Debug
+-export([make_float/3, float2str/1, bc64/1]).
+
-compile({inline, [exs64_next/1, exsplus_next/1,
exs1024_next/1, exs1024_calc/2,
exrop_next/1, exrop_next_s/2,
@@ -60,6 +64,10 @@
%% N i evaluated 3 times
(?BSL((Bits), (X), (N)) bor ((X) bsr ((Bits)-(N))))).
+-define(
+ BC(V, N),
+ bc((V), ?BIT((N) - 1), N)).
+
%%-define(TWO_POW_MINUS53, (math:pow(2, -53))).
-define(TWO_POW_MINUS53, 1.11022302462515657e-16).
@@ -84,14 +92,21 @@
%% The 'bits' field indicates how many bits the integer
%% returned from 'next' has got, i.e 'next' shall return
%% an random integer in the range 0..(2^Bits - 1).
-%% At least 53 bits is required for the floating point
-%% producing fallbacks. This field is only used when
-%% the 'uniform' or 'uniform_n' fields are not defined.
+%% At least 55 bits is required for the floating point
+%% producing fallbacks, but 56 bits would be more future proof.
%%
%% The fields 'next', 'uniform' and 'uniform_n'
-%% implement the algorithm. If 'uniform' or 'uinform_n'
+%% implement the algorithm. If 'uniform' or 'uniform_n'
%% is not present there is a fallback using 'next' and either
-%% 'bits' or the deprecated 'max'.
+%% 'bits' or the deprecated 'max'. The 'next' function
+%% must generate a word with at least 56 good random bits.
+%%
+%% The 'weak_low_bits' field indicate how many bits are of
+%% lesser quality and they will not be used by the floating point
+%% producing functions, nor by the range producing functions
+%% when more bits are needed, to avoid weak bits in the middle
+%% of the generated bits. The lowest bits from the range
+%% functions still have the generator's quality.
%%
-type alg_handler() ::
#{type := alg(),
@@ -148,11 +163,7 @@
%% For ranges larger than the algorithm bit size
uniform_range(Range, #{next:=Next, bits:=Bits} = Alg, R, V) ->
- WeakLowBits =
- case Alg of
- #{weak_low_bits:=WLB} -> WLB;
- #{} -> 0
- end,
+ WeakLowBits = maps:get(weak_low_bits, Alg, 0),
%% Maybe waste the lowest bit(s) when shifting in new bits
Shift = Bits - WeakLowBits,
ShiftMask = bnot ?MASK(WeakLowBits),
@@ -297,7 +308,7 @@ uniform_s({#{bits:=Bits, next:=Next} = Alg, R0}) ->
{(V bsr (Bits - 53)) * ?TWO_POW_MINUS53, {Alg, R1}};
uniform_s({#{max:=Max, next:=Next} = Alg, R0}) ->
{V, R1} = Next(R0),
- %% Old broken algorithm with non-uniform density
+ %% Old algorithm with non-uniform density
{V / (Max + 1), {Alg, R1}}.
@@ -317,7 +328,7 @@ uniform_s(N, {#{bits:=Bits, next:=Next} = Alg, R0})
?uniform_range(N, Alg, R1, V, MaxMinusN, I);
uniform_s(N, {#{max:=Max, next:=Next} = Alg, R0})
when is_integer(N), 1 =< N ->
- %% Old broken algorithm with skewed probability
+ %% Old algorithm with skewed probability
%% and gap in ranges > Max
{V, R1} = Next(R0),
if
@@ -328,6 +339,189 @@ uniform_s(N, {#{max:=Max, next:=Next} = Alg, R0})
{trunc(F * N) + 1, {Alg, R1}}
end.
+%% uniform_real/0: returns a random float X where 0.0 < X =< 1.0,
+%% updating the state in the process dictionary.
+
+-spec uniform_real() -> X :: float().
+uniform_real() ->
+ {X, Seed} = uniform_real_s(seed_get()),
+ _ = seed_put(Seed),
+ X.
+
+%% uniform_real_s/1: given a state, uniform_s/1
+%% returns a random float X where 0.0 < X =< 1.0,
+%% and a new state.
+%%
+%% This function does not use the same form of uniformity
+%% as the uniform_s/1 function.
+%%
+%% Instead, this function does not generate numbers with equal
+%% distance in the interval, but rather tries to keep all mantissa
+%% bits random also for small numbers, meaning that the distance
+%% between possible numbers decreases when the numbers
+%% approaches 0.0, as does the possibility for a particular
+%% number. Hence uniformity is preserved.
+%%
+%% To generate 56 bits at the time instead of 53 is actually
+%% a speed optimization since the probability to have to
+%% generate a second word decreases by 1/2 for every extra bit.
+%%
+%% This function generates normalized numbers, so the smallest number
+%% that can be generated is 2^-1022 with the distance 2^-1074
+%% to the next to smallest number, compared to 2^-53 for uniform_s/1.
+%%
+%% This concept of uniformity should work better for applications
+%% where you need to calculate 1.0/X or math:log(X) since those
+%% operations benefits from larger precision approaching 0.0,
+%% and that this function does not return 0.0 nor denormalized
+%% numbers very close to 0.0. The log() operation in The Box-Muller
+%% transformation for normal distribution is an example of this.
+%%
+%%-define(TWO_POW_MINUS55, (math:pow(2, -55))).
+%%-define(TWO_POW_MINUS110, (math:pow(2, -110))).
+%%-define(TWO_POW_MINUS55, 2.7755575615628914e-17).
+%%-define(TWO_POW_MINUS110, 7.7037197775489436e-34).
+%%
+-spec uniform_real_s(State :: state()) -> {X :: float(), NewState :: state()}.
+uniform_real_s({#{bits:=Bits, next:=Next} = Alg, R0}) ->
+ %% Generate a 56 bit number without using the weak low bits.
+ %%
+ %% Be sure to use only 53 bits when multiplying with
+ %% math:pow(2.0, -N) to avoid rounding which would make
+ %% "even" floats more probable than "odd".
+ %%
+ {V1, R1} = Next(R0),
+ M1 = V1 bsr (Bits - 56),
+ if
+ ?BIT(55) =< M1 ->
+ %% We have 56 bits - waste 3
+ {(M1 bsr 3) * math:pow(2.0, -53), {Alg, R1}};
+ ?BIT(54) =< M1 ->
+ %% We have 55 bits - waste 2
+ {(M1 bsr 2) * math:pow(2.0, -54), {Alg, R1}};
+ ?BIT(53) =< M1 ->
+ %% We have 54 bits - waste 1
+ {(M1 bsr 1) * math:pow(2.0, -55), {Alg, R1}};
+ ?BIT(52) =< M1 ->
+ %% We have 53 bits - use all
+ {M1 * math:pow(2.0, -56), {Alg, R1}};
+ true ->
+ %% Need more bits
+ {V2, R2} = Next(R1),
+ uniform_real_s(Alg, Next, M1, -56, R2, V2, Bits)
+ end;
+uniform_real_s({#{max:=_, next:=Next} = Alg, R0}) ->
+ %% Generate a 56 bit number.
+ %% Ignore the weak low bits for these old algorithms,
+ %% just produce something reasonable.
+ %%
+ %% Be sure to use only 53 bits when multiplying with
+ %% math:pow(2.0, -N) to avoid rounding which would make
+ %% "even" floats more probable than "odd".
+ %%
+ {V1, R1} = Next(R0),
+ M1 = ?MASK(56, V1),
+ if
+ ?BIT(55) =< M1 ->
+ %% We have 56 bits - waste 3
+ {(M1 bsr 3) * math:pow(2.0, -53), {Alg, R1}};
+ ?BIT(54) =< M1 ->
+ %% We have 55 bits - waste 2
+ {(M1 bsr 2) * math:pow(2.0, -54), {Alg, R1}};
+ ?BIT(53) =< M1 ->
+ %% We have 54 bits - waste 1
+ {(M1 bsr 1) * math:pow(2.0, -55), {Alg, R1}};
+ ?BIT(52) =< M1 ->
+ %% We have 53 bits - use all
+ {M1 * math:pow(2.0, -56), {Alg, R1}};
+ true ->
+ %% Need more bits
+ {V2, R2} = Next(R1),
+ uniform_real_s(Alg, Next, M1, -56, R2, V2, 56)
+ end.
+
+uniform_real_s(Alg, _Next, M0, -1064, R1, V1, Bits) -> % 19*56
+ %% This is a very theoretical bottom case.
+ %% The odds of getting here is about 2^-1008,
+ %% through a white box test case, or thanks to
+ %% a malfunctioning PRNG producing 18 56-bit zeros in a row.
+ %%
+ %% Fill up to 53 bits, we have at most 52
+ B0 = (53 - ?BC(M0, 52)), % Missing bits
+ {(((M0 bsl B0) bor (V1 bsr (Bits - B0))) * math:pow(2.0, -1064 - B0)),
+ {Alg, R1}};
+uniform_real_s(Alg, Next, M0, BitNo, R1, V1, Bits) ->
+ if
+ %% Optimize the most probable.
+ %% Fill up to 53 bits.
+ ?BIT(51) =< M0 ->
+ %% We have 52 bits in M0 - need 1
+ {(((M0 bsl 1) bor (V1 bsr (Bits - 1)))
+ * math:pow(2.0, BitNo - 1)),
+ {Alg, R1}};
+ ?BIT(50) =< M0 ->
+ %% We have 51 bits in M0 - need 2
+ {(((M0 bsl 2) bor (V1 bsr (Bits - 2)))
+ * math:pow(2.0, BitNo - 2)),
+ {Alg, R1}};
+ ?BIT(49) =< M0 ->
+ %% We have 50 bits in M0 - need 3
+ {(((M0 bsl 3) bor (V1 bsr (Bits - 3)))
+ * math:pow(2.0, BitNo - 3)),
+ {Alg, R1}};
+ M0 == 0 ->
+ M1 = V1 bsr (Bits - 56),
+ if
+ ?BIT(55) =< M1 ->
+ %% We have 56 bits - waste 3
+ {(M1 bsr 3) * math:pow(2.0, BitNo - 53), {Alg, R1}};
+ ?BIT(54) =< M1 ->
+ %% We have 55 bits - waste 2
+ {(M1 bsr 2) * math:pow(2.0, BitNo - 54), {Alg, R1}};
+ ?BIT(53) =< M1 ->
+ %% We have 54 bits - waste 1
+ {(M1 bsr 1) * math:pow(2.0, BitNo - 55), {Alg, R1}};
+ ?BIT(52) =< M1 ->
+ %% We have 53 bits - use all
+ {M1 * math:pow(2.0, BitNo - 56), {Alg, R1}};
+ BitNo =:= -1008 ->
+ %% Endgame
+ %% For the last round we can not have 14 zeros or more
+ %% at the top of M1 because then we will underflow,
+ %% so we need at least 43 bits
+ if
+ ?BIT(42) =< M1 ->
+ %% We have 43 bits - get the last bits
+ uniform_real_s(Alg, Next, M1, BitNo - 56, R1);
+ true ->
+ %% Would underflow 2^-1022 - start all over
+ %%
+ %% We could just crash here since the odds for
+ %% the PRNG being broken is much higher than
+ %% for a good PRNG generating this many zeros
+ %% in a row. Maybe we should write an error
+ %% report or call this a system limit...?
+ uniform_real_s({Alg, R1})
+ end;
+ true ->
+ %% Need more bits
+ uniform_real_s(Alg, Next, M1, BitNo - 56, R1)
+ end;
+ true ->
+ %% Fill up to 53 bits
+ B0 = 53 - ?BC(M0, 49), % Number of bits we need to append
+ {(((M0 bsl B0) bor (V1 bsr (Bits - B0)))
+ * math:pow(2.0, BitNo - B0)),
+ {Alg, R1}}
+ end.
+%%
+uniform_real_s(#{bits:=Bits} = Alg, Next, M0, BitNo, R0) ->
+ {V1, R1} = Next(R0),
+ uniform_real_s(Alg, Next, M0, BitNo, R1, V1, Bits);
+uniform_real_s(#{max:=_} = Alg, Next, M0, BitNo, R0) ->
+ {V1, R1} = Next(R0),
+ uniform_real_s(Alg, Next, M0, BitNo, R1, ?MASK(56, V1), 56).
+
%% jump/1: given a state, jump/1
%% returns a new state which is equivalent to that
%% after a large number of call defined for each algorithm.
@@ -1025,3 +1219,42 @@ normal_fi(Indx) ->
1.0214971439701471e-02,8.6165827693987316e-03,7.0508754713732268e-03,
5.5224032992509968e-03,4.0379725933630305e-03,2.6090727461021627e-03,
1.2602859304985975e-03}).
+
+%%%bitcount64(0) -> 0;
+%%%bitcount64(V) -> 1 + bitcount(V, 64).
+%%%
+%%%-define(
+%%% BITCOUNT(V, N),
+%%% bitcount(V, N) ->
+%%% if
+%%% (1 bsl ((N) bsr 1)) =< (V) ->
+%%% ((N) bsr 1) + bitcount((V) bsr ((N) bsr 1), ((N) bsr 1));
+%%% true ->
+%%% bitcount((V), ((N) bsr 1))
+%%% end).
+%%%?BITCOUNT(V, 64);
+%%%?BITCOUNT(V, 32);
+%%%?BITCOUNT(V, 16);
+%%%?BITCOUNT(V, 8);
+%%%?BITCOUNT(V, 4);
+%%%?BITCOUNT(V, 2);
+%%%bitcount(_, 1) -> 0.
+
+bc64(V) -> ?BC(V, 64).
+
+%% Linear from high bit - higher probability first gives faster execution
+bc(V, B, N) when B =< V -> N;
+bc(V, B, N) -> bc(V, B bsr 1, N - 1).
+
+make_float(S, E, M) ->
+ <<F/float>> = <<S:1, E:11, M:52>>,
+ F.
+
+float2str(N) ->
+ <<S:1, E:11, M:52>> = <<(float(N))/float>>,
+ lists:flatten(
+ io_lib:format(
+ "~c~c.~13.16.0bE~b",
+ [case S of 1 -> $-; 0 -> $+ end,
+ case E of 0 -> $0; _ -> $1 end,
+ M, E - 16#3ff])).
diff --git a/lib/stdlib/src/slave.erl b/lib/stdlib/src/slave.erl
index d7cf6386f5..b3f3206d67 100644
--- a/lib/stdlib/src/slave.erl
+++ b/lib/stdlib/src/slave.erl
@@ -320,7 +320,7 @@ mk_cmd(Host, Name, Args, Waiter, Prog0) ->
%% emulator and flags as the test node. The return from lib:progname()
%% could then typically be '/<full_path_to>/cerl -gcov').
quote_progname(Progname) ->
- do_quote_progname(string:tokens(to_list(Progname)," ")).
+ do_quote_progname(string:lexemes(to_list(Progname)," ")).
do_quote_progname([Prog]) ->
"\""++Prog++"\"";
diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src
index 3c449d3cb9..5fb48acfab 100644
--- a/lib/stdlib/src/stdlib.app.src
+++ b/lib/stdlib/src/stdlib.app.src
@@ -101,13 +101,14 @@
timer,
unicode,
unicode_util,
+ uri_string,
win32reg,
zip]},
{registered,[timer_server,rsh_starter,take_over_monitor,pool_master,
dets]},
{applications, [kernel]},
{env, []},
- {runtime_dependencies, ["sasl-3.0","kernel-5.0","erts-9.0","crypto-3.3",
+ {runtime_dependencies, ["sasl-3.0","kernel-6.0","erts-10.0","crypto-3.3",
"compiler-5.0"]}
]}.
diff --git a/lib/stdlib/src/stdlib.appup.src b/lib/stdlib/src/stdlib.appup.src
index 800c2c61f3..e4e3fb83e9 100644
--- a/lib/stdlib/src/stdlib.appup.src
+++ b/lib/stdlib/src/stdlib.appup.src
@@ -18,9 +18,7 @@
%% %CopyrightEnd%
{"%VSN%",
%% Up from - max one major revision back
- [{<<"3\\.[0-3](\\.[0-9]+)*">>,[restart_new_emulator]}, % OTP-19.*
- {<<"3\\.4(\\.[0-9]+)*">>,[restart_new_emulator]}], % OTP-20.*
+ [{<<"3\\.4(\\.[0-9]+)*">>,[restart_new_emulator]}], % OTP-20.*
%% Down to - max one major revision back
- [{<<"3\\.[0-3](\\.[0-9]+)*">>,[restart_new_emulator]}, % OTP-19.*
- {<<"3\\.4(\\.[0-9]+)*">>,[restart_new_emulator]}] % OTP-20.*
+ [{<<"3\\.4(\\.[0-9]+)*">>,[restart_new_emulator]}] % OTP-20.*
}.
diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl
index 4972da297d..5a4d2df2a6 100644
--- a/lib/stdlib/src/string.erl
+++ b/lib/stdlib/src/string.erl
@@ -87,6 +87,16 @@
%%% May be removed
-export([list_to_float/1, list_to_integer/1]).
+-deprecated([{len,1},{concat,2},
+ {str,2},{chr,2},{rchr,2},{rstr,2},
+ {span,2},{cspan,2},{substr,'_'},{tokens,2},
+ {chars,'_'},
+ {copies,2},{words,'_'},{strip,'_'},
+ {sub_word,'_'},{left,'_'},{right,'_'},
+ {sub_string,'_'},{centre,'_'},{join,2},
+ {to_upper,1}, {to_lower,1}
+ ]).
+
%% Uses bifs: string:list_to_float/1 and string:list_to_integer/1
-spec list_to_float(String) -> {Float, Rest} | {'error', Reason} when
String :: string(),
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
new file mode 100644
index 0000000000..22212da222
--- /dev/null
+++ b/lib/stdlib/src/uri_string.erl
@@ -0,0 +1,1842 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2017. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+%%
+%% [RFC 3986, Chapter 2.2. Reserved Characters]
+%%
+%% reserved = gen-delims / sub-delims
+%%
+%% gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+%%
+%% sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+%% / "*" / "+" / "," / ";" / "="
+%%
+%%
+%% [RFC 3986, Chapter 2.3. Unreserved Characters]
+%%
+%% unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+%%
+%%
+%% [RFC 3986, Chapter 3. Syntax Components]
+%%
+%% The generic URI syntax consists of a hierarchical sequence of
+%% components referred to as the scheme, authority, path, query, and
+%% fragment.
+%%
+%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+%%
+%% hier-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-rootless
+%% / path-empty
+%%
+%% The scheme and path components are required, though the path may be
+%% empty (no characters). When authority is present, the path must
+%% either be empty or begin with a slash ("/") character. When
+%% authority is not present, the path cannot begin with two slash
+%% characters ("//"). These restrictions result in five different ABNF
+%% rules for a path (Section 3.3), only one of which will match any
+%% given URI reference.
+%%
+%% The following are two example URIs and their component parts:
+%%
+%% foo://example.com:8042/over/there?name=ferret#nose
+%% \_/ \______________/\_________/ \_________/ \__/
+%% | | | | |
+%% scheme authority path query fragment
+%% | _____________________|__
+%% / \ / \
+%% urn:example:animal:ferret:nose
+%%
+%%
+%% [RFC 3986, Chapter 3.1. Scheme]
+%%
+%% Each URI begins with a scheme name that refers to a specification for
+%% assigning identifiers within that scheme.
+%%
+%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+%%
+%%
+%% [RFC 3986, Chapter 3.2. Authority]
+%%
+%% Many URI schemes include a hierarchical element for a naming
+%% authority so that governance of the name space defined by the
+%% remainder of the URI is delegated to that authority (which may, in
+%% turn, delegate it further).
+%%
+%% authority = [ userinfo "@" ] host [ ":" port ]
+%%
+%%
+%% [RFC 3986, Chapter 3.2.1. User Information]
+%%
+%% The userinfo subcomponent may consist of a user name and, optionally,
+%% scheme-specific information about how to gain authorization to access
+%% the resource. The user information, if present, is followed by a
+%% commercial at-sign ("@") that delimits it from the host.
+%%
+%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+%%
+%%
+%% [RFC 3986, Chapter 3.2.2. Host]
+%%
+%% The host subcomponent of authority is identified by an IP literal
+%% encapsulated within square brackets, an IPv4 address in dotted-
+%% decimal form, or a registered name.
+%%
+%% host = IP-literal / IPv4address / reg-name
+%%
+%% IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+%%
+%% IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
+%%
+%% IPv6address = 6( h16 ":" ) ls32
+%% / "::" 5( h16 ":" ) ls32
+%% / [ h16 ] "::" 4( h16 ":" ) ls32
+%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
+%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
+%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
+%% / [ *4( h16 ":" ) h16 ] "::" ls32
+%% / [ *5( h16 ":" ) h16 ] "::" h16
+%% / [ *6( h16 ":" ) h16 ] "::"
+%%
+%% ls32 = ( h16 ":" h16 ) / IPv4address
+%% ; least-significant 32 bits of address
+%%
+%% h16 = 1*4HEXDIG
+%% ; 16 bits of address represented in hexadecimal
+%%
+%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
+%%
+%% dec-octet = DIGIT ; 0-9
+%% / %x31-39 DIGIT ; 10-99
+%% / "1" 2DIGIT ; 100-199
+%% / "2" %x30-34 DIGIT ; 200-249
+%% / "25" %x30-35 ; 250-255
+%%
+%% reg-name = *( unreserved / pct-encoded / sub-delims )
+%%
+%%
+%% [RFC 3986, Chapter 3.2.2. Port]
+%%
+%% The port subcomponent of authority is designated by an optional port
+%% number in decimal following the host and delimited from it by a
+%% single colon (":") character.
+%%
+%% port = *DIGIT
+%%
+%%
+%% [RFC 3986, Chapter 3.3. Path]
+%%
+%% The path component contains data, usually organized in hierarchical
+%% form, that, along with data in the non-hierarchical query component
+%% (Section 3.4), serves to identify a resource within the scope of the
+%% URI's scheme and naming authority (if any). The path is terminated
+%% by the first question mark ("?") or number sign ("#") character, or
+%% by the end of the URI.
+%%
+%% path = path-abempty ; begins with "/" or is empty
+%% / path-absolute ; begins with "/" but not "//"
+%% / path-noscheme ; begins with a non-colon segment
+%% / path-rootless ; begins with a segment
+%% / path-empty ; zero characters
+%%
+%% path-abempty = *( "/" segment )
+%% path-absolute = "/" [ segment-nz *( "/" segment ) ]
+%% path-noscheme = segment-nz-nc *( "/" segment )
+%% path-rootless = segment-nz *( "/" segment )
+%% path-empty = 0<pchar>
+%% segment = *pchar
+%% segment-nz = 1*pchar
+%% segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+%% ; non-zero-length segment without any colon ":"
+%%
+%% pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+%%
+%%
+%% [RFC 3986, Chapter 3.4. Query]
+%%
+%% The query component contains non-hierarchical data that, along with
+%% data in the path component (Section 3.3), serves to identify a
+%% resource within the scope of the URI's scheme and naming authority
+%% (if any). The query component is indicated by the first question
+%% mark ("?") character and terminated by a number sign ("#") character
+%% or by the end of the URI.
+%%
+%% query = *( pchar / "/" / "?" )
+%%
+%%
+%% [RFC 3986, Chapter 3.5. Fragment]
+%%
+%% The fragment identifier component of a URI allows indirect
+%% identification of a secondary resource by reference to a primary
+%% resource and additional identifying information.
+%%
+%% fragment = *( pchar / "/" / "?" )
+%%
+%%
+%% [RFC 3986, Chapter 4.1. URI Reference]
+%%
+%% URI-reference is used to denote the most common usage of a resource
+%% identifier.
+%%
+%% URI-reference = URI / relative-ref
+%%
+%%
+%% [RFC 3986, Chapter 4.2. Relative Reference]
+%%
+%% A relative reference takes advantage of the hierarchical syntax
+%% (Section 1.2.3) to express a URI reference relative to the name space
+%% of another hierarchical URI.
+%%
+%% relative-ref = relative-part [ "?" query ] [ "#" fragment ]
+%%
+%% relative-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-noscheme
+%% / path-empty
+%%
+%%
+%% [RFC 3986, Chapter 4.3. Absolute URI]
+%%
+%% Some protocol elements allow only the absolute form of a URI without
+%% a fragment identifier. For example, defining a base URI for later
+%% use by relative references calls for an absolute-URI syntax rule that
+%% does not allow a fragment.
+%%
+%% absolute-URI = scheme ":" hier-part [ "?" query ]
+%%
+-module(uri_string).
+
+%%-------------------------------------------------------------------------
+%% External API
+%%-------------------------------------------------------------------------
+-export([normalize/1, parse/1,
+ recompose/1, transcode/2]).
+-export_type([error/0, uri_map/0, uri_string/0]).
+
+
+%%-------------------------------------------------------------------------
+%% Internal API
+%%-------------------------------------------------------------------------
+-export([is_host/1, is_path/1]). % suppress warnings
+
+
+%%-------------------------------------------------------------------------
+%% Macros
+%%-------------------------------------------------------------------------
+-define(CHAR(Char), <<Char/utf8>>).
+-define(STRING_EMPTY, <<>>).
+-define(STRING(MatchStr), <<MatchStr/binary>>).
+-define(STRING_REST(MatchStr, Rest), <<MatchStr/utf8, Rest/binary>>).
+
+-define(DEC2HEX(X),
+ if ((X) >= 0) andalso ((X) =< 9) -> (X) + $0;
+ ((X) >= 10) andalso ((X) =< 15) -> (X) + $A - 10
+ end).
+
+-define(HEX2DEC(X),
+ if ((X) >= $0) andalso ((X) =< $9) -> (X) - $0;
+ ((X) >= $A) andalso ((X) =< $F) -> (X) - $A + 10;
+ ((X) >= $a) andalso ((X) =< $f) -> (X) - $a + 10
+ end).
+
+
+%%%=========================================================================
+%%% API
+%%%=========================================================================
+
+%%-------------------------------------------------------------------------
+%% URI compliant with RFC 3986
+%% ASCII %x21 - %x7A ("!" - "z") except
+%% %x34 " double quote
+%% %x60 < less than
+%% %x62 > greater than
+%% %x92 \ backslash
+%% %x94 ^ caret / circumflex
+%% %x96 ` grave / accent
+%%-------------------------------------------------------------------------
+-type uri_string() :: iodata().
+-type error() :: {error, atom(), term()}.
+
+
+%%-------------------------------------------------------------------------
+%% RFC 3986, Chapter 3. Syntax Components
+%%-------------------------------------------------------------------------
+-type uri_map() ::
+ #{fragment => unicode:chardata(),
+ host => unicode:chardata(),
+ path => unicode:chardata(),
+ port => non_neg_integer() | undefined,
+ query => unicode:chardata(),
+ scheme => unicode:chardata(),
+ userinfo => unicode:chardata()} | #{}.
+
+
+%%-------------------------------------------------------------------------
+%% Normalize URIs
+%%-------------------------------------------------------------------------
+-spec normalize(URIString) -> NormalizedURI when
+ URIString :: uri_string(),
+ NormalizedURI :: uri_string().
+normalize(URIString) ->
+ %% Percent-encoding normalization and case normalization for
+ %% percent-encoded triplets are achieved by running parse and
+ %% recompose on the input URI string.
+ recompose(
+ normalize_path_segment(
+ normalize_scheme_based(
+ normalize_case(
+ parse(URIString))))).
+
+
+%%-------------------------------------------------------------------------
+%% Parse URIs
+%%-------------------------------------------------------------------------
+-spec parse(URIString) -> URIMap when
+ URIString :: uri_string(),
+ URIMap :: uri_map()
+ | error().
+parse(URIString) when is_binary(URIString) ->
+ try parse_uri_reference(URIString, #{})
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end;
+parse(URIString) when is_list(URIString) ->
+ try
+ Binary = unicode:characters_to_binary(URIString),
+ Map = parse_uri_reference(Binary, #{}),
+ convert_mapfields_to_list(Map)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Recompose URIs
+%%-------------------------------------------------------------------------
+-spec recompose(URIMap) -> URIString when
+ URIMap :: uri_map(),
+ URIString :: uri_string()
+ | error().
+recompose(Map) ->
+ case is_valid_map(Map) of
+ false ->
+ {error, invalid_map, Map};
+ true ->
+ try
+ T0 = update_scheme(Map, empty),
+ T1 = update_userinfo(Map, T0),
+ T2 = update_host(Map, T1),
+ T3 = update_port(Map, T2),
+ T4 = update_path(Map, T3),
+ T5 = update_query(Map, T4),
+ update_fragment(Map, T5)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Transcode URIs
+%%-------------------------------------------------------------------------
+-spec transcode(URIString, Options) -> Result when
+ URIString :: uri_string(),
+ Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}],
+ Result :: uri_string()
+ | error().
+transcode(URIString, Options) when is_binary(URIString) ->
+ try
+ InEnc = proplists:get_value(in_encoding, Options, utf8),
+ OutEnc = proplists:get_value(out_encoding, Options, utf8),
+ List = convert_to_list(URIString, InEnc),
+ Output = transcode(List, [], InEnc, OutEnc),
+ convert_to_binary(Output, utf8, OutEnc)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end;
+transcode(URIString, Options) when is_list(URIString) ->
+ InEnc = proplists:get_value(in_encoding, Options, utf8),
+ OutEnc = proplists:get_value(out_encoding, Options, utf8),
+ Flattened = flatten_list(URIString, InEnc),
+ try transcode(Flattened, [], InEnc, OutEnc)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end.
+
+
+%%%========================================================================
+%%% Internal functions
+%%%========================================================================
+
+%%-------------------------------------------------------------------------
+%% Converts Map fields to lists
+%%-------------------------------------------------------------------------
+convert_mapfields_to_list(Map) ->
+ Fun = fun (_, V) when is_binary(V) -> unicode:characters_to_list(V);
+ (_, V) -> V end,
+ maps:map(Fun, Map).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 4.1. URI Reference]
+%%
+%% URI-reference is used to denote the most common usage of a resource
+%% identifier.
+%%
+%% URI-reference = URI / relative-ref
+%%-------------------------------------------------------------------------
+-spec parse_uri_reference(binary(), uri_map()) -> uri_map().
+parse_uri_reference(<<>>, _) -> #{path => <<>>};
+parse_uri_reference(URIString, URI) ->
+ try parse_scheme_start(URIString, URI)
+ catch
+ throw:{_,_,_} ->
+ parse_relative_part(URIString, URI)
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 4.2. Relative Reference]
+%%
+%% A relative reference takes advantage of the hierarchical syntax
+%% (Section 1.2.3) to express a URI reference relative to the name space
+%% of another hierarchical URI.
+%%
+%% relative-ref = relative-part [ "?" query ] [ "#" fragment ]
+%%
+%% relative-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-noscheme
+%% / path-empty
+%%-------------------------------------------------------------------------
+-spec parse_relative_part(binary(), uri_map()) -> uri_map().
+parse_relative_part(?STRING_REST("//", Rest), URI) ->
+ %% Parse userinfo - "//" is NOT part of authority
+ try parse_userinfo(Rest, URI) of
+ {T, URI1} ->
+ Userinfo = calculate_parsed_userinfo(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{userinfo => decode_userinfo(Userinfo)}
+ catch
+ throw:{_,_,_} ->
+ {T, URI1} = parse_host(Rest, URI),
+ Host = calculate_parsed_host_port(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{host => decode_host(remove_brackets(Host))}
+ end;
+parse_relative_part(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-absolute
+ Path = calculate_parsed_part(Rest, T),
+ URI1#{path => decode_path(?STRING_REST($/, Path))};
+parse_relative_part(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{query => decode_query(Query)};
+parse_relative_part(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{fragment => decode_fragment(Fragment)};
+parse_relative_part(?STRING_REST(Char, Rest), URI) ->
+ case is_segment_nz_nc(Char) of
+ true ->
+ {T, URI1} = parse_segment_nz_nc(Rest, URI), % path-noscheme
+ Path = calculate_parsed_part(Rest, T),
+ URI1#{path => decode_path(?STRING_REST(Char, Path))};
+ false -> throw({error,invalid_uri,[Char]})
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.3. Path]
+%%
+%% The path component contains data, usually organized in hierarchical
+%% form, that, along with data in the non-hierarchical query component
+%% (Section 3.4), serves to identify a resource within the scope of the
+%% URI's scheme and naming authority (if any). The path is terminated
+%% by the first question mark ("?") or number sign ("#") character, or
+%% by the end of the URI.
+%%
+%% path = path-abempty ; begins with "/" or is empty
+%% / path-absolute ; begins with "/" but not "//"
+%% / path-noscheme ; begins with a non-colon segment
+%% / path-rootless ; begins with a segment
+%% / path-empty ; zero characters
+%%
+%% path-abempty = *( "/" segment )
+%% path-absolute = "/" [ segment-nz *( "/" segment ) ]
+%% path-noscheme = segment-nz-nc *( "/" segment )
+%% path-rootless = segment-nz *( "/" segment )
+%% path-empty = 0<pchar>
+%% segment = *pchar
+%% segment-nz = 1*pchar
+%% segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+%% ; non-zero-length segment without any colon ":"
+%%
+%% pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% path-abempty
+%%-------------------------------------------------------------------------
+-spec parse_segment(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_segment(?STRING_REST($/, Rest), URI) ->
+ parse_segment(Rest, URI); % segment
+parse_segment(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_segment(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI),
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_segment(?STRING_REST(Char, Rest), URI) ->
+ case is_pchar(Char) of
+ true -> parse_segment(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_segment(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%%-------------------------------------------------------------------------
+%% path-noscheme
+%%-------------------------------------------------------------------------
+-spec parse_segment_nz_nc(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_segment_nz_nc(?STRING_REST($/, Rest), URI) ->
+ parse_segment(Rest, URI); % segment
+parse_segment_nz_nc(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_segment_nz_nc(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI),
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) ->
+ case is_segment_nz_nc(Char) of
+ true -> parse_segment_nz_nc(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_segment_nz_nc(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%% Check if char is pchar.
+-spec is_pchar(char()) -> boolean().
+is_pchar($%) -> true; % pct-encoded
+is_pchar($:) -> true;
+is_pchar($@) -> true;
+is_pchar(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+%% Check if char is segment_nz_nc.
+-spec is_segment_nz_nc(char()) -> boolean().
+is_segment_nz_nc($%) -> true; % pct-encoded
+is_segment_nz_nc($@) -> true;
+is_segment_nz_nc(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.1. Scheme]
+%%
+%% Each URI begins with a scheme name that refers to a specification for
+%% assigning identifiers within that scheme.
+%%
+%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+%%-------------------------------------------------------------------------
+-spec parse_scheme_start(binary(), uri_map()) -> uri_map().
+parse_scheme_start(?STRING_REST(Char, Rest), URI) ->
+ case is_alpha(Char) of
+ true -> {T, URI1} = parse_scheme(Rest, URI),
+ Scheme = calculate_parsed_scheme(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{scheme => ?STRING_REST(Char, Scheme)};
+ false -> throw({error,invalid_uri,[Char]})
+ end.
+
+%% Add path component if it missing after parsing the URI.
+%% According to the URI specification there is always a
+%% path component in every URI-reference and it can be
+%% empty.
+maybe_add_path(Map) ->
+ case maps:is_key(path, Map) of
+ false ->
+ Map#{path => <<>>};
+ _Else ->
+ Map
+ end.
+
+
+-spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_scheme(?STRING_REST($:, Rest), URI) ->
+ {_, URI1} = parse_hier(Rest, URI),
+ {Rest, URI1};
+parse_scheme(?STRING_REST(Char, Rest), URI) ->
+ case is_scheme(Char) of
+ true -> parse_scheme(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_scheme(?STRING_EMPTY, _URI) ->
+ throw({error,invalid_uri,<<>>}).
+
+
+%% Check if char is allowed in scheme
+-spec is_scheme(char()) -> boolean().
+is_scheme($+) -> true;
+is_scheme($-) -> true;
+is_scheme($.) -> true;
+is_scheme(Char) -> is_alpha(Char) orelse is_digit(Char).
+
+
+%%-------------------------------------------------------------------------
+%% hier-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-rootless
+%% / path-empty
+%%-------------------------------------------------------------------------
+-spec parse_hier(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_hier(?STRING_REST("//", Rest), URI) ->
+ % Parse userinfo - "//" is NOT part of authority
+ try parse_userinfo(Rest, URI) of
+ {T, URI1} ->
+ Userinfo = calculate_parsed_userinfo(Rest, T),
+ {Rest, URI1#{userinfo => decode_userinfo(Userinfo)}}
+ catch
+ throw:{_,_,_} ->
+ {T, URI1} = parse_host(Rest, URI),
+ Host = calculate_parsed_host_port(Rest, T),
+ {Rest, URI1#{host => decode_host(remove_brackets(Host))}}
+ end;
+parse_hier(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-absolute
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_hier(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_hier(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless
+ case is_pchar(Char) of
+ true -> % segment_nz
+ {T, URI1} = parse_segment(Rest, URI),
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST(Char, Path))}};
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_hier(?STRING_EMPTY, URI) ->
+ {<<>>, URI}.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.2. Authority]
+%%
+%% Many URI schemes include a hierarchical element for a naming
+%% authority so that governance of the name space defined by the
+%% remainder of the URI is delegated to that authority (which may, in
+%% turn, delegate it further).
+%%
+%% The authority component is preceded by a double slash ("//") and is
+%% terminated by the next slash ("/"), question mark ("?"), or number
+%% sign ("#") character, or by the end of the URI.
+%%
+%% authority = [ userinfo "@" ] host [ ":" port ]
+%%
+%%
+%% [RFC 3986, Chapter 3.2.1. User Information]
+%%
+%% The userinfo subcomponent may consist of a user name and, optionally,
+%% scheme-specific information about how to gain authorization to access
+%% the resource. The user information, if present, is followed by a
+%% commercial at-sign ("@") that delimits it from the host.
+%%
+%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+%%-------------------------------------------------------------------------
+-spec parse_userinfo(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_userinfo(?CHAR($@), URI) ->
+ {?STRING_EMPTY, URI#{host => <<>>}};
+parse_userinfo(?STRING_REST($@, Rest), URI) ->
+ {T, URI1} = parse_host(Rest, URI),
+ Host = calculate_parsed_host_port(Rest, T),
+ {Rest, URI1#{host => decode_host(remove_brackets(Host))}};
+parse_userinfo(?STRING_REST(Char, Rest), URI) ->
+ case is_userinfo(Char) of
+ true -> parse_userinfo(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_userinfo(?STRING_EMPTY, _URI) ->
+ %% URI cannot end in userinfo state
+ throw({error,invalid_uri,<<>>}).
+
+
+%% Check if char is allowed in userinfo
+-spec is_userinfo(char()) -> boolean().
+is_userinfo($%) -> true; % pct-encoded
+is_userinfo($:) -> true;
+is_userinfo(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.2.2. Host]
+%%
+%% The host subcomponent of authority is identified by an IP literal
+%% encapsulated within square brackets, an IPv4 address in dotted-
+%% decimal form, or a registered name.
+%%
+%% host = IP-literal / IPv4address / reg-name
+%%
+%% IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+%%
+%% IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
+%%
+%% IPv6address = 6( h16 ":" ) ls32
+%% / "::" 5( h16 ":" ) ls32
+%% / [ h16 ] "::" 4( h16 ":" ) ls32
+%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
+%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
+%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
+%% / [ *4( h16 ":" ) h16 ] "::" ls32
+%% / [ *5( h16 ":" ) h16 ] "::" h16
+%% / [ *6( h16 ":" ) h16 ] "::"
+%%
+%% ls32 = ( h16 ":" h16 ) / IPv4address
+%% ; least-significant 32 bits of address
+%%
+%% h16 = 1*4HEXDIG
+%% ; 16 bits of address represented in hexadecimal
+%%
+%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
+%%
+%% dec-octet = DIGIT ; 0-9
+%% / %x31-39 DIGIT ; 10-99
+%% / "1" 2DIGIT ; 100-199
+%% / "2" %x30-34 DIGIT ; 200-249
+%% / "25" %x30-35 ; 250-255
+%%
+%% reg-name = *( unreserved / pct-encoded / sub-delims )
+%%-------------------------------------------------------------------------
+-spec parse_host(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_host(?STRING_REST($:, Rest), URI) ->
+ {T, URI1} = parse_port(Rest, URI),
+ H = calculate_parsed_host_port(Rest, T),
+ Port = get_port(H),
+ {Rest, URI1#{port => Port}};
+parse_host(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_host(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_host(?STRING_REST($[, Rest), URI) ->
+ parse_ipv6_bin(Rest, [], URI);
+parse_host(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_host(?STRING_REST(Char, Rest), URI) ->
+ case is_digit(Char) of
+ true -> parse_ipv4_bin(Rest, [Char], URI);
+ false -> parse_reg_name(?STRING_REST(Char, Rest), URI)
+ end;
+parse_host(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+-spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_reg_name(?STRING_REST($:, Rest), URI) ->
+ {T, URI1} = parse_port(Rest, URI),
+ H = calculate_parsed_host_port(Rest, T),
+ Port = get_port(H),
+ {Rest, URI1#{port => Port}};
+parse_reg_name(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_reg_name(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_reg_name(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_reg_name(?STRING_REST(Char, Rest), URI) ->
+ case is_reg_name(Char) of
+ true -> parse_reg_name(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_reg_name(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+%% Check if char is allowed in reg-name
+-spec is_reg_name(char()) -> boolean().
+is_reg_name($%) -> true;
+is_reg_name(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+
+-spec parse_ipv4_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}.
+parse_ipv4_bin(?STRING_REST($:, Rest), Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {T, URI1} = parse_port(Rest, URI),
+ H = calculate_parsed_host_port(Rest, T),
+ Port = get_port(H),
+ {Rest, URI1#{port => Port}};
+parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) ->
+ case is_ipv4(Char) of
+ true -> parse_ipv4_bin(Rest, [Char|Acc], URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_ipv4_bin(?STRING_EMPTY, Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {?STRING_EMPTY, URI}.
+
+
+%% Check if char is allowed in IPv4 addresses
+-spec is_ipv4(char()) -> boolean().
+is_ipv4($.) -> true;
+is_ipv4(Char) -> is_digit(Char).
+
+-spec validate_ipv4_address(list()) -> list().
+validate_ipv4_address(Addr) ->
+ case inet:parse_ipv4strict_address(Addr) of
+ {ok, _} -> Addr;
+ {error, _} -> throw({error,invalid_uri,Addr})
+ end.
+
+
+-spec parse_ipv6_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}.
+parse_ipv6_bin(?STRING_REST($], Rest), Acc, URI) ->
+ _ = validate_ipv6_address(lists:reverse(Acc)),
+ parse_ipv6_bin_end(Rest, URI);
+parse_ipv6_bin(?STRING_REST(Char, Rest), Acc, URI) ->
+ case is_ipv6(Char) of
+ true -> parse_ipv6_bin(Rest, [Char|Acc], URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_ipv6_bin(?STRING_EMPTY, _Acc, _URI) ->
+ throw({error,invalid_uri,<<>>}).
+
+%% Check if char is allowed in IPv6 addresses
+-spec is_ipv6(char()) -> boolean().
+is_ipv6($:) -> true;
+is_ipv6($.) -> true;
+is_ipv6(Char) -> is_hex_digit(Char).
+
+
+-spec parse_ipv6_bin_end(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) ->
+ {T, URI1} = parse_port(Rest, URI),
+ H = calculate_parsed_host_port(Rest, T),
+ Port = get_port(H),
+ {Rest, URI1#{port => Port}};
+parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) ->
+ case is_ipv6(Char) of
+ true -> parse_ipv6_bin_end(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_ipv6_bin_end(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+-spec validate_ipv6_address(list()) -> list().
+validate_ipv6_address(Addr) ->
+ case inet:parse_ipv6strict_address(Addr) of
+ {ok, _} -> Addr;
+ {error, _} -> throw({error,invalid_uri,Addr})
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.2.2. Port]
+%%
+%% The port subcomponent of authority is designated by an optional port
+%% number in decimal following the host and delimited from it by a
+%% single colon (":") character.
+%%
+%% port = *DIGIT
+%%-------------------------------------------------------------------------
+-spec parse_port(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_port(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_port(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_port(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_port(?STRING_REST(Char, Rest), URI) ->
+ case is_digit(Char) of
+ true -> parse_port(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_port(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.4. Query]
+%%
+%% The query component contains non-hierarchical data that, along with
+%% data in the path component (Section 3.3), serves to identify a
+%% resource within the scope of the URI's scheme and naming authority
+%% (if any). The query component is indicated by the first question
+%% mark ("?") character and terminated by a number sign ("#") character
+%% or by the end of the URI.
+%%
+%% query = *( pchar / "/" / "?" )
+%%-------------------------------------------------------------------------
+-spec parse_query(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_query(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI),
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_query(?STRING_REST(Char, Rest), URI) ->
+ case is_query(Char) of
+ true -> parse_query(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_query(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%% Check if char is allowed in query
+-spec is_query(char()) -> boolean().
+is_query($/) -> true;
+is_query($?) -> true;
+is_query(Char) -> is_pchar(Char).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.5. Fragment]
+%%
+%% The fragment identifier component of a URI allows indirect
+%% identification of a secondary resource by reference to a primary
+%% resource and additional identifying information.
+%%
+%% fragment = *( pchar / "/" / "?" )
+%%-------------------------------------------------------------------------
+-spec parse_fragment(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_fragment(?STRING_REST(Char, Rest), URI) ->
+ case is_fragment(Char) of
+ true -> parse_fragment(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_fragment(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%% Check if char is allowed in fragment
+-spec is_fragment(char()) -> boolean().
+is_fragment($/) -> true;
+is_fragment($?) -> true;
+is_fragment(Char) -> is_pchar(Char).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 2.2. Reserved Characters]
+%%
+%% reserved = gen-delims / sub-delims
+%%
+%% gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+%%
+%% sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+%% / "*" / "+" / "," / ";" / "="
+%%
+%%-------------------------------------------------------------------------
+
+%% Check if char is sub-delim.
+-spec is_sub_delim(char()) -> boolean().
+is_sub_delim($!) -> true;
+is_sub_delim($$) -> true;
+is_sub_delim($&) -> true;
+is_sub_delim($') -> true;
+is_sub_delim($() -> true;
+is_sub_delim($)) -> true;
+
+is_sub_delim($*) -> true;
+is_sub_delim($+) -> true;
+is_sub_delim($,) -> true;
+is_sub_delim($;) -> true;
+is_sub_delim($=) -> true;
+is_sub_delim(_) -> false.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 2.3. Unreserved Characters]
+%%
+%% unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+%%
+%%-------------------------------------------------------------------------
+-spec is_unreserved(char()) -> boolean().
+is_unreserved($-) -> true;
+is_unreserved($.) -> true;
+is_unreserved($_) -> true;
+is_unreserved($~) -> true;
+is_unreserved(Char) -> is_alpha(Char) orelse is_digit(Char).
+
+-spec is_alpha(char()) -> boolean().
+is_alpha(C)
+ when $A =< C, C =< $Z;
+ $a =< C, C =< $z -> true;
+is_alpha(_) -> false.
+
+-spec is_digit(char()) -> boolean().
+is_digit(C)
+ when $0 =< C, C =< $9 -> true;
+is_digit(_) -> false.
+
+-spec is_hex_digit(char()) -> boolean().
+is_hex_digit(C)
+ when $0 =< C, C =< $9;$a =< C, C =< $f;$A =< C, C =< $F -> true;
+is_hex_digit(_) -> false.
+
+
+%% Remove enclosing brackets from binary
+-spec remove_brackets(binary()) -> binary().
+remove_brackets(<<$[/utf8, Rest/binary>>) ->
+ {H,T} = split_binary(Rest, byte_size(Rest) - 1),
+ case T =:= <<$]/utf8>> of
+ true -> H;
+ false -> Rest
+ end;
+remove_brackets(Addr) -> Addr.
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for calculating the parsed binary.
+%%-------------------------------------------------------------------------
+-spec calculate_parsed_scheme(binary(), binary()) -> binary().
+calculate_parsed_scheme(Input, <<>>) ->
+ strip_last_char(Input, [$:]);
+calculate_parsed_scheme(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+-spec calculate_parsed_part(binary(), binary()) -> binary().
+calculate_parsed_part(Input, <<>>) ->
+ strip_last_char(Input, [$?,$#]);
+calculate_parsed_part(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+-spec calculate_parsed_userinfo(binary(), binary()) -> binary().
+calculate_parsed_userinfo(Input, <<>>) ->
+ strip_last_char(Input, [$?,$#,$@]);
+calculate_parsed_userinfo(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+-spec calculate_parsed_host_port(binary(), binary()) -> binary().
+calculate_parsed_host_port(Input, <<>>) ->
+ strip_last_char(Input, [$:,$?,$#,$/]);
+calculate_parsed_host_port(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+calculate_parsed_query_fragment(Input, <<>>) ->
+ strip_last_char(Input, [$#]);
+calculate_parsed_query_fragment(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+get_port(<<>>) ->
+ undefined;
+get_port(B) ->
+ try binary_to_integer(B)
+ catch
+ error:badarg ->
+ throw({error, invalid_uri, B})
+ end.
+
+
+%% Strip last char if it is in list
+%%
+%% This function is optimized for speed: parse/1 is about 10% faster than
+%% with an alternative implementation based on lists and sets.
+strip_last_char(<<>>, _) -> <<>>;
+strip_last_char(Input, [C0]) ->
+ case binary:last(Input) of
+ C0 ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
+strip_last_char(Input, [C0,C1]) ->
+ case binary:last(Input) of
+ C0 ->
+ init_binary(Input);
+ C1 ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
+strip_last_char(Input, [C0,C1,C2]) ->
+ case binary:last(Input) of
+ C0 ->
+ init_binary(Input);
+ C1 ->
+ init_binary(Input);
+ C2 ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
+strip_last_char(Input, [C0,C1,C2,C3]) ->
+ case binary:last(Input) of
+ C0 ->
+ init_binary(Input);
+ C1 ->
+ init_binary(Input);
+ C2 ->
+ init_binary(Input);
+ C3 ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end.
+
+
+%% Get parsed binary
+get_parsed_binary(Input, Unparsed) ->
+ {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)),
+ First.
+
+
+%% Return all bytes of the binary except the last one. The binary must be non-empty.
+init_binary(B) ->
+ {Init, _} =
+ split_binary(B, byte_size(B) - 1),
+ Init.
+
+
+%% Returns the size of a binary exluding the first element.
+%% Used in calls to split_binary().
+-spec byte_size_exl_head(binary()) -> number().
+byte_size_exl_head(<<>>) -> 0;
+byte_size_exl_head(Binary) -> byte_size(Binary) + 1.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 2.1. Percent-Encoding]
+%%
+%% A percent-encoding mechanism is used to represent a data octet in a
+%% component when that octet's corresponding character is outside the
+%% allowed set or is being used as a delimiter of, or within, the
+%% component. A percent-encoded octet is encoded as a character
+%% triplet, consisting of the percent character "%" followed by the two
+%% hexadecimal digits representing that octet's numeric value. For
+%% example, "%20" is the percent-encoding for the binary octet
+%% "00100000" (ABNF: %x20), which in US-ASCII corresponds to the space
+%% character (SP). Section 2.4 describes when percent-encoding and
+%% decoding is applied.
+%%
+%% pct-encoded = "%" HEXDIG HEXDIG
+%%-------------------------------------------------------------------------
+-spec decode_userinfo(binary()) -> binary().
+decode_userinfo(Cs) ->
+ check_utf8(decode(Cs, fun is_userinfo/1, <<>>)).
+
+-spec decode_host(binary()) -> binary().
+decode_host(Cs) ->
+ check_utf8(decode(Cs, fun is_host/1, <<>>)).
+
+-spec decode_path(binary()) -> binary().
+decode_path(Cs) ->
+ check_utf8(decode(Cs, fun is_path/1, <<>>)).
+
+-spec decode_query(binary()) -> binary().
+decode_query(Cs) ->
+ check_utf8(decode(Cs, fun is_query/1, <<>>)).
+
+-spec decode_fragment(binary()) -> binary().
+decode_fragment(Cs) ->
+ check_utf8(decode(Cs, fun is_fragment/1, <<>>)).
+
+
+%% Returns Cs if it is utf8 encoded.
+check_utf8(Cs) ->
+ case unicode:characters_to_list(Cs) of
+ {incomplete,_,_} ->
+ throw({error,invalid_utf8,Cs});
+ {error,_,_} ->
+ throw({error,invalid_utf8,Cs});
+ _ -> Cs
+ end.
+
+%%-------------------------------------------------------------------------
+%% Percent-encode
+%%-------------------------------------------------------------------------
+
+%% Only validates as scheme cannot have percent-encoded characters
+-spec encode_scheme(list()|binary()) -> list() | binary().
+encode_scheme([]) ->
+ throw({error,invalid_scheme,""});
+encode_scheme(<<>>) ->
+ throw({error,invalid_scheme,<<>>});
+encode_scheme(Scheme) ->
+ case validate_scheme(Scheme) of
+ true -> Scheme;
+ false -> throw({error,invalid_scheme,Scheme})
+ end.
+
+-spec encode_userinfo(list()|binary()) -> list() | binary().
+encode_userinfo(Cs) ->
+ encode(Cs, fun is_userinfo/1).
+
+-spec encode_host(list()|binary()) -> list() | binary().
+encode_host(Cs) ->
+ case classify_host(Cs) of
+ regname -> Cs;
+ ipv4 -> Cs;
+ ipv6 -> bracket_ipv6(Cs);
+ other -> encode(Cs, fun is_reg_name/1)
+ end.
+
+-spec encode_path(list()|binary()) -> list() | binary().
+encode_path(Cs) ->
+ encode(Cs, fun is_path/1).
+
+-spec encode_query(list()|binary()) -> list() | binary().
+encode_query(Cs) ->
+ encode(Cs, fun is_query/1).
+
+-spec encode_fragment(list()|binary()) -> list() | binary().
+encode_fragment(Cs) ->
+ encode(Cs, fun is_fragment/1).
+
+%%-------------------------------------------------------------------------
+%% Helper funtions for percent-decode
+%%-------------------------------------------------------------------------
+decode(<<$%,C0,C1,Cs/binary>>, Fun, Acc) ->
+ case is_hex_digit(C0) andalso is_hex_digit(C1) of
+ true ->
+ B = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
+ decode(Cs, Fun, <<Acc/binary, B>>);
+ false -> throw({error,invalid_percent_encoding,<<$%,C0,C1>>})
+ end;
+decode(<<C,Cs/binary>>, Fun, Acc) ->
+ case Fun(C) of
+ true -> decode(Cs, Fun, <<Acc/binary, C>>);
+ false -> throw({error,invalid_percent_encoding,<<C,Cs/binary>>})
+ end;
+decode(<<>>, _Fun, Acc) ->
+ Acc.
+
+%% Check if char is allowed in host
+-spec is_host(char()) -> boolean().
+is_host($:) -> true;
+is_host(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+%% Check if char is allowed in path
+-spec is_path(char()) -> boolean().
+is_path($/) -> true;
+is_path(Char) -> is_pchar(Char).
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for percent-encode
+%%-------------------------------------------------------------------------
+-spec encode(list()|binary(), fun()) -> list() | binary().
+encode(Component, Fun) when is_list(Component) ->
+ B = unicode:characters_to_binary(Component),
+ unicode:characters_to_list(encode(B, Fun, <<>>));
+encode(Component, Fun) when is_binary(Component) ->
+ encode(Component, Fun, <<>>).
+%%
+encode(<<Char/utf8, Rest/binary>>, Fun, Acc) ->
+ C = encode_codepoint_binary(Char, Fun),
+ encode(Rest, Fun, <<Acc/binary,C/binary>>);
+encode(<<Char, Rest/binary>>, _Fun, _Acc) ->
+ throw({error,invalid_input,<<Char,Rest/binary>>});
+encode(<<>>, _Fun, Acc) ->
+ Acc.
+
+
+-spec encode_codepoint_binary(integer(), fun()) -> binary().
+encode_codepoint_binary(C, Fun) ->
+ case Fun(C) of
+ false -> percent_encode_binary(C);
+ true -> <<C>>
+ end.
+
+
+-spec percent_encode_binary(integer()) -> binary().
+percent_encode_binary(Code) ->
+ percent_encode_binary(<<Code/utf8>>, <<>>).
+
+
+percent_encode_binary(<<A:4,B:4,Rest/binary>>, Acc) ->
+ percent_encode_binary(Rest, <<Acc/binary,$%,(?DEC2HEX(A)),(?DEC2HEX(B))>>);
+percent_encode_binary(<<>>, Acc) ->
+ Acc.
+
+
+%%-------------------------------------------------------------------------
+%%-------------------------------------------------------------------------
+validate_scheme([]) -> true;
+validate_scheme([H|T]) ->
+ case is_scheme(H) of
+ true -> validate_scheme(T);
+ false -> false
+ end;
+validate_scheme(<<>>) -> true;
+validate_scheme(<<H, Rest/binary>>) ->
+ case is_scheme(H) of
+ true -> validate_scheme(Rest);
+ false -> false
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Classifies hostname into the following categories:
+%% regname, ipv4 - address does not contain reserved characters to be
+%% percent-encoded
+%% ipv6 - address does not contain reserved characters but it shall be
+%% encolsed in brackets
+%% other - address shall be percent-encoded
+%%-------------------------------------------------------------------------
+classify_host([]) -> other;
+classify_host(Addr) when is_binary(Addr) ->
+ A = unicode:characters_to_list(Addr),
+ classify_host_ipv6(A);
+classify_host(Addr) ->
+ classify_host_ipv6(Addr).
+
+classify_host_ipv6(Addr) ->
+ case is_ipv6_address(Addr) of
+ true -> ipv6;
+ false -> classify_host_ipv4(Addr)
+ end.
+
+classify_host_ipv4(Addr) ->
+ case is_ipv4_address(Addr) of
+ true -> ipv4;
+ false -> classify_host_regname(Addr)
+ end.
+
+classify_host_regname([]) -> regname;
+classify_host_regname([H|T]) ->
+ case is_reg_name(H) of
+ true -> classify_host_regname(T);
+ false -> other
+ end.
+
+is_ipv4_address(Addr) ->
+ case inet:parse_ipv4strict_address(Addr) of
+ {ok, _} -> true;
+ {error, _} -> false
+ end.
+
+is_ipv6_address(Addr) ->
+ case inet:parse_ipv6strict_address(Addr) of
+ {ok, _} -> true;
+ {error, _} -> false
+ end.
+
+bracket_ipv6(Addr) when is_binary(Addr) ->
+ concat(<<$[,Addr/binary>>,<<$]>>);
+bracket_ipv6(Addr) when is_list(Addr) ->
+ [$[|Addr] ++ "]".
+
+
+%%-------------------------------------------------------------------------
+%% Helper funtions for recompose
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% Checks if input Map has valid combination of fields that can be
+%% recomposed into a URI.
+%%
+%% The implementation is based on a decision tree that fulfills the
+%% following rules:
+%% - 'path' shall always be present in the input map
+%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+%% hier-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-rootless
+%% / path-empty
+%% - 'host' shall be present in the input map when 'path' starts with
+%% two slashes ("//")
+%% path = path-abempty ; begins with "/" or is empty
+%% / path-absolute ; begins with "/" but not "//"
+%% / path-noscheme ; begins with a non-colon segment
+%% / path-rootless ; begins with a segment
+%% / path-empty ; zero characters
+%% path-abempty = *( "/" segment )
+%% segment = *pchar
+%% - 'host' shall be present if userinfo or port is present in input map
+%% authority = [ userinfo "@" ] host [ ":" port ]
+%% - All fields shall be valid (scheme, userinfo, host, port, path, query
+%% or fragment).
+%%-------------------------------------------------------------------------
+is_valid_map(#{path := Path} = Map) ->
+ ((starts_with_two_slash(Path) andalso is_valid_map_host(Map))
+ orelse
+ (maps:is_key(userinfo, Map) andalso is_valid_map_host(Map))
+ orelse
+ (maps:is_key(port, Map) andalso is_valid_map_host(Map))
+ orelse
+ all_fields_valid(Map));
+is_valid_map(#{}) ->
+ false.
+
+
+is_valid_map_host(Map) ->
+ maps:is_key(host, Map) andalso all_fields_valid(Map).
+
+
+all_fields_valid(Map) ->
+ Fun = fun(scheme, _, Acc) -> Acc;
+ (userinfo, _, Acc) -> Acc;
+ (host, _, Acc) -> Acc;
+ (port, _, Acc) -> Acc;
+ (path, _, Acc) -> Acc;
+ (query, _, Acc) -> Acc;
+ (fragment, _, Acc) -> Acc;
+ (_, _, _) -> false
+ end,
+ maps:fold(Fun, true, Map).
+
+
+starts_with_two_slash([$/,$/|_]) ->
+ true;
+starts_with_two_slash(?STRING_REST("//", _)) ->
+ true;
+starts_with_two_slash(_) -> false.
+
+
+update_scheme(#{scheme := Scheme}, _) ->
+ add_colon_postfix(encode_scheme(Scheme));
+update_scheme(#{}, _) ->
+ empty.
+
+
+update_userinfo(#{userinfo := Userinfo}, empty) ->
+ add_auth_prefix(encode_userinfo(Userinfo));
+update_userinfo(#{userinfo := Userinfo}, URI) ->
+ concat(URI,add_auth_prefix(encode_userinfo(Userinfo)));
+update_userinfo(#{}, empty) ->
+ empty;
+update_userinfo(#{}, URI) ->
+ URI.
+
+
+update_host(#{host := Host}, empty) ->
+ add_auth_prefix(encode_host(Host));
+update_host(#{host := Host} = Map, URI) ->
+ concat(URI,add_host_prefix(Map, encode_host(Host)));
+update_host(#{}, empty) ->
+ empty;
+update_host(#{}, URI) ->
+ URI.
+
+
+%% URI cannot be empty for ports. E.g. ":8080" is not a valid URI
+update_port(#{port := undefined}, URI) ->
+ concat(URI, <<":">>);
+update_port(#{port := Port}, URI) ->
+ concat(URI,add_colon(encode_port(Port)));
+update_port(#{}, URI) ->
+ URI.
+
+
+update_path(#{path := Path}, empty) ->
+ encode_path(Path);
+update_path(#{path := Path}, URI) ->
+ concat(URI,encode_path(Path));
+update_path(#{}, empty) ->
+ empty;
+update_path(#{}, URI) ->
+ URI.
+
+
+update_query(#{query := Query}, empty) ->
+ encode_query(Query);
+update_query(#{query := Query}, URI) ->
+ concat(URI,add_question_mark(encode_query(Query)));
+update_query(#{}, empty) ->
+ empty;
+update_query(#{}, URI) ->
+ URI.
+
+
+update_fragment(#{fragment := Fragment}, empty) ->
+ add_hashmark(encode_fragment(Fragment));
+update_fragment(#{fragment := Fragment}, URI) ->
+ concat(URI,add_hashmark(encode_fragment(Fragment)));
+update_fragment(#{}, empty) ->
+ "";
+update_fragment(#{}, URI) ->
+ URI.
+
+%%-------------------------------------------------------------------------
+%% Concatenates its arguments that can be lists and binaries.
+%% The result is a list if at least one of its argument is a list and
+%% binary otherwise.
+%%-------------------------------------------------------------------------
+concat(A, B) when is_binary(A), is_binary(B) ->
+ <<A/binary, B/binary>>;
+concat(A, B) when is_binary(A), is_list(B) ->
+ unicode:characters_to_list(A) ++ B;
+concat(A, B) when is_list(A) ->
+ A ++ maybe_to_list(B).
+
+add_hashmark(Comp) when is_binary(Comp) ->
+ <<$#, Comp/binary>>;
+add_hashmark(Comp) when is_list(Comp) ->
+ [$#|Comp].
+
+add_question_mark(Comp) when is_binary(Comp) ->
+ <<$?, Comp/binary>>;
+add_question_mark(Comp) when is_list(Comp) ->
+ [$?|Comp].
+
+add_colon(Comp) when is_binary(Comp) ->
+ <<$:, Comp/binary>>.
+
+add_colon_postfix(Comp) when is_binary(Comp) ->
+ <<Comp/binary,$:>>;
+add_colon_postfix(Comp) when is_list(Comp) ->
+ Comp ++ ":".
+
+add_auth_prefix(Comp) when is_binary(Comp) ->
+ <<"//", Comp/binary>>;
+add_auth_prefix(Comp) when is_list(Comp) ->
+ [$/,$/|Comp].
+
+add_host_prefix(#{userinfo := _}, Host) when is_binary(Host) ->
+ <<$@,Host/binary>>;
+add_host_prefix(#{}, Host) when is_binary(Host) ->
+ <<"//",Host/binary>>;
+add_host_prefix(#{userinfo := _}, Host) when is_list(Host) ->
+ [$@|Host];
+add_host_prefix(#{}, Host) when is_list(Host) ->
+ [$/,$/|Host].
+
+maybe_to_list(Comp) when is_binary(Comp) -> unicode:characters_to_list(Comp);
+maybe_to_list(Comp) -> Comp.
+
+encode_port(Port) ->
+ integer_to_binary(Port).
+
+%%-------------------------------------------------------------------------
+%% Helper functions for transcode
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% uri_string:transcode(<<"x%00%00%00%F6"/utf32>>).
+%% 1. Convert (transcode/2) input to list form (list of unicode codepoints)
+%% "x%00%00%00%F6"
+%% 2. Accumulate characters until percent-encoded segment (transcode/4).
+%% Acc = "x"
+%% 3. Convert percent-encoded triplets to binary form (transcode_pct/4)
+%% <<0,0,0,246>>
+%% 4. Transcode in-encoded binary to out-encoding (utf32 -> utf8):
+%% <<195,182>>
+%% 5. Percent-encode out-encoded binary:
+%% <<"%C3%B6"/utf8>> = <<37,67,51,37,66,54>>
+%% 6. Convert binary to list form, reverse it and append the accumulator
+%% "6B%3C%" + "x"
+%% 7. Reverse Acc and return it
+%%-------------------------------------------------------------------------
+transcode([$%,_C0,_C1|_Rest] = L, Acc, InEnc, OutEnc) ->
+ transcode_pct(L, Acc, <<>>, InEnc, OutEnc);
+transcode([_C|_Rest] = L, Acc, InEnc, OutEnc) ->
+ transcode(L, Acc, [], InEnc, OutEnc).
+%%
+transcode([$%,_C0,_C1|_Rest] = L, Acc, List, InEncoding, OutEncoding) ->
+ transcode_pct(L, List ++ Acc, <<>>, InEncoding, OutEncoding);
+transcode([C|Rest], Acc, List, InEncoding, OutEncoding) ->
+ transcode(Rest, Acc, [C|List], InEncoding, OutEncoding);
+transcode([], Acc, List, _InEncoding, _OutEncoding) ->
+ lists:reverse(List ++ Acc).
+
+
+%% Transcode percent-encoded segment
+transcode_pct([$%,C0,C1|Rest] = L, Acc, B, InEncoding, OutEncoding) ->
+ case is_hex_digit(C0) andalso is_hex_digit(C1) of
+ true ->
+ Int = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
+ transcode_pct(Rest, Acc, <<B/binary, Int>>, InEncoding, OutEncoding);
+ false -> throw({error, invalid_percent_encoding,L})
+ end;
+transcode_pct([_C|_Rest] = L, Acc, B, InEncoding, OutEncoding) ->
+ OutBinary = convert_to_binary(B, InEncoding, OutEncoding),
+ PctEncUtf8 = percent_encode_segment(OutBinary),
+ Out = lists:reverse(convert_to_list(PctEncUtf8, utf8)),
+ transcode(L, Out ++ Acc, [], InEncoding, OutEncoding);
+transcode_pct([], Acc, B, InEncoding, OutEncoding) ->
+ OutBinary = convert_to_binary(B, InEncoding, OutEncoding),
+ PctEncUtf8 = percent_encode_segment(OutBinary),
+ Out = convert_to_list(PctEncUtf8, utf8),
+ lists:reverse(Acc) ++ Out.
+
+
+%% Convert to binary
+convert_to_binary(Binary, InEncoding, OutEncoding) ->
+ case unicode:characters_to_binary(Binary, InEncoding, OutEncoding) of
+ {error, _List, RestData} ->
+ throw({error, invalid_input, RestData});
+ {incomplete, _List, RestData} ->
+ throw({error, invalid_input, RestData});
+ Result ->
+ Result
+ end.
+
+
+%% Convert to list
+convert_to_list(Binary, InEncoding) ->
+ case unicode:characters_to_list(Binary, InEncoding) of
+ {error, _List, RestData} ->
+ throw({error, invalid_input, RestData});
+ {incomplete, _List, RestData} ->
+ throw({error, invalid_input, RestData});
+ Result ->
+ Result
+ end.
+
+
+%% Flatten input list
+flatten_list([], _) ->
+ [];
+flatten_list(L, InEnc) ->
+ flatten_list(L, InEnc, []).
+%%
+flatten_list([H|T], InEnc, Acc) when is_binary(H) ->
+ L = convert_to_list(H, InEnc),
+ flatten_list(T, InEnc, lists:reverse(L) ++ Acc);
+flatten_list([H|T], InEnc, Acc) when is_list(H) ->
+ flatten_list(H ++ T, InEnc, Acc);
+flatten_list([H|T], InEnc, Acc) ->
+ flatten_list(T, InEnc, [H|Acc]);
+flatten_list([], _InEnc, Acc) ->
+ lists:reverse(Acc);
+flatten_list(Arg, _, _) ->
+ throw({error, invalid_input, Arg}).
+
+
+percent_encode_segment(Segment) ->
+ percent_encode_binary(Segment, <<>>).
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for normalize
+%%-------------------------------------------------------------------------
+
+%% 6.2.2.1. Case Normalization
+normalize_case(#{scheme := Scheme, host := Host} = Map) ->
+ Map#{scheme => to_lower(Scheme),
+ host => to_lower(Host)};
+normalize_case(#{host := Host} = Map) ->
+ Map#{host => to_lower(Host)};
+normalize_case(#{scheme := Scheme} = Map) ->
+ Map#{scheme => to_lower(Scheme)};
+normalize_case(#{} = Map) ->
+ Map.
+
+
+to_lower(Cs) when is_list(Cs) ->
+ B = convert_to_binary(Cs, utf8, utf8),
+ convert_to_list(to_lower(B), utf8);
+to_lower(Cs) when is_binary(Cs) ->
+ to_lower(Cs, <<>>).
+%%
+to_lower(<<C,Cs/binary>>, Acc) when $A =< C, C =< $Z ->
+ to_lower(Cs, <<Acc/binary,(C + 32)>>);
+to_lower(<<C,Cs/binary>>, Acc) ->
+ to_lower(Cs, <<Acc/binary,C>>);
+to_lower(<<>>, Acc) ->
+ Acc.
+
+
+%% 6.2.2.3. Path Segment Normalization
+%% 5.2.4. Remove Dot Segments
+normalize_path_segment(Map) ->
+ Path = maps:get(path, Map, undefined),
+ Map#{path => remove_dot_segments(Path)}.
+
+
+remove_dot_segments(Path) when is_binary(Path) ->
+ remove_dot_segments(Path, <<>>);
+remove_dot_segments(Path) when is_list(Path) ->
+ B = convert_to_binary(Path, utf8, utf8),
+ B1 = remove_dot_segments(B, <<>>),
+ convert_to_list(B1, utf8).
+%%
+remove_dot_segments(<<>>, Output) ->
+ Output;
+remove_dot_segments(<<"../",T/binary>>, Output) ->
+ remove_dot_segments(T, Output);
+remove_dot_segments(<<"./",T/binary>>, Output) ->
+ remove_dot_segments(T, Output);
+remove_dot_segments(<<"/./",T/binary>>, Output) ->
+ remove_dot_segments(<<$/,T/binary>>, Output);
+remove_dot_segments(<<"/.">>, Output) ->
+ remove_dot_segments(<<$/>>, Output);
+remove_dot_segments(<<"/../",T/binary>>, Output) ->
+ Out1 = remove_last_segment(Output),
+ remove_dot_segments(<<$/,T/binary>>, Out1);
+remove_dot_segments(<<"/..">>, Output) ->
+ Out1 = remove_last_segment(Output),
+ remove_dot_segments(<<$/>>, Out1);
+remove_dot_segments(<<$.>>, Output) ->
+ remove_dot_segments(<<>>, Output);
+remove_dot_segments(<<"..">>, Output) ->
+ remove_dot_segments(<<>>, Output);
+remove_dot_segments(Input, Output) ->
+ {First, Rest} = first_path_segment(Input),
+ remove_dot_segments(Rest, <<Output/binary,First/binary>>).
+
+
+first_path_segment(Input) ->
+ F = first_path_segment(Input, <<>>),
+ split_binary(Input, byte_size(F)).
+%%
+first_path_segment(<<$/,T/binary>>, Acc) ->
+ first_path_segment_end(<<T/binary>>, <<Acc/binary,$/>>);
+first_path_segment(<<C,T/binary>>, Acc) ->
+ first_path_segment_end(<<T/binary>>, <<Acc/binary,C>>).
+
+
+first_path_segment_end(<<>>, Acc) ->
+ Acc;
+first_path_segment_end(<<$/,_/binary>>, Acc) ->
+ Acc;
+first_path_segment_end(<<C,T/binary>>, Acc) ->
+ first_path_segment_end(<<T/binary>>, <<Acc/binary,C>>).
+
+
+remove_last_segment(<<>>) ->
+ <<>>;
+remove_last_segment(B) ->
+ {Init, Last} = split_binary(B, byte_size(B) - 1),
+ case Last of
+ <<$/>> ->
+ Init;
+ _Char ->
+ remove_last_segment(Init)
+ end.
+
+
+%% RFC 3986, 6.2.3. Scheme-Based Normalization
+normalize_scheme_based(Map) ->
+ Scheme = maps:get(scheme, Map, undefined),
+ Port = maps:get(port, Map, undefined),
+ Path= maps:get(path, Map, undefined),
+ normalize_scheme_based(Map, Scheme, Port, Path).
+%%
+normalize_scheme_based(Map, Scheme, Port, Path)
+ when Scheme =:= "http"; Scheme =:= <<"http">> ->
+ normalize_http(Map, Port, Path);
+normalize_scheme_based(Map, Scheme, Port, Path)
+ when Scheme =:= "https"; Scheme =:= <<"https">> ->
+ normalize_https(Map, Port, Path);
+normalize_scheme_based(Map, Scheme, Port, _Path)
+ when Scheme =:= "ftp"; Scheme =:= <<"ftp">> ->
+ normalize_ftp(Map, Port);
+normalize_scheme_based(Map, Scheme, Port, _Path)
+ when Scheme =:= "ssh"; Scheme =:= <<"ssh">> ->
+ normalize_ssh_sftp(Map, Port);
+normalize_scheme_based(Map, Scheme, Port, _Path)
+ when Scheme =:= "sftp"; Scheme =:= <<"sftp">> ->
+ normalize_ssh_sftp(Map, Port);
+normalize_scheme_based(Map, Scheme, Port, _Path)
+ when Scheme =:= "tftp"; Scheme =:= <<"tftp">> ->
+ normalize_tftp(Map, Port);
+normalize_scheme_based(Map, _, _, _) ->
+ Map.
+
+
+normalize_http(Map, Port, Path) ->
+ M1 = normalize_port(Map, Port, 80),
+ normalize_http_path(M1, Path).
+
+
+normalize_https(Map, Port, Path) ->
+ M1 = normalize_port(Map, Port, 443),
+ normalize_http_path(M1, Path).
+
+
+normalize_ftp(Map, Port) ->
+ normalize_port(Map, Port, 21).
+
+
+normalize_ssh_sftp(Map, Port) ->
+ normalize_port(Map, Port, 22).
+
+
+normalize_tftp(Map, Port) ->
+ normalize_port(Map, Port, 69).
+
+
+normalize_port(Map, Port, Default) ->
+ case Port of
+ Default ->
+ maps:remove(port, Map);
+ _Else ->
+ Map
+ end.
+
+
+normalize_http_path(Map, Path) ->
+ case Path of
+ "" ->
+ Map#{path => "/"};
+ <<>> ->
+ Map#{path => <<"/">>};
+ _Else ->
+ Map
+ end.
diff --git a/lib/stdlib/test/Makefile b/lib/stdlib/test/Makefile
index 7b79dcf04d..8490770f3d 100644
--- a/lib/stdlib/test/Makefile
+++ b/lib/stdlib/test/Makefile
@@ -69,6 +69,7 @@ MODULES= \
sets_test_lib \
sofs_SUITE \
stdlib_SUITE \
+ stdlib_bench_SUITE \
string_SUITE \
supervisor_1 \
supervisor_2 \
@@ -86,6 +87,7 @@ MODULES= \
timer_simple_SUITE \
unicode_SUITE \
unicode_util_SUITE \
+ uri_string_SUITE \
win32reg_SUITE \
y2k_SUITE \
select_SUITE \
@@ -146,7 +148,7 @@ release_spec: opt
release_tests_spec: make_emakefile
$(INSTALL_DIR) "$(RELSYSDIR)"
- $(INSTALL_DATA) stdlib.spec $(EMAKEFILE) \
+ $(INSTALL_DATA) stdlib.spec stdlib_bench.spec $(EMAKEFILE) \
$(ERL_FILES) $(COVERFILE) "$(RELSYSDIR)"
chmod -R u+w "$(RELSYSDIR)"
@tar cf - *_SUITE_data | (cd "$(RELSYSDIR)"; tar xf -)
diff --git a/lib/stdlib/test/erl_internal_SUITE.erl b/lib/stdlib/test/erl_internal_SUITE.erl
index 789a9d4363..7d9df1f989 100644
--- a/lib/stdlib/test/erl_internal_SUITE.erl
+++ b/lib/stdlib/test/erl_internal_SUITE.erl
@@ -80,7 +80,7 @@ callbacks(application) ->
callbacks(gen_server) ->
[{init,1}, {handle_call,3}, {handle_cast,2},
{handle_info,2}, {terminate,2}, {code_change,3},
- {format_status,2}];
+ {format_status,2}, {handle_continue, 2}];
callbacks(gen_fsm) ->
[{init,1}, {handle_event,3}, {handle_sync_event,4},
{handle_info,3}, {terminate,3}, {code_change,4},
@@ -101,7 +101,7 @@ callbacks(supervisor) ->
optional_callbacks(application) ->
[];
optional_callbacks(gen_server) ->
- [{handle_info, 2}, {terminate, 2}, {code_change, 3}, {format_status, 2}];
+ [{handle_info, 2}, {handle_continue, 2}, {terminate, 2}, {code_change, 3}, {format_status, 2}];
optional_callbacks(gen_fsm) ->
[{handle_info, 3}, {terminate, 3}, {code_change, 4}, {format_status, 2}];
optional_callbacks(gen_event) ->
diff --git a/lib/stdlib/test/ets_SUITE.erl b/lib/stdlib/test/ets_SUITE.erl
index 05451a83fb..f329a07b7a 100644
--- a/lib/stdlib/test/ets_SUITE.erl
+++ b/lib/stdlib/test/ets_SUITE.erl
@@ -1682,7 +1682,7 @@ do_random_test() ->
ets:delete(Set),
verify_etsmem(EtsMem).
-%% Ttest various variants of update_element.
+%% Test various variants of update_element.
update_element(Config) when is_list(Config) ->
EtsMem = etsmem(),
repeat_for_opts(fun update_element_opts/1),
@@ -2283,13 +2283,8 @@ write_concurrency(Config) when is_list(Config) ->
NoHashMem = ets:info(No7,memory),
NoHashMem = ets:info(No8,memory),
- case erlang:system_info(smp_support) of
- true ->
- true = YesMem > NoHashMem,
- true = YesMem > NoTreeMem;
- false ->
- true = YesMem =:= NoHashMem
- end,
+ true = YesMem > NoHashMem,
+ true = YesMem > NoTreeMem,
{'EXIT',{badarg,_}} = (catch ets_new(foo,[public,{write_concurrency,foo}])),
{'EXIT',{badarg,_}} = (catch ets_new(foo,[public,{write_concurrency}])),
@@ -5912,16 +5907,11 @@ add_lists([E1|T1], [E2|T2], Acc) ->
run_smp_workers(InitF,ExecF,FiniF,Laps) ->
run_smp_workers(InitF,ExecF,FiniF,Laps, 0).
run_smp_workers(InitF,ExecF,FiniF,Laps, Exclude) ->
- case erlang:system_info(smp_support) of
- true ->
- case erlang:system_info(schedulers_online) of
- N when N > Exclude ->
- run_workers_do(InitF,ExecF,FiniF,Laps, N - Exclude);
- _ ->
- {skipped, "Too few schedulers online"}
- end;
- false ->
- {skipped,"No smp support"}
+ case erlang:system_info(schedulers_online) of
+ N when N > Exclude ->
+ run_workers_do(InitF,ExecF,FiniF,Laps, N - Exclude);
+ _ ->
+ {skipped, "Too few schedulers online"}
end.
run_sched_workers(InitF,ExecF,FiniF,Laps) ->
@@ -6231,11 +6221,9 @@ spawn_monitor_with_pid(Pid, Fun, N) ->
only_if_smp(Func) ->
only_if_smp(2, Func).
only_if_smp(Schedulers, Func) ->
- case {erlang:system_info(smp_support),
- erlang:system_info(schedulers_online)} of
- {false,_} -> {skip,"No smp support"};
- {true,N} when N < Schedulers -> {skip,"Too few schedulers online"};
- {true,_} -> Func()
+ case erlang:system_info(schedulers_online) of
+ N when N < Schedulers -> {skip,"Too few schedulers online"};
+ _ -> Func()
end.
%% Copy-paste from emulator/test/binary_SUITE.erl
diff --git a/lib/stdlib/test/filelib_SUITE.erl b/lib/stdlib/test/filelib_SUITE.erl
index c94821bc75..1236fe45f4 100644
--- a/lib/stdlib/test/filelib_SUITE.erl
+++ b/lib/stdlib/test/filelib_SUITE.erl
@@ -120,7 +120,7 @@ wcc(Wc, Error) ->
do_wildcard_1(Dir, Wcf0) ->
do_wildcard_2(Dir, Wcf0),
Wcf = fun(Wc0) ->
- Wc = filename:join(Dir, Wc0),
+ Wc = Dir ++ "/" ++ Wc0,
L = Wcf0(Wc),
[subtract_dir(N, Dir) || N <- L]
end,
@@ -268,8 +268,37 @@ do_wildcard_9(Dir, Wcf) ->
%% Cleanup.
del(Files),
[ok = file:del_dir(D) || D <- lists:reverse(Dirs)],
- ok.
+ do_wildcard_10(Dir, Wcf).
+
+%% ERL-451/OTP-14577: Escape characters using \\.
+do_wildcard_10(Dir, Wcf) ->
+ All0 = ["{abc}","abc","def","---","z--","@a,b","@c"],
+ All = case os:type() of
+ {unix,_} ->
+ %% '?' is allowed in file names on Unix, but
+ %% not on Windows.
+ ["?q"|All0];
+ _ ->
+ All0
+ end,
+ Files = mkfiles(lists:reverse(All), Dir),
+
+ ["{abc}"] = Wcf("\\{a*"),
+ ["{abc}"] = Wcf("\\{abc}"),
+ ["abc","def","z--"] = Wcf("[a-z]*"),
+ ["---","abc","z--"] = Wcf("[a\\-z]*"),
+ ["@a,b","@c"] = Wcf("@{a\\,b,c}"),
+ ["@c"] = Wcf("@{a,b,c}"),
+
+ case os:type() of
+ {unix,_} ->
+ ["?q"] = Wcf("\\?q");
+ _ ->
+ [] = Wcf("\\?q")
+ end,
+ del(Files),
+ ok.
fold_files(Config) when is_list(Config) ->
Dir = filename:join(proplists:get_value(priv_dir, Config), "fold_files"),
diff --git a/lib/stdlib/test/gen_server_SUITE.erl b/lib/stdlib/test/gen_server_SUITE.erl
index 2e9dc4d4fb..2bc220fef2 100644
--- a/lib/stdlib/test/gen_server_SUITE.erl
+++ b/lib/stdlib/test/gen_server_SUITE.erl
@@ -27,7 +27,7 @@
-export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1,
init_per_group/2,end_per_group/2]).
-export([start/1, crash/1, call/1, cast/1, cast_fast/1,
- info/1, abcast/1, multicall/1, multicall_down/1,
+ continue/1, info/1, abcast/1, multicall/1, multicall_down/1,
call_remote1/1, call_remote2/1, call_remote3/1,
call_remote_n1/1, call_remote_n2/1, call_remote_n3/1, spec_init/1,
spec_init_local_registered_parent/1,
@@ -37,7 +37,8 @@
get_state/1, replace_state/1, call_with_huge_message_queue/1,
undef_handle_call/1, undef_handle_cast/1, undef_handle_info/1,
undef_init/1, undef_code_change/1, undef_terminate1/1,
- undef_terminate2/1, undef_in_terminate/1, undef_in_handle_info/1
+ undef_terminate2/1, undef_in_terminate/1, undef_in_handle_info/1,
+ undef_handle_continue/1
]).
-export([stop1/1, stop2/1, stop3/1, stop4/1, stop5/1, stop6/1, stop7/1,
@@ -52,7 +53,7 @@
%% The gen_server behaviour
--export([init/1, handle_call/3, handle_cast/2,
+-export([init/1, handle_call/3, handle_cast/2, handle_continue/2,
handle_info/2, code_change/3, terminate/2, format_status/2]).
suite() ->
@@ -61,7 +62,7 @@ suite() ->
all() ->
[start, {group,stop}, crash, call, cast, cast_fast, info, abcast,
- multicall, multicall_down, call_remote1, call_remote2,
+ continue, multicall, multicall_down, call_remote1, call_remote2,
call_remote3, call_remote_n1, call_remote_n2,
call_remote_n3, spec_init,
spec_init_local_registered_parent,
@@ -76,7 +77,7 @@ groups() ->
[{stop, [],
[stop1, stop2, stop3, stop4, stop5, stop6, stop7, stop8, stop9, stop10]},
{undef_callbacks, [],
- [undef_handle_call, undef_handle_cast, undef_handle_info,
+ [undef_handle_call, undef_handle_cast, undef_handle_info, undef_handle_continue,
undef_init, undef_code_change, undef_terminate1, undef_terminate2]}].
@@ -458,6 +459,47 @@ call(Config) when is_list(Config) ->
ok.
%% --------------------------------------
+%% Test handle_continue.
+%% --------------------------------------
+
+continue(Config) when is_list(Config) ->
+ {ok, Pid} = gen_server:start_link(gen_server_SUITE, {continue, self()}, []),
+ [{Pid, continue}, {Pid, after_continue}] = read_replies(Pid),
+
+ gen_server:call(Pid, {continue_reply, self()}),
+ [{Pid, continue}, {Pid, after_continue}] = read_replies(Pid),
+
+ gen_server:call(Pid, {continue_noreply, self()}),
+ [{Pid, continue}, {Pid, after_continue}] = read_replies(Pid),
+
+ gen_server:cast(Pid, {continue_noreply, self()}),
+ [{Pid, continue}, {Pid, after_continue}] = read_replies(Pid),
+
+ Pid ! {continue_noreply, self()},
+ [{Pid, continue}, {Pid, after_continue}] = read_replies(Pid),
+
+ Pid ! {continue_continue, self()},
+ [{Pid, before_continue}, {Pid, continue}, {Pid, after_continue}] = read_replies(Pid),
+
+ Ref = monitor(process, Pid),
+ Pid ! continue_stop,
+ verify_down_reason(Ref, Pid, normal).
+
+read_replies(Pid) ->
+ receive
+ {Pid, ack} -> read_replies()
+ after
+ 1000 -> ct:fail({continue, ack})
+ end.
+
+read_replies() ->
+ receive
+ Msg -> [Msg | read_replies()]
+ after
+ 0 -> []
+ end.
+
+%% --------------------------------------
%% Test call to nonexisting processes on remote nodes
%% --------------------------------------
@@ -1346,7 +1388,7 @@ echo_loop() ->
%% Test the default implementation of terminate if the callback module
%% does not export it
undef_terminate1(Config) when is_list(Config) ->
- {ok, Server} = gen_server:start(oc_server, [], []),
+ {ok, Server} = oc_server:start(),
MRef = monitor(process, Server),
ok = gen_server:stop(Server),
ok = verify_down_reason(MRef, Server, normal).
@@ -1354,7 +1396,7 @@ undef_terminate1(Config) when is_list(Config) ->
%% Test the default implementation of terminate if the callback module
%% does not export it
undef_terminate2(Config) when is_list(Config) ->
- {ok, Server} = gen_server:start(oc_server, [], []),
+ {ok, Server} = oc_server:start(),
MRef = monitor(process, Server),
ok = gen_server:stop(Server, {error, test}, infinity),
ok = verify_down_reason(MRef, Server, {error, test}).
@@ -1377,7 +1419,7 @@ undef_init(_Config) ->
%% The upgrade should fail if code_change is expected in the callback module
%% but not exported, but the server should continue with the old code
undef_code_change(Config) when is_list(Config) ->
- {ok, Server} = gen_server:start(oc_server, [], []),
+ {ok, Server} = oc_server:start(),
{error, {'EXIT', {undef, [{oc_server, code_change, [_, _, _], _}|_]}}}
= fake_upgrade(Server, ?MODULE),
true = is_process_alive(Server).
@@ -1385,7 +1427,7 @@ undef_code_change(Config) when is_list(Config) ->
%% The server should crash if the handle_call callback is
%% not exported in the callback module
undef_handle_call(_Config) ->
- {ok, Server} = gen_server:start(oc_server, [], []),
+ {ok, Server} = oc_server:start(),
try
gen_server:call(Server, call_msg),
ct:fail(should_crash)
@@ -1397,17 +1439,25 @@ undef_handle_call(_Config) ->
%% The server should crash if the handle_cast callback is
%% not exported in the callback module
undef_handle_cast(_Config) ->
- {ok, Server} = gen_server:start(oc_server, [], []),
+ {ok, Server} = oc_server:start(),
MRef = monitor(process, Server),
gen_server:cast(Server, cast_msg),
verify_undef_down(MRef, Server, oc_server, handle_cast),
ok.
+%% The server should crash if the handle_continue callback is
+%% not exported in the callback module
+undef_handle_continue(_Config) ->
+ {ok, Server} = oc_server:start(continue),
+ MRef = monitor(process, Server),
+ verify_undef_down(MRef, Server, oc_server, handle_continue),
+ ok.
+
%% The server should log but not crash if the handle_info callback is
%% calling an undefined function
undef_handle_info(Config) when is_list(Config) ->
error_logger_forwarder:register(),
- {ok, Server} = gen_server:start(oc_server, [], []),
+ {ok, Server} = oc_server:start(),
Server ! hej,
wait_until_processed(Server, hej, 10),
true = is_process_alive(Server),
@@ -1570,8 +1620,11 @@ init(hibernate) ->
init(sleep) ->
ct:sleep(1000),
{ok, []};
+init({continue, Pid}) ->
+ self() ! {after_continue, Pid},
+ {ok, [], {continue, {message, Pid}}};
init({state,State}) ->
- {ok, State}.
+ {ok,State}.
handle_call(started_p, _From, State) ->
io:format("FROZ"),
@@ -1604,6 +1657,12 @@ handle_call(shutdown_reason, _From, _State) ->
handle_call({call_undef_fun, Mod, Fun}, _From, State) ->
Mod:Fun(),
{reply, ok, State};
+handle_call({continue_reply, Pid}, _From, State) ->
+ self() ! {after_continue, Pid},
+ {reply, ok, State, {continue, {message, Pid}}};
+handle_call({continue_noreply, Pid}, From, State) ->
+ self() ! {after_continue, Pid},
+ {noreply, State, {continue, {message, Pid, From}}};
handle_call(stop_shutdown_reason, _From, State) ->
{stop,{shutdown,stop_reason},State}.
@@ -1620,6 +1679,9 @@ handle_cast(hibernate_later, _State) ->
handle_cast({call_undef_fun, Mod, Fun}, State) ->
Mod:Fun(),
{noreply, State};
+handle_cast({continue_noreply, Pid}, State) ->
+ self() ! {after_continue, Pid},
+ {noreply, State, {continue, {message, Pid}}};
handle_cast({From, stop}, State) ->
io:format("BAZ"),
{stop, {From,stopped}, State}.
@@ -1657,9 +1719,34 @@ handle_info(continue, From) ->
{noreply, []};
handle_info({From, stop}, State) ->
{stop, {From,stopped_info}, State};
+handle_info({after_continue, Pid}, State) ->
+ Pid ! {self(), after_continue},
+ Pid ! {self(), ack},
+ {noreply, State};
+handle_info({continue_noreply, Pid}, State) ->
+ self() ! {after_continue, Pid},
+ {noreply, State, {continue, {message, Pid}}};
+handle_info({continue_continue, Pid}, State) ->
+ {noreply, State, {continue, {continue, Pid}}};
+handle_info(continue_stop, State) ->
+ {noreply, State, {continue, stop}};
handle_info(_Info, State) ->
{noreply, State}.
+handle_continue({continue, Pid}, State) ->
+ Pid ! {self(), before_continue},
+ self() ! {after_continue, Pid},
+ {noreply, State, {continue, {message, Pid}}};
+handle_continue(stop, State) ->
+ {stop, normal, State};
+handle_continue({message, Pid}, State) ->
+ Pid ! {self(), continue},
+ {noreply, State};
+handle_continue({message, Pid, From}, State) ->
+ Pid ! {self(), continue},
+ gen_server:reply(From, ok),
+ {noreply, State}.
+
code_change(_OldVsn,
{new, {undef_in_code_change, {Mod, Fun}}} = State,
_Extra) ->
diff --git a/lib/stdlib/test/gen_server_SUITE_data/oc_server.erl b/lib/stdlib/test/gen_server_SUITE_data/oc_server.erl
index 4ba37987f3..7b92a49bf6 100644
--- a/lib/stdlib/test/gen_server_SUITE_data/oc_server.erl
+++ b/lib/stdlib/test/gen_server_SUITE_data/oc_server.erl
@@ -22,7 +22,7 @@
-behaviour(gen_server).
%% API
--export([start/0]).
+-export([start/0, start/1]).
%% gen_server callbacks
-export([init/1]).
@@ -30,8 +30,12 @@
-record(state, {}).
start() ->
- gen_server:start({local, ?MODULE}, ?MODULE, [], []).
+ gen_server:start(?MODULE, ok, []).
-init([]) ->
- {ok, #state{}}.
+start(continue) ->
+ gen_server:start(?MODULE, continue, []).
+init(ok) ->
+ {ok, #state{}};
+init(continue) ->
+ {ok, #state{}, {continue, continue}}.
diff --git a/lib/stdlib/test/property_test/README b/lib/stdlib/test/property_test/README
new file mode 100644
index 0000000000..57602bf719
--- /dev/null
+++ b/lib/stdlib/test/property_test/README
@@ -0,0 +1,12 @@
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% %%%
+%%% WARNING %%%
+%%% %%%
+%%% This is experimental code which may be changed or removed %%%
+%%% anytime without any warning. %%%
+%%% %%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+The test in this directory are written assuming that the user has a QuickCheck license. They are to be run manually. Some may be possible to be run with other tools, e.g. PropEr.
+
diff --git a/lib/stdlib/test/property_test/uri_string_recompose.erl b/lib/stdlib/test/property_test/uri_string_recompose.erl
new file mode 100644
index 0000000000..e51a671172
--- /dev/null
+++ b/lib/stdlib/test/property_test/uri_string_recompose.erl
@@ -0,0 +1,361 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+-module(uri_string_recompose).
+
+-compile(export_all).
+
+-proptest(eqc).
+-proptest([triq,proper]).
+
+-ifndef(EQC).
+-ifndef(PROPER).
+-ifndef(TRIQ).
+-define(EQC,true).
+-endif.
+-endif.
+-endif.
+
+-ifdef(EQC).
+-include_lib("eqc/include/eqc.hrl").
+-define(MOD_eqc,eqc).
+
+-else.
+-ifdef(PROPER).
+-include_lib("proper/include/proper.hrl").
+-define(MOD_eqc,proper).
+
+-else.
+-ifdef(TRIQ).
+-define(MOD_eqc,triq).
+-include_lib("triq/include/triq.hrl").
+
+-endif.
+-endif.
+-endif.
+
+
+-define(STRING_REST(MatchStr, Rest), <<MatchStr/utf8, Rest/binary>>).
+
+-define(SCHEME, {scheme, scheme()}).
+-define(USER, {userinfo, unicode()}).
+-define(HOST, {host, host_map()}).
+-define(PORT, {port, port()}).
+-define(PATH_ABE, {path, path_abempty_map()}).
+-define(PATH_ABS, {path, path_absolute_map()}).
+-define(PATH_NOS, {path, path_noscheme_map()}).
+-define(PATH_ROO, {path, path_rootless_map()}).
+-define(PATH_EMP, {path, path_empty_map()}).
+-define(QUERY, {query, query_map()}).
+-define(FRAGMENT, {fragment, fragment_map()}).
+
+
+%%%========================================================================
+%%% Properties
+%%%========================================================================
+
+prop_recompose() ->
+ ?FORALL(Map, map(),
+ Map =:= uri_string:parse(uri_string:recompose(Map))
+ ).
+
+%% Stats
+prop_map_key_length_collect() ->
+ ?FORALL(List, map(),
+ collect(length(maps:keys(List)), true)).
+
+prop_map_collect() ->
+ ?FORALL(List, map(),
+ collect(lists:sort(maps:keys(List)), true)).
+
+prop_scheme_collect() ->
+ ?FORALL(List, scheme(),
+ collect(length(List), true)).
+
+
+%%%========================================================================
+%%% Generators
+%%%========================================================================
+
+map() ->
+ ?LET(Gen, comp_proplist(), proplist_to_map(Gen)).
+
+comp_proplist() ->
+ frequency([
+ {2, [?SCHEME,?PATH_ABS]},
+ {2, [?SCHEME,?PATH_ROO]},
+ {2, [?SCHEME,?PATH_EMP]},
+ {2, [?SCHEME,?HOST,?PATH_ABE]},
+ {2, [?SCHEME,?USER,?HOST,?PATH_ABE]},
+ {2, [?SCHEME,?HOST,?PORT,?PATH_ABE]},
+ {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE]},
+
+ {2, [?PATH_ABS]},
+ {2, [?PATH_NOS]},
+ {2, [?PATH_EMP]},
+ {2, [?HOST,?PATH_ABE]},
+ {2, [?USER,?HOST,?PATH_ABE]},
+ {2, [?HOST,?PORT,?PATH_ABE]},
+ {2, [?USER,?HOST,?PORT,?PATH_ABE]},
+
+
+ {2, [?SCHEME,?PATH_ABS,?QUERY]},
+ {2, [?SCHEME,?PATH_ROO,?QUERY]},
+ {2, [?SCHEME,?PATH_EMP,?QUERY]},
+ {2, [?SCHEME,?HOST,?PATH_ABE,?QUERY]},
+ {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?QUERY]},
+ {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?QUERY]},
+ {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?QUERY]},
+
+ {2, [?PATH_ABS,?QUERY]},
+ {2, [?PATH_NOS,?QUERY]},
+ {2, [?PATH_EMP,?QUERY]},
+ {2, [?HOST,?PATH_ABE,?QUERY]},
+ {2, [?USER,?HOST,?PATH_ABE,?QUERY]},
+ {2, [?HOST,?PORT,?PATH_ABE,?QUERY]},
+ {2, [?USER,?HOST,?PORT,?PATH_ABE,?QUERY]},
+
+
+ {2, [?SCHEME,?PATH_ABS,?FRAGMENT]},
+ {2, [?SCHEME,?PATH_ROO,?FRAGMENT]},
+ {2, [?SCHEME,?PATH_EMP,?FRAGMENT]},
+ {2, [?SCHEME,?HOST,?PATH_ABE,?FRAGMENT]},
+ {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?FRAGMENT]},
+ {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?FRAGMENT]},
+ {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?FRAGMENT]},
+
+ {2, [?PATH_ABS,?FRAGMENT]},
+ {2, [?PATH_NOS,?FRAGMENT]},
+ {2, [?PATH_EMP,?FRAGMENT]},
+ {2, [?HOST,?PATH_ABE,?FRAGMENT]},
+ {2, [?USER,?HOST,?PATH_ABE,?FRAGMENT]},
+ {2, [?HOST,?PORT,?PATH_ABE,?FRAGMENT]},
+ {2, [?USER,?HOST,?PORT,?PATH_ABE,?FRAGMENT]},
+
+
+ {2, [?SCHEME,?PATH_ABS,?QUERY,?FRAGMENT]},
+ {2, [?SCHEME,?PATH_ROO,?QUERY,?FRAGMENT]},
+ {2, [?SCHEME,?PATH_EMP,?QUERY,?FRAGMENT]},
+ {2, [?SCHEME,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]},
+ {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]},
+ {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]},
+ {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]},
+
+ {2, [?PATH_ABS,?QUERY,?FRAGMENT]},
+ {2, [?PATH_NOS,?QUERY,?FRAGMENT]},
+ {2, [?PATH_EMP,?QUERY,?FRAGMENT]},
+ {2, [?HOST,?PATH_ABE,?QUERY,?FRAGMENT]},
+ {2, [?USER,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]},
+ {2, [?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]},
+ {2, [?USER,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]}
+ ]).
+
+
+%%-------------------------------------------------------------------------
+%% Path
+%%-------------------------------------------------------------------------
+path_abempty_map() ->
+ frequency([{90, path_abe_map()},
+ {10, path_empty_map()}]).
+
+path_abe_map() ->
+ ?SIZED(Length, path_abe_map(Length, [])).
+%%
+path_abe_map(0, Segments) ->
+ ?LET(Gen, Segments, lists:append(Gen));
+path_abe_map(N, Segments) ->
+ path_abe_map(N-1, [slash(),segment()|Segments]).
+
+
+path_absolute_map() ->
+ ?SIZED(Length, path_absolute_map(Length, [])).
+%%
+path_absolute_map(0, Segments) ->
+ ?LET(Gen, [slash(),segment_nz()|Segments], lists:append(Gen));
+path_absolute_map(N, Segments) ->
+ path_absolute_map(N-1, [slash(),segment()|Segments]).
+
+
+path_noscheme_map() ->
+ ?SIZED(Length, path_noscheme_map(Length, [])).
+%%
+path_noscheme_map(0, Segments) ->
+ ?LET(Gen, [segment_nz_nc()|Segments], lists:append(Gen));
+path_noscheme_map(N, Segments) ->
+ path_noscheme_map(N-1, [slash(),segment()|Segments]).
+
+path_rootless_map() ->
+ ?SIZED(Length, path_rootless_map(Length, [])).
+%%
+path_rootless_map(0, Segments) ->
+ ?LET(Gen, [segment_nz()|Segments], lists:append(Gen));
+path_rootless_map(N, Segments) ->
+ path_rootless_map(N-1, [slash(),segment()|Segments]).
+
+
+segment_nz() ->
+ non_empty(segment()).
+
+segment_nz_nc() ->
+ non_empty(list(frequency([{30, unreserved()},
+ {10, sub_delims()},
+ {10, unicode_char()},
+ {5, oneof([$@])}
+ ]))).
+
+
+segment() ->
+ list(frequency([{30, unreserved()},
+ {10, sub_delims()},
+ {10, unicode_char()},
+ {5, oneof([$:, $@])}
+ ])).
+
+slash() ->
+ "/".
+
+path_empty_map() ->
+ "".
+
+
+%%-------------------------------------------------------------------------
+%% Path
+%%-------------------------------------------------------------------------
+host_map() ->
+ frequency([{30, reg_name()},
+ {30, ip_address()}
+ ]).
+
+
+reg_name() ->
+ list(frequency([{30, alpha()},
+ {10, sub_delims()},
+ {10, unicode_char()}
+ ])).
+
+ip_address() ->
+ oneof(["127.0.0.1", "::127.0.0.1",
+ "2001:0db8:0000:0000:0000:0000:1428:07ab",
+ "2001:0db8:0000:0000:0000::1428:07ab",
+ "2001:0db8:0:0:0:0:1428:07ab",
+ "2001:0db8:0::0:1428:07ab"]).
+
+%% Generating only reg-names
+host_uri() ->
+ non_empty(list(frequency([{30, unreserved()},
+ {10, sub_delims()},
+ {10, pct_encoded()}
+ ]))).
+
+%%-------------------------------------------------------------------------
+%% Port, Query, Fragment
+%%-------------------------------------------------------------------------
+port() ->
+ frequency([{10, undefined},
+ {10, range(1,65535)}
+ ]).
+
+query_map() ->
+ unicode().
+
+
+query_uri() ->
+ [$?| non_empty(list(frequency([{20, pchar()},
+ {5, oneof([$/, $?])} % punctuation
+ ])))].
+
+fragment_map() ->
+ unicode().
+
+fragment_uri() ->
+ [$?| non_empty(list(frequency([{20, pchar()},
+ {5, oneof([$/, $?])} % punctuation
+ ])))].
+
+
+%%-------------------------------------------------------------------------
+%% Scheme
+%%-------------------------------------------------------------------------
+scheme() ->
+ ?SIZED(Length, scheme_start(Length, [])).
+%%
+scheme_start(0, L) ->
+ ?LET(Gen, L, lists:reverse(Gen));
+scheme_start(N, L) ->
+ scheme(N-1,[alpha()|L]).
+
+scheme(0, L) ->
+ ?LET(Gen, L, lists:reverse(Gen));
+scheme(N, L) ->
+ scheme(N-1, [scheme_char()|L]).
+
+
+%%-------------------------------------------------------------------------
+%% Misc
+%%-------------------------------------------------------------------------
+unicode() ->
+ list(frequency([{20, alpha()}, % alpha
+ {10, digit()}, % digit
+ {10, unicode_char()} % unicode
+ ])).
+
+scheme_char() ->
+ frequency([{20, alpha()}, % alpha
+ {20, digit()}, % digit
+ {5, oneof([$+, $-, $.])} % punctuation
+ ]).
+
+sub_delims() ->
+ oneof([$!, $$, $&, $', $(, $),
+ $*, $+, $,,$;, $=]).
+
+pchar() ->
+ frequency([{20, unreserved()},
+ {5, pct_encoded()},
+ {5, sub_delims()},
+ {1, oneof([$:, $@])} % punctuation
+ ]).
+
+unreserved() ->
+ frequency([{20, alpha()},
+ {5, digit()},
+ {1, oneof([$-, $., $_, $~])} % punctuation
+ ]).
+
+unicode_char() ->
+ range(913, 1023).
+
+alpha() ->
+ frequency([{20, range($a, $z)}, % letters
+ {20, range($A, $Z)}]). % letters
+
+digit() ->
+ range($0, $9). % numbers
+
+pct_encoded() ->
+ oneof(["%C3%A4", "%C3%A5", "%C3%B6"]).
+
+
+%%%========================================================================
+%%% Helpers
+%%%========================================================================
+proplist_to_map(L) ->
+ lists:foldl(fun({K,V},M) -> M#{K => V};
+ (_,M) -> M
+ end, #{}, L).
diff --git a/lib/stdlib/test/rand_SUITE.erl b/lib/stdlib/test/rand_SUITE.erl
index 432293b656..ef4f9faad9 100644
--- a/lib/stdlib/test/rand_SUITE.erl
+++ b/lib/stdlib/test/rand_SUITE.erl
@@ -29,11 +29,14 @@
basic_stats_uniform_1/1, basic_stats_uniform_2/1,
basic_stats_standard_normal/1,
basic_stats_normal/1,
+ uniform_real_conv/1,
plugin/1, measure/1,
reference_jump_state/1, reference_jump_procdict/1]).
-export([test/0, gen/1]).
+-export([uniform_real_gen/1, uniform_gen/2]).
+
-include_lib("common_test/include/ct.hrl").
-define(LOOP, 1000000).
@@ -46,7 +49,7 @@ all() ->
[seed, interval_int, interval_float,
api_eq,
reference,
- {group, basic_stats},
+ {group, basic_stats}, uniform_real_conv,
plugin, measure,
{group, reference_jump}
].
@@ -80,7 +83,7 @@ test() ->
end, Tests).
algs() ->
- [exs64, exsplus, exsp, exrop, exs1024, exs1024s].
+ [exrop, exsp, exs1024s, exs64, exsplus, exs1024].
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -101,7 +104,7 @@ seed_1(Alg) ->
_ = rand:uniform(),
S00 = get(rand_seed),
erase(),
- _ = rand:uniform(),
+ _ = rand:uniform_real(),
false = S00 =:= get(rand_seed), %% hopefully
%% Choosing algo and seed
@@ -228,11 +231,13 @@ interval_float(Config) when is_list(Config) ->
interval_float_1(0) -> ok;
interval_float_1(N) ->
X = rand:uniform(),
+ Y = rand:uniform_real(),
if
- 0.0 =< X, X < 1.0 ->
+ 0.0 =< X, X < 1.0, 0.0 < Y, Y < 1.0 ->
ok;
true ->
- io:format("X=~p 0=<~p<1.0~n", [X,X]),
+ io:format("X=~p 0.0=<~p<1.0~n", [X,X]),
+ io:format("Y=~p 0.0<~p<1.0~n", [Y,Y]),
exit({X, rand:export_seed()})
end,
interval_float_1(N-1).
@@ -334,7 +339,13 @@ basic_stats_normal(Config) when is_list(Config) ->
IntendedMeanVariancePairs).
basic_uniform_1(N, S0, Sum, A0) when N > 0 ->
- {X,S} = rand:uniform_s(S0),
+ {X,S} =
+ case N band 1 of
+ 0 ->
+ rand:uniform_s(S0);
+ 1 ->
+ rand:uniform_real_s(S0)
+ end,
I = trunc(X*100),
A = array:set(I, 1+array:get(I,A0), A0),
basic_uniform_1(N-1, S, Sum+X, A);
@@ -400,6 +411,137 @@ normal_s(Mean, Variance, State0) ->
rand:normal_s(Mean, Variance, State0).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% White box test of the conversion to float
+
+uniform_real_conv(Config) when is_list(Config) ->
+ [begin
+%% ct:pal("~13.16.0bx~3.16.0b: ~p~n", [M,E,Gen]),
+ uniform_real_conv_check(M, E, Gen)
+ end || {M, E, Gen} <- uniform_real_conv_data()],
+ uniform_real_scan(0),
+ uniform_real_scan(3).
+
+uniform_real_conv_data() ->
+ [{16#fffffffffffff, -1, [16#3ffffffffffffff]},
+ {16#fffffffffffff, -1, [16#3ffffffffffffe0]},
+ {16#ffffffffffffe, -1, [16#3ffffffffffffdf]},
+ %%
+ {16#0000000000000, -1, [16#200000000000000]},
+ {16#fffffffffffff, -2, [16#1ffffffffffffff]},
+ {16#fffffffffffff, -2, [16#1fffffffffffff0]},
+ {16#ffffffffffffe, -2, [16#1ffffffffffffef]},
+ %%
+ {16#0000000000000, -2, [16#100000000000000]},
+ {16#fffffffffffff, -3, [16#0ffffffffffffff]},
+ {16#fffffffffffff, -3, [16#0fffffffffffff8]},
+ {16#ffffffffffffe, -3, [16#0fffffffffffff7]},
+ %%
+ {16#0000000000000, -3, [16#080000000000000]},
+ {16#fffffffffffff, -4, [16#07fffffffffffff]},
+ {16#fffffffffffff, -4, [16#07ffffffffffffc]},
+ {16#ffffffffffffe, -4, [16#07ffffffffffffb]},
+ %%
+ {16#0000000000000, -4, [16#040000000000000]},
+ {16#fffffffffffff, -5, [16#03fffffffffffff,16#3ffffffffffffff]},
+ {16#fffffffffffff, -5, [16#03ffffffffffffe,16#200000000000000]},
+ {16#ffffffffffffe, -5, [16#03fffffffffffff,16#1ffffffffffffff]},
+ {16#ffffffffffffe, -5, [16#03fffffffffffff,16#100000000000000]},
+ %%
+ {16#0000000000001, -56, [16#000000000000007,16#00000000000007f]},
+ {16#0000000000001, -56, [16#000000000000004,16#000000000000040]},
+ {16#0000000000000, -57, [16#000000000000003,16#20000000000001f]},
+ {16#0000000000000, -57, [16#000000000000000,16#200000000000000]},
+ {16#fffffffffffff, -58, [16#000000000000003,16#1ffffffffffffff]},
+ {16#fffffffffffff, -58, [16#000000000000000,16#1fffffffffffff0]},
+ {16#ffffffffffffe, -58, [16#000000000000000,16#1ffffffffffffef]},
+ {16#ffffffffffffe, -58, [16#000000000000000,16#1ffffffffffffe0]},
+ %%
+ {16#0000000000000, -58, [16#000000000000000,16#10000000000000f]},
+ {16#0000000000000, -58, [16#000000000000000,16#100000000000000]},
+ {2#11001100000000000000000000000000000000000011000000011, % 53 bits
+ -1022,
+ [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, % 18 zeros
+ 2#1100110000000000000000000000000000000000001 bsl 2, % 43 bits
+ 2#1000000011 bsl (56-10+2)]}, % 10 bits
+ {0, -1, % 0.5 after retry
+ [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, % 18 zeros
+ 2#111111111111111111111111111111111111111111 bsl 2, % 42 bits - retry
+ 16#200000000000003]}]. % 0.5
+
+-define(UNIFORM_REAL_SCAN_PATTERN, (16#19000000000009)). % 53 bits
+-define(UNIFORM_REAL_SCAN_NUMBER, (1021)).
+
+uniform_real_scan_template(K) ->
+ <<0:?UNIFORM_REAL_SCAN_NUMBER,
+ ?UNIFORM_REAL_SCAN_PATTERN:53,K:2,0:1>>.
+
+uniform_real_scan(K) ->
+ Templ = uniform_real_scan_template(K),
+ N = ?UNIFORM_REAL_SCAN_NUMBER,
+ uniform_real_scan(Templ, N, K).
+
+uniform_real_scan(Templ, N, K) when 0 =< N ->
+ <<_:N/bits,T/bits>> = Templ,
+ Data = uniform_real_scan_data(T, K),
+ uniform_real_conv_check(
+ ?UNIFORM_REAL_SCAN_PATTERN, N - 1 - ?UNIFORM_REAL_SCAN_NUMBER, Data),
+ uniform_real_scan(Templ, N - 1, K);
+uniform_real_scan(_, _, _) ->
+ ok.
+
+uniform_real_scan_data(Templ, K) ->
+ case Templ of
+ <<X:56, T/bits>> ->
+ B = rand:bc64(X),
+ [(X bsl 2) bor K |
+ if
+ 53 =< B ->
+ [];
+ true ->
+ uniform_real_scan_data(T, K)
+ end];
+ _ ->
+ <<X:56, _/bits>> = <<Templ/bits, 0:56>>,
+ [(X bsl 2) bor K]
+ end.
+
+uniform_real_conv_check(M, E, Gen) ->
+ <<F/float>> = <<0:1, (E + 16#3ff):11, M:52>>,
+ try uniform_real_gen(Gen) of
+ F -> F;
+ FF ->
+ ct:pal(
+ "~s =/= ~s: ~s~n",
+ [rand:float2str(FF), rand:float2str(F),
+ [["16#",integer_to_list(G,16),$\s]||G<-Gen]]),
+ ct:fail({neq, FF, F})
+ catch
+ Error:Reason ->
+ ct:pal(
+ "~w:~p ~s: ~s~n",
+ [Error, Reason, rand:float2str(F),
+ [["16#",integer_to_list(G,16),$\s]||G<-Gen]]),
+ ct:fail({Error, Reason, F, erlang:get_stacktrace()})
+ end.
+
+
+uniform_real_gen(Gen) ->
+ State = rand_state(Gen),
+ {F, {#{type := rand_SUITE_list},[]}} = rand:uniform_real_s(State),
+ F.
+
+uniform_gen(Range, Gen) ->
+ State = rand_state(Gen),
+ {N, {#{type := rand_SUITE_list},[]}} = rand:uniform_s(Range, State),
+ N.
+
+%% Loaded dice for white box tests
+rand_state(Gen) ->
+ {#{type => rand_SUITE_list, bits => 58, weak_low_bits => 1,
+ next => fun ([H|T]) -> {H, T} end},
+ Gen}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Test that the user can write algorithms.
plugin(Config) when is_list(Config) ->
@@ -459,213 +601,289 @@ measure(Config) ->
{skip,{will_not_run_in_scaled_time,Scale}}
end.
+-define(CHECK_UNIFORM_RANGE(Gen, Range, X, St),
+ case (Gen) of
+ {(X), (St)} when is_integer(X), 1 =< (X), (X) =< (Range) ->
+ St
+ end).
+-define(CHECK_UNIFORM(Gen, X, St),
+ case (Gen) of
+ {(X), (St)} when is_float(X), 0.0 =< (X), (X) < 1.0 ->
+ St
+ end).
+-define(CHECK_UNIFORM_NZ(Gen, X, St),
+ case (Gen) of
+ {(X), (St)} when is_float(X), 0.0 < (X), (X) =< 1.0 ->
+ St
+ end).
+-define(CHECK_NORMAL(Gen, X, St),
+ case (Gen) of
+ {(X), (St)} when is_float(X) ->
+ St
+ end).
+
do_measure(_Config) ->
- Algos =
+ Algs =
+ algs() ++
try crypto:strong_rand_bytes(1) of
- <<_>> -> [crypto64, crypto]
+ <<_>> -> [crypto64, crypto_cache, crypto]
catch
error:low_entropy -> [];
error:undef -> []
- end ++ algs(),
+ end,
%%
- ct:pal("RNG uniform integer performance~n",[]),
- TMark1 =
+ ct:pal("~nRNG uniform integer range 10000 performance~n",[]),
+ _ =
measure_1(
- random,
fun (_) -> 10000 end,
- undefined,
- fun (Range, State) ->
- {int, random:uniform_s(Range, State)}
- end),
+ fun (State, Range, Mod) ->
+ measure_loop(
+ fun (St0) ->
+ ?CHECK_UNIFORM_RANGE(
+ Mod:uniform_s(Range, St0), Range,
+ X, St1)
+ end,
+ State)
+ end,
+ Algs),
+ %%
+ ct:pal("~nRNG uniform integer 32 bit performance~n",[]),
_ =
- [measure_1(
- Algo,
- fun (_) -> 10000 end,
- TMark1,
- fun (Range, State) ->
- {int, rand:uniform_s(Range, State)}
- end) || Algo <- Algos],
+ measure_1(
+ fun (_) -> 1 bsl 32 end,
+ fun (State, Range, Mod) ->
+ measure_loop(
+ fun (St0) ->
+ ?CHECK_UNIFORM_RANGE(
+ Mod:uniform_s(Range, St0), Range,
+ X, St1)
+ end,
+ State)
+ end,
+ Algs),
%%
ct:pal("~nRNG uniform integer half range performance~n",[]),
- HalfRangeFun = fun (State) -> half_range(State) end,
- TMark2 =
- measure_1(
- random,
- HalfRangeFun,
- undefined,
- fun (Range, State) ->
- {int, random:uniform_s(Range, State)}
- end),
_ =
- [measure_1(
- Algo,
- HalfRangeFun,
- TMark2,
- fun (Range, State) ->
- {int, rand:uniform_s(Range, State)}
- end) || Algo <- Algos],
- %%
- ct:pal("~nRNG uniform integer half range + 1 performance~n",[]),
- HalfRangePlus1Fun = fun (State) -> half_range(State) + 1 end,
- TMark3 =
measure_1(
- random,
- HalfRangePlus1Fun,
- undefined,
- fun (Range, State) ->
- {int, random:uniform_s(Range, State)}
- end),
+ fun (State) -> half_range(State) end,
+ fun (State, Range, Mod) ->
+ measure_loop(
+ fun (St0) ->
+ ?CHECK_UNIFORM_RANGE(
+ Mod:uniform_s(Range, St0), Range,
+ X, St1)
+ end,
+ State)
+ end,
+ Algs),
+ %%
+ ct:pal("~nRNG uniform integer half range + 1 performance~n",[]),
_ =
- [measure_1(
- Algo,
- HalfRangePlus1Fun,
- TMark3,
- fun (Range, State) ->
- {int, rand:uniform_s(Range, State)}
- end) || Algo <- Algos],
+ measure_1(
+ fun (State) -> half_range(State) + 1 end,
+ fun (State, Range, Mod) ->
+ measure_loop(
+ fun (St0) ->
+ ?CHECK_UNIFORM_RANGE(
+ Mod:uniform_s(Range, St0), Range,
+ X, St1)
+ end,
+ State)
+ end,
+ Algs),
%%
ct:pal("~nRNG uniform integer full range - 1 performance~n",[]),
- FullRangeMinus1Fun = fun (State) -> (half_range(State) bsl 1) - 1 end,
- TMark4 =
- measure_1(
- random,
- FullRangeMinus1Fun,
- undefined,
- fun (Range, State) ->
- {int, random:uniform_s(Range, State)}
- end),
_ =
- [measure_1(
- Algo,
- FullRangeMinus1Fun,
- TMark4,
- fun (Range, State) ->
- {int, rand:uniform_s(Range, State)}
- end) || Algo <- Algos],
+ measure_1(
+ fun (State) -> (half_range(State) bsl 1) - 1 end,
+ fun (State, Range, Mod) ->
+ measure_loop(
+ fun (St0) ->
+ ?CHECK_UNIFORM_RANGE(
+ Mod:uniform_s(Range, St0), Range,
+ X, St1)
+ end,
+ State)
+ end,
+ Algs),
%%
ct:pal("~nRNG uniform integer full range performance~n",[]),
- FullRangeFun = fun (State) -> half_range(State) bsl 1 end,
- TMark5 =
- measure_1(
- random,
- FullRangeFun,
- undefined,
- fun (Range, State) ->
- {int, random:uniform_s(Range, State)}
- end),
_ =
- [measure_1(
- Algo,
- FullRangeFun,
- TMark5,
- fun (Range, State) ->
- {int, rand:uniform_s(Range, State)}
- end) || Algo <- Algos],
+ measure_1(
+ fun (State) -> half_range(State) bsl 1 end,
+ fun (State, Range, Mod) ->
+ measure_loop(
+ fun (St0) ->
+ ?CHECK_UNIFORM_RANGE(
+ Mod:uniform_s(Range, St0), Range,
+ X, St1)
+ end,
+ State)
+ end,
+ Algs),
%%
ct:pal("~nRNG uniform integer full range + 1 performance~n",[]),
- FullRangePlus1Fun = fun (State) -> (half_range(State) bsl 1) + 1 end,
- TMark6 =
- measure_1(
- random,
- FullRangePlus1Fun,
- undefined,
- fun (Range, State) ->
- {int, random:uniform_s(Range, State)}
- end),
_ =
- [measure_1(
- Algo,
- FullRangePlus1Fun,
- TMark6,
- fun (Range, State) ->
- {int, rand:uniform_s(Range, State)}
- end) || Algo <- Algos],
+ measure_1(
+ fun (State) -> (half_range(State) bsl 1) + 1 end,
+ fun (State, Range, Mod) ->
+ measure_loop(
+ fun (St0) ->
+ ?CHECK_UNIFORM_RANGE(
+ Mod:uniform_s(Range, St0), Range,
+ X, St1)
+ end,
+ State)
+ end,
+ Algs),
%%
ct:pal("~nRNG uniform integer double range performance~n",[]),
- DoubleRangeFun = fun (State) -> half_range(State) bsl 2 end,
- TMark7 =
- measure_1(
- random,
- DoubleRangeFun,
- undefined,
- fun (Range, State) ->
- {int, random:uniform_s(Range, State)}
- end),
_ =
- [measure_1(
- Algo,
- DoubleRangeFun,
- TMark7,
- fun (Range, State) ->
- {int, rand:uniform_s(Range, State)}
- end) || Algo <- Algos],
+ measure_1(
+ fun (State) ->
+ half_range(State) bsl 2
+ end,
+ fun (State, Range, Mod) ->
+ measure_loop(
+ fun (St0) ->
+ ?CHECK_UNIFORM_RANGE(
+ Mod:uniform_s(Range, St0), Range,
+ X, St1)
+ end,
+ State)
+ end,
+ Algs),
%%
ct:pal("~nRNG uniform integer double range + 1 performance~n",[]),
- DoubleRangePlus1Fun = fun (State) -> (half_range(State) bsl 2) + 1 end,
- TMark8 =
+ _ =
measure_1(
- random,
- DoubleRangePlus1Fun,
- undefined,
- fun (Range, State) ->
- {int, random:uniform_s(Range, State)}
- end),
+ fun (State) ->
+ (half_range(State) bsl 2) + 1
+ end,
+ fun (State, Range, Mod) ->
+ measure_loop(
+ fun (St0) ->
+ ?CHECK_UNIFORM_RANGE(
+ Mod:uniform_s(Range, St0), Range,
+ X, St1)
+ end,
+ State)
+ end,
+ Algs),
+ %%
+ ct:pal("~nRNG uniform integer 64 bit performance~n",[]),
_ =
- [measure_1(
- Algo,
- DoubleRangePlus1Fun,
- TMark8,
- fun (Range, State) ->
- {int, rand:uniform_s(Range, State)}
- end) || Algo <- Algos],
+ measure_1(
+ fun (_) -> 1 bsl 64 end,
+ fun (State, Range, Mod) ->
+ measure_loop(
+ fun (St0) ->
+ ?CHECK_UNIFORM_RANGE(
+ Mod:uniform_s(Range, St0), Range,
+ X, St1)
+ end,
+ State)
+ end,
+ Algs),
%%
ct:pal("~nRNG uniform float performance~n",[]),
- TMark9 =
+ _ =
measure_1(
- random,
fun (_) -> 0 end,
- undefined,
- fun (_, State) ->
- {uniform, random:uniform_s(State)}
- end),
+ fun (State, _, Mod) ->
+ measure_loop(
+ fun (St0) ->
+ ?CHECK_UNIFORM(Mod:uniform_s(St0), X, St)
+ end,
+ State)
+ end,
+ Algs),
+ %%
+ ct:pal("~nRNG uniform_real float performance~n",[]),
_ =
- [measure_1(
- Algo,
- fun (_) -> 0 end,
- TMark9,
- fun (_, State) ->
- {uniform, rand:uniform_s(State)}
- end) || Algo <- Algos],
+ measure_1(
+ fun (_) -> 0 end,
+ fun (State, _, Mod) ->
+ measure_loop(
+ fun (St0) ->
+ ?CHECK_UNIFORM(Mod:uniform_real_s(St0), X, St)
+ end,
+ State)
+ end,
+ Algs),
%%
ct:pal("~nRNG normal float performance~n",[]),
- io:format("~.12w: not implemented (too few bits)~n", [random]),
- _ = [measure_1(
- Algo,
- fun (_) -> 0 end,
- TMark9,
- fun (_, State) ->
- {normal, rand:normal_s(State)}
- end) || Algo <- Algos],
+ [TMarkNormalFloat|_] =
+ measure_1(
+ fun (_) -> 0 end,
+ fun (State, _, Mod) ->
+ measure_loop(
+ fun (St0) ->
+ ?CHECK_NORMAL(Mod:normal_s(St0), X, St1)
+ end,
+ State)
+ end,
+ Algs),
+ %% Just for fun try an implementation of the Box-Muller
+ %% transformation for creating normal distribution floats
+ %% to compare with our Ziggurat implementation.
+ %% Generates two numbers per call that we add so they
+ %% will not be optimized away. Hence the benchmark time
+ %% is twice what it should be.
+ TwoPi = 2 * math:pi(),
+ _ =
+ measure_1(
+ fun (_) -> 0 end,
+ fun (State, _, Mod) ->
+ measure_loop(
+ fun (State0) ->
+ {U1, State1} = Mod:uniform_real_s(State0),
+ {U2, State2} = Mod:uniform_s(State1),
+ R = math:sqrt(-2.0 * math:log(U1)),
+ T = TwoPi * U2,
+ Z0 = R * math:cos(T),
+ Z1 = R * math:sin(T),
+ ?CHECK_NORMAL({Z0 + Z1, State2}, X, State3)
+ end,
+ State)
+ end,
+ exrop, TMarkNormalFloat),
+ ok.
+
+-define(LOOP_MEASURE, (?LOOP div 5)).
+
+measure_loop(Fun, State) ->
+ measure_loop(Fun, State, ?LOOP_MEASURE).
+%%
+measure_loop(Fun, State, N) when 0 < N ->
+ measure_loop(Fun, Fun(State), N-1);
+measure_loop(_, _, _) ->
ok.
-measure_1(Algo, RangeFun, TMark, Gen) ->
+measure_1(RangeFun, Fun, Algs) ->
+ TMark = measure_1(RangeFun, Fun, hd(Algs), undefined),
+ [TMark] ++
+ [measure_1(RangeFun, Fun, Alg, TMark) || Alg <- tl(Algs)].
+
+measure_1(RangeFun, Fun, Alg, TMark) ->
Parent = self(),
- Seed =
- case Algo of
+ {Mod, State} =
+ case Alg of
crypto64 ->
- crypto64_seed();
+ {rand, crypto64_seed()};
+ crypto_cache ->
+ {rand, crypto:rand_seed_alg(crypto_cache)};
crypto ->
- crypto:rand_seed_s();
+ {rand, crypto:rand_seed_s()};
random ->
- random:seed(os:timestamp()), get(random_seed);
+ {random, random:seed(os:timestamp()), get(random_seed)};
_ ->
- rand:seed_s(Algo)
+ {rand, rand:seed_s(Alg)}
end,
- Range = RangeFun(Seed),
+ Range = RangeFun(State),
Pid = spawn_link(
fun() ->
- Fun = fun() -> measure_2(?LOOP, Range, Seed, Gen) end,
- {Time, ok} = timer:tc(Fun),
+ {Time, ok} = timer:tc(fun () -> Fun(State, Range, Mod) end),
Percent =
case TMark of
undefined -> 100;
@@ -673,7 +891,8 @@ measure_1(Algo, RangeFun, TMark, Gen) ->
end,
io:format(
"~.12w: ~p ns ~p% [16#~.16b]~n",
- [Algo, (Time * 1000 + 500) div ?LOOP, Percent, Range]),
+ [Alg, (Time * 1000 + 500) div ?LOOP_MEASURE,
+ Percent, Range]),
Parent ! {self(), Time},
normal
end),
@@ -681,21 +900,6 @@ measure_1(Algo, RangeFun, TMark, Gen) ->
{Pid, Msg} -> Msg
end.
-measure_2(N, Range, State0, Fun) when N > 0 ->
- case Fun(Range, State0) of
- {int, {Random, State}}
- when is_integer(Random), Random >= 1, Random =< Range ->
- measure_2(N-1, Range, State, Fun);
- {uniform, {Random, State}}
- when is_float(Random), 0.0 =< Random, Random < 1.0 ->
- measure_2(N-1, Range, State, Fun);
- {normal, {Random, State}} when is_float(Random) ->
- measure_2(N-1, Range, State, Fun);
- Res ->
- exit({error, Res, State0})
- end;
-measure_2(0, _, _, _) -> ok.
-
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% The jump sequence tests has two parts
%% for those with the functional API (jump/1)
diff --git a/lib/stdlib/test/stdlib.spec b/lib/stdlib/test/stdlib.spec
index 3768e494b2..91712b8963 100644
--- a/lib/stdlib/test/stdlib.spec
+++ b/lib/stdlib/test/stdlib.spec
@@ -1 +1,2 @@
{suites,"../stdlib_test",all}.
+{skip_suites,"../stdlib_test",stdlib_bench_SUITE, "bench only"}.
diff --git a/lib/stdlib/test/stdlib_bench.spec b/lib/stdlib/test/stdlib_bench.spec
new file mode 100644
index 0000000000..a5d1e1db80
--- /dev/null
+++ b/lib/stdlib/test/stdlib_bench.spec
@@ -0,0 +1,7 @@
+%% Needed to compile ,unicode_util_SUITE and string_SUITE...
+{cases,"../stdlib_test",unicode_util_SUITE, []}.
+{cases,"../stdlib_test",string_SUITE, []}.
+{skip_suites,"../stdlib_test",unicode_util_SUITE, "bench only"}.
+{skip_suites,"../stdlib_test",string_SUITE, "bench only"}.
+
+{suites,"../stdlib_test",[stdlib_bench_SUITE]}.
diff --git a/lib/stdlib/test/stdlib_bench_SUITE.erl b/lib/stdlib/test/stdlib_bench_SUITE.erl
new file mode 100644
index 0000000000..8670e7029c
--- /dev/null
+++ b/lib/stdlib/test/stdlib_bench_SUITE.erl
@@ -0,0 +1,107 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2012-2016. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(stdlib_bench_SUITE).
+-compile([export_all, nowarn_export_all]).
+-include_lib("common_test/include/ct_event.hrl").
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+suite() -> [{ct_hooks,[{ts_install_cth,[{nodenames,2}]}]}].
+
+
+all() ->
+ [{group,unicode}].
+
+groups() ->
+ [{unicode,[{repeat,5}],
+ [norm_nfc_list, norm_nfc_deep_l, norm_nfc_binary,
+ string_lexemes_list, string_lexemes_binary
+ ]}].
+
+init_per_group(_GroupName, Config) ->
+ Config.
+
+end_per_group(_GroupName, Config) ->
+ Config.
+
+init_per_suite(Config) ->
+ Config.
+
+end_per_suite(Config) ->
+ Config.
+
+init_per_testcase(_Func, Conf) ->
+ Conf.
+
+end_per_testcase(_Func, _Conf) ->
+ ok.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+-define(REPEAT_NORM, 5).
+
+norm_nfc_list(Config) ->
+ Bin = norm_data(Config),
+ {_N, Mean, _Stddev, Res} = unicode_util_SUITE:time_count(nfc, list, Bin, ?REPEAT_NORM),
+ report(1000.0*Res / Mean).
+
+norm_nfc_deep_l(Config) ->
+ Bin = norm_data(Config),
+ {_N, Mean, _Stddev, Res} = unicode_util_SUITE:time_count(nfc, deep_l, Bin, ?REPEAT_NORM),
+ report(1000.0*Res / Mean).
+
+norm_nfc_binary(Config) ->
+ Bin = norm_data(Config),
+ {_N, Mean, _Stddev, Res} = unicode_util_SUITE:time_count(nfc, binary, Bin, ?REPEAT_NORM),
+ report(1000.0*Res / Mean).
+
+
+string_lexemes_list(Config) ->
+ %% Use nth_lexeme instead of lexemes to avoid building a result of
+ %% large lists which causes large differences between test runs, gc?
+ Bin = norm_data(Config),
+ Fun = fun(Str) -> string:nth_lexeme(Str, 200000, [$;,$\n,$\r]), 200000 end,
+ {_N, Mean, _Stddev, Res} = string_SUITE:time_func(Fun, list, Bin, 15),
+ report(1000.0*Res / Mean).
+
+string_lexemes_binary(Config) ->
+ %% Use nth_lexeme instead of lexemes to avoid building a result of
+ %% large lists which causes large differences between test runs, gc?
+ Bin = norm_data(Config),
+ Fun = fun(Str) -> string:nth_lexeme(Str, 200000, [$;,$\n,$\r]), 200000 end,
+ {_N, Mean, _Stddev, Res} = string_SUITE:time_func(Fun, binary, Bin, ?REPEAT_NORM),
+ report(1000.0*Res / Mean).
+
+%%%
+report(Tps) ->
+ ct_event:notify(#event{name = benchmark_data,
+ data = [{suite,"stdlib_unicode"},{value,round(Tps)}]}),
+ Tps.
+
+norm_data(Config) ->
+ DataDir0 = proplists:get_value(data_dir, Config),
+ DataDir = filename:join(lists:droplast(filename:split(DataDir0))),
+ File = filename:join([DataDir,"unicode_util_SUITE_data","NormalizationTest.txt"]),
+ {ok, Bin} = file:read_file(File),
+ Bin.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
diff --git a/lib/stdlib/test/string_SUITE.erl b/lib/stdlib/test/string_SUITE.erl
index 90f980c0e5..05f18ef238 100644
--- a/lib/stdlib/test/string_SUITE.erl
+++ b/lib/stdlib/test/string_SUITE.erl
@@ -47,7 +47,7 @@
-export([to_upper_to_lower/1]).
%% Run tests when debugging them
--export([debug/0]).
+-export([debug/0, time_func/4]).
suite() ->
[{ct_hooks,[ts_install_cth]},
@@ -728,7 +728,7 @@ do_measure(TestDir) ->
{ok, Bin} = file:read_file(File),
io:format("~p~n",[byte_size(Bin)]),
Do = fun(Name, Func, Mode) ->
- {N, Mean, Stddev, _} = time_func(Func, Mode, Bin),
+ {N, Mean, Stddev, _} = time_func(Func, Mode, Bin, 50),
io:format("~10w ~6w ~6.2fms ±~4.2fms #~.2w gc included~n",
[Name, Mode, Mean/1000, Stddev/1000, N])
end,
@@ -938,19 +938,19 @@ needs_check(_) -> true.
%%%% Timer stuff
-time_func(Fun, Mode, Bin) ->
+time_func(Fun, Mode, Bin, Repeat) ->
timer:sleep(100), %% Let emulator catch up and clean things before test runs
Self = self(),
Pid = spawn_link(fun() ->
Str = mode(Mode, Bin),
- Self ! {self(),time_func(0,0,0, Fun, Str, undefined)}
+ Self ! {self(),time_func(0,0,0, Fun, Str, undefined, Repeat)}
end),
receive {Pid,Msg} -> Msg end.
-time_func(N,Sum,SumSq, Fun, Str, _) when N < 50 ->
+time_func(N,Sum,SumSq, Fun, Str, _, Repeat) when N < Repeat ->
{Time, Res} = timer:tc(fun() -> Fun(Str) end),
- time_func(N+1,Sum+Time,SumSq+Time*Time, Fun, Str, Res);
-time_func(N,Sum,SumSq, _, _, Res) ->
+ time_func(N+1,Sum+Time,SumSq+Time*Time, Fun, Str, Res, Repeat);
+time_func(N,Sum,SumSq, _, _, Res, _) ->
Mean = round(Sum / N),
Stdev = round(math:sqrt((SumSq - (Sum*Sum/N))/(N - 1))),
{N, Mean, Stdev, Res}.
diff --git a/lib/stdlib/test/unicode_util_SUITE.erl b/lib/stdlib/test/unicode_util_SUITE.erl
index 03c24c7027..7dba0a2fd0 100644
--- a/lib/stdlib/test/unicode_util_SUITE.erl
+++ b/lib/stdlib/test/unicode_util_SUITE.erl
@@ -29,7 +29,9 @@
get/1,
count/1]).
--export([debug/0, id/1, bin_split/1, uc_loaded_size/0]).
+-export([debug/0, id/1, bin_split/1, uc_loaded_size/0,
+ time_count/4 %% Used by stdlib_bench_SUITE
+ ]).
suite() ->
[{ct_hooks,[ts_install_cth]},
@@ -323,7 +325,7 @@ do_measure(Config) ->
File = DataDir ++ "/NormalizationTest.txt",
{ok, Bin} = file:read_file(File),
Do = fun(Func, Mode) ->
- {N, Mean, Stddev, Res} = time_count(Func, Mode, Bin),
+ {N, Mean, Stddev, Res} = time_count(Func, Mode, Bin, 10),
io:format("~4w ~6w ~.10w ~.6wms ±~.2wms #~.2w~n",
[Func, Mode, Res, Mean div 1000, Stddev div 1000, N])
end,
@@ -345,19 +347,19 @@ uc_loaded_size([_|Rest]) ->
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-time_count(Fun, Mode, Bin) ->
+time_count(Fun, Mode, Bin, Repeat) ->
timer:sleep(100), %% Let emulator catch up and clean things before test runs
Self = self(),
Pid = spawn_link(fun() ->
Str = mode(Mode, Bin),
- Self ! {self(),do_count(0,0,0, Fun, Str, undefined)}
+ Self ! {self(),do_count(0,0,0, Fun, Str, undefined, Repeat)}
end),
receive {Pid,Msg} -> Msg end.
-do_count(N,Sum,SumSq, Fun, Str, _) when N < 10 ->
+do_count(N,Sum,SumSq, Fun, Str, _, Repeat) when N < Repeat ->
{Time, Res} = do_count(Fun, Str),
- do_count(N+1,Sum+Time,SumSq+Time*Time, Fun, Str, Res);
-do_count(N,Sum,SumSq, _, _, Res) ->
+ do_count(N+1,Sum+Time,SumSq+Time*Time, Fun, Str, Res, Repeat);
+do_count(N,Sum,SumSq, _, _, Res, _) ->
Mean = round(Sum / N),
Stdev = round(math:sqrt((SumSq - (Sum*Sum/N))/(N - 1))),
{N, Mean, Stdev, Res}.
diff --git a/lib/stdlib/test/unicode_util_SUITE_data/GraphemeBreakTest.txt b/lib/stdlib/test/unicode_util_SUITE_data/GraphemeBreakTest.txt
index 4bb4b1369b..d7d8f90de0 100644
--- a/lib/stdlib/test/unicode_util_SUITE_data/GraphemeBreakTest.txt
+++ b/lib/stdlib/test/unicode_util_SUITE_data/GraphemeBreakTest.txt
@@ -1,23 +1,24 @@
-# GraphemeBreakTest-9.0.0.txt
-# Date: 2016-06-02, 18:28:17 GMT
-# © 2016 Unicode®, Inc.
+# GraphemeBreakTest-10.0.0.txt
+# Date: 2017-04-14, 05:40:29 GMT
+# © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
#
-# Default Grapheme Break Test
+# Default Grapheme_Cluster_Break Test
#
# Format:
-# <string> (# <comment>)?
-# <string> contains hex Unicode code points, with
-# ÷ wherever there is a break opportunity, and
+# <string> (# <comment>)?
+# <string> contains hex Unicode code points, with
+# ÷ wherever there is a break opportunity, and
# × wherever there is not.
# <comment> the format can change, but currently it shows:
# - the sample character name
# - (x) the Grapheme_Cluster_Break property value for the sample character
-# - [x] the rule that determines whether there is a break or not
+# - [x] the rule that determines whether there is a break or not,
+# as listed in the Rules section of GraphemeBreakTest.html
#
# These samples may be extended or changed in the future.
#
@@ -53,8 +54,8 @@
÷ 0020 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ 0020 ÷ 2764 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ 0020 × 0308 ÷ 2764 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ 0020 ÷ 2640 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ 0020 × 0308 ÷ 2640 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ 0020 ÷ 1F466 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0020 × 0308 ÷ 1F466 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -93,8 +94,8 @@
÷ 000D ÷ 0308 ÷ 1F3FB ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 000D ÷ 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ 000D ÷ 2764 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ 000D ÷ 0308 ÷ 2764 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ 000D ÷ 2640 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 2640 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ 000D ÷ 1F466 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] BOY (EBG) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 1F466 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 000D ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
@@ -133,8 +134,8 @@
÷ 000A ÷ 0308 ÷ 1F3FB ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 000A ÷ 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ 000A ÷ 2764 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ 000A ÷ 0308 ÷ 2764 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ 000A ÷ 2640 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 2640 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ 000A ÷ 1F466 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] BOY (EBG) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 1F466 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 000A ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
@@ -173,8 +174,8 @@
÷ 0001 ÷ 0308 ÷ 1F3FB ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 0001 ÷ 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0001 ÷ 0308 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ 0001 ÷ 2764 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ 0001 ÷ 0308 ÷ 2764 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ 0001 ÷ 2640 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 2640 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ 0001 ÷ 1F466 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] BOY (EBG) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 1F466 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0001 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
@@ -213,8 +214,8 @@
÷ 0300 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ 0300 ÷ 2764 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ 0300 × 0308 ÷ 2764 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ 0300 ÷ 2640 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ 0300 × 0308 ÷ 2640 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ 0300 ÷ 1F466 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0300 × 0308 ÷ 1F466 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -253,8 +254,8 @@
÷ 0600 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 0600 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0600 × 0308 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ 0600 × 2764 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ 0600 × 0308 ÷ 2764 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ 0600 × 2640 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ 0600 × 0308 ÷ 2640 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ 0600 × 1F466 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] BOY (EBG) ÷ [0.3]
÷ 0600 × 0308 ÷ 1F466 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0600 × 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] <reserved-0378> (Other) ÷ [0.3]
@@ -293,8 +294,8 @@
÷ 0903 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 0903 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0903 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ 0903 ÷ 2764 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ 0903 × 0308 ÷ 2764 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ 0903 ÷ 2640 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ 0903 × 0308 ÷ 2640 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ 0903 ÷ 1F466 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0903 × 0308 ÷ 1F466 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -333,8 +334,8 @@
÷ 1100 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 1100 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 1100 × 0308 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ 1100 ÷ 2764 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ 1100 × 0308 ÷ 2764 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ 1100 ÷ 2640 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ 1100 × 0308 ÷ 2640 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ 1100 ÷ 1F466 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1100 × 0308 ÷ 1F466 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -373,8 +374,8 @@
÷ 1160 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 1160 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 1160 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ 1160 ÷ 2764 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ 1160 × 0308 ÷ 2764 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ 1160 ÷ 2640 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ 1160 × 0308 ÷ 2640 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ 1160 ÷ 1F466 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1160 × 0308 ÷ 1F466 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1160 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -413,8 +414,8 @@
÷ 11A8 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 11A8 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 11A8 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ 11A8 ÷ 2764 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ 11A8 × 0308 ÷ 2764 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ 11A8 ÷ 2640 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 2640 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ 11A8 ÷ 1F466 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 11A8 × 0308 ÷ 1F466 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -453,8 +454,8 @@
÷ AC00 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ AC00 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ AC00 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ AC00 ÷ 2764 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ AC00 × 0308 ÷ 2764 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ AC00 ÷ 2640 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ AC00 × 0308 ÷ 2640 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ AC00 ÷ 1F466 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ AC00 × 0308 ÷ 1F466 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -493,8 +494,8 @@
÷ AC01 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ AC01 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ AC01 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ AC01 ÷ 2764 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ AC01 × 0308 ÷ 2764 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ AC01 ÷ 2640 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ AC01 × 0308 ÷ 2640 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ AC01 ÷ 1F466 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ AC01 × 0308 ÷ 1F466 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -533,8 +534,8 @@
÷ 1F1E6 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 1F1E6 × 0308 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ 1F1E6 ÷ 2764 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ 1F1E6 × 0308 ÷ 2764 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ 1F1E6 ÷ 2640 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 2640 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ 1F1E6 ÷ 1F466 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 1F466 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1F1E6 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -573,8 +574,8 @@
÷ 261D × 0308 × 1F3FB ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) × [10.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 261D × 200D ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 261D × 0308 × 200D ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ 261D ÷ 2764 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ 261D × 0308 ÷ 2764 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ 261D ÷ 2640 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ 261D × 0308 ÷ 2640 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ 261D ÷ 1F466 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 261D × 0308 ÷ 1F466 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 261D ÷ 0378 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -613,8 +614,8 @@
÷ 1F3FB × 0308 ÷ 1F3FB ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 1F3FB × 200D ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 1F3FB × 0308 × 200D ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ 1F3FB ÷ 2764 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ 1F3FB × 0308 ÷ 2764 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ 1F3FB ÷ 2640 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ 1F3FB × 0308 ÷ 2640 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ 1F3FB ÷ 1F466 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1F3FB × 0308 ÷ 1F466 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1F3FB ÷ 0378 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -653,54 +654,54 @@
÷ 200D × 0308 ÷ 1F3FB ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 200D × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ 200D × 2764 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [11.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ 200D × 0308 ÷ 2764 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ 200D × 2640 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [11.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ 200D × 0308 ÷ 2640 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ 200D × 1F466 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [11.0] BOY (EBG) ÷ [0.3]
÷ 200D × 0308 ÷ 1F466 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 200D ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 200D × 0308 ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 200D ÷ D800 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 200D × 0308 ÷ D800 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
-÷ 2764 ÷ 0020 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
-÷ 2764 × 0308 ÷ 0020 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
-÷ 2764 ÷ 000D ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-÷ 2764 × 0308 ÷ 000D ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-÷ 2764 ÷ 000A ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
-÷ 2764 × 0308 ÷ 000A ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
-÷ 2764 ÷ 0001 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 2764 × 0308 ÷ 0001 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 2764 × 0300 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
-÷ 2764 × 0308 × 0300 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
-÷ 2764 ÷ 0600 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
-÷ 2764 × 0308 ÷ 0600 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
-÷ 2764 × 0903 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
-÷ 2764 × 0308 × 0903 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
-÷ 2764 ÷ 1100 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
-÷ 2764 × 0308 ÷ 1100 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
-÷ 2764 ÷ 1160 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
-÷ 2764 × 0308 ÷ 1160 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
-÷ 2764 ÷ 11A8 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
-÷ 2764 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
-÷ 2764 ÷ AC00 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
-÷ 2764 × 0308 ÷ AC00 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
-÷ 2764 ÷ AC01 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 2764 × 0308 ÷ AC01 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 2764 ÷ 1F1E6 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
-÷ 2764 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
-÷ 2764 ÷ 261D ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
-÷ 2764 × 0308 ÷ 261D ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
-÷ 2764 ÷ 1F3FB ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
-÷ 2764 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
-÷ 2764 × 200D ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ 2764 × 0308 × 200D ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ 2764 ÷ 2764 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ 2764 × 0308 ÷ 2764 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ 2764 ÷ 1F466 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] BOY (EBG) ÷ [0.3]
-÷ 2764 × 0308 ÷ 1F466 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
-÷ 2764 ÷ 0378 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
-÷ 2764 × 0308 ÷ 0378 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
-÷ 2764 ÷ D800 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
-÷ 2764 × 0308 ÷ D800 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
+÷ 2640 ÷ 0020 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 2640 × 0308 ÷ 0020 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 2640 ÷ 000D ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 2640 × 0308 ÷ 000D ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 2640 ÷ 000A ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 2640 × 0308 ÷ 000A ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 2640 ÷ 0001 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 2640 × 0308 ÷ 0001 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 2640 × 0300 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
+÷ 2640 × 0308 × 0300 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
+÷ 2640 ÷ 0600 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 2640 × 0308 ÷ 0600 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 2640 × 0903 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 2640 × 0308 × 0903 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 2640 ÷ 1100 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 2640 × 0308 ÷ 1100 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 2640 ÷ 1160 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 2640 × 0308 ÷ 1160 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 2640 ÷ 11A8 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 2640 × 0308 ÷ 11A8 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 2640 ÷ AC00 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 2640 × 0308 ÷ AC00 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 2640 ÷ AC01 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 2640 × 0308 ÷ AC01 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 2640 ÷ 1F1E6 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 2640 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 2640 ÷ 261D ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+÷ 2640 × 0308 ÷ 261D ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+÷ 2640 ÷ 1F3FB ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+÷ 2640 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+÷ 2640 × 200D ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 2640 × 0308 × 200D ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 2640 ÷ 2640 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ 2640 × 0308 ÷ 2640 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ 2640 ÷ 1F466 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] BOY (EBG) ÷ [0.3]
+÷ 2640 × 0308 ÷ 1F466 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
+÷ 2640 ÷ 0378 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 2640 × 0308 ÷ 0378 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 2640 ÷ D800 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
+÷ 2640 × 0308 ÷ D800 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 1F466 ÷ 0020 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1F466 × 0308 ÷ 0020 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1F466 ÷ 000D ÷ # ÷ [0.2] BOY (EBG) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
@@ -733,8 +734,8 @@
÷ 1F466 × 0308 × 1F3FB ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) × [10.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 1F466 × 200D ÷ # ÷ [0.2] BOY (EBG) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 1F466 × 0308 × 200D ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ 1F466 ÷ 2764 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ 1F466 × 0308 ÷ 2764 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ 1F466 ÷ 2640 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ 1F466 × 0308 ÷ 2640 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ 1F466 ÷ 1F466 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1F466 × 0308 ÷ 1F466 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1F466 ÷ 0378 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -773,8 +774,8 @@
÷ 0378 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 0378 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0378 × 0308 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ 0378 ÷ 2764 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ 0378 × 0308 ÷ 2764 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ 0378 ÷ 2640 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ 0378 × 0308 ÷ 2640 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ 0378 ÷ 1F466 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0378 × 0308 ÷ 1F466 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0378 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -813,8 +814,8 @@
÷ D800 ÷ 0308 ÷ 1F3FB ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ D800 ÷ 200D ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ D800 ÷ 0308 × 200D ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
-÷ D800 ÷ 2764 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
-÷ D800 ÷ 0308 ÷ 2764 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ D800 ÷ 2640 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
+÷ D800 ÷ 0308 ÷ 2640 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ D800 ÷ 1F466 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] BOY (EBG) ÷ [0.3]
÷ D800 ÷ 0308 ÷ 1F466 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ D800 ÷ 0378 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
@@ -840,7 +841,7 @@
÷ 261D × 1F3FB ÷ 261D ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [10.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 1F466 × 1F3FB ÷ # ÷ [0.2] BOY (EBG) × [10.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 200D × 1F466 × 1F3FB ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [11.0] BOY (EBG) × [10.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
-÷ 200D × 2764 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [11.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+÷ 200D × 2640 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [11.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3]
÷ 200D × 1F466 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [11.0] BOY (EBG) ÷ [0.3]
÷ 1F466 ÷ 1F466 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] BOY (EBG) ÷ [0.3]
#
diff --git a/lib/stdlib/test/unicode_util_SUITE_data/LineBreakTest.txt b/lib/stdlib/test/unicode_util_SUITE_data/LineBreakTest.txt
index 05efcf5a44..6715446aba 100644
--- a/lib/stdlib/test/unicode_util_SUITE_data/LineBreakTest.txt
+++ b/lib/stdlib/test/unicode_util_SUITE_data/LineBreakTest.txt
@@ -1,25 +1,28 @@
-# LineBreakTest-9.0.0.txt
-# Date: 2016-06-18, 00:42:06 GMT
-# © 2016 Unicode®, Inc.
+# LineBreakTest-10.0.0.txt
+# Date: 2017-04-14, 05:40:30 GMT
+# © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
#
-# Default Line Break Test
+# Default Line_Break Test
#
# Format:
-# <string> (# <comment>)?
-# <string> contains hex Unicode code points, with
-# ÷ wherever there is a break opportunity, and
+# <string> (# <comment>)?
+# <string> contains hex Unicode code points, with
+# ÷ wherever there is a break opportunity, and
# × wherever there is not.
# <comment> the format can change, but currently it shows:
# - the sample character name
# - (x) the Line_Break property value for the sample character
-# - [x] the rule that determines whether there is a break or not
-# Note: The Line Break tests use tailoring of numbers described in Example 7 of Section 8.2 Examples of Customization.
-# They also differ from the results produced by a pair table implementation in sequences like: ZW SP CL.
+# - [x] the rule that determines whether there is a break or not,
+# as listed in the Rules section of LineBreakTest.html
+#
+# Note:
+# The Line_Break tests use tailoring of numbers described in
+# Example 7 of Section 8.2, "Examples of Customization" of UAX #14.
#
# These samples may be extended or changed in the future.
#
diff --git a/lib/stdlib/test/unicode_util_SUITE_data/NormalizationTest.txt b/lib/stdlib/test/unicode_util_SUITE_data/NormalizationTest.txt
index e133fa8a78..71f2371c5e 100644
--- a/lib/stdlib/test/unicode_util_SUITE_data/NormalizationTest.txt
+++ b/lib/stdlib/test/unicode_util_SUITE_data/NormalizationTest.txt
@@ -1,6 +1,6 @@
-# NormalizationTest-9.0.0.txt
-# Date: 2016-04-04, 11:41:55 GMT
-# © 2016 Unicode®, Inc.
+# NormalizationTest-10.0.0.txt
+# Date: 2017-03-08, 08:41:55 GMT
+# © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@@ -17653,6 +17653,10 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE
0061 0CBC 3099 093C 0334 0062;0061 0334 0CBC 093C 3099 0062;0061 0334 0CBC 093C 3099 0062;0061 0334 0CBC 093C 3099 0062;0061 0334 0CBC 093C 3099 0062; # (a◌಼◌゙◌़◌̴b; a◌̴◌಼◌़◌゙b; a◌̴◌಼◌़◌゙b; a◌̴◌಼◌़◌゙b; a◌̴◌಼◌़◌゙b; ) LATIN SMALL LETTER A, KANNADA SIGN NUKTA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, LATIN SMALL LETTER B
0061 05B0 094D 3099 0CCD 0062;0061 3099 094D 0CCD 05B0 0062;0061 3099 094D 0CCD 05B0 0062;0061 3099 094D 0CCD 05B0 0062;0061 3099 094D 0CCD 05B0 0062; # (a◌ְ◌्◌゙◌್b; a◌゙◌्◌್◌ְb; a◌゙◌्◌್◌ְb; a◌゙◌्◌್◌ְb; a◌゙◌्◌್◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, KANNADA SIGN VIRAMA, LATIN SMALL LETTER B
0061 0CCD 05B0 094D 3099 0062;0061 3099 0CCD 094D 05B0 0062;0061 3099 0CCD 094D 05B0 0062;0061 3099 0CCD 094D 05B0 0062;0061 3099 0CCD 094D 05B0 0062; # (a◌್◌ְ◌्◌゙b; a◌゙◌್◌्◌ְb; a◌゙◌್◌्◌ְb; a◌゙◌್◌्◌ְb; a◌゙◌್◌्◌ְb; ) LATIN SMALL LETTER A, KANNADA SIGN VIRAMA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B
+0061 05B0 094D 3099 0D3B 0062;0061 3099 094D 0D3B 05B0 0062;0061 3099 094D 0D3B 05B0 0062;0061 3099 094D 0D3B 05B0 0062;0061 3099 094D 0D3B 05B0 0062; # (a◌ְ◌्◌゙◌഻b; a◌゙◌्◌഻◌ְb; a◌゙◌्◌഻◌ְb; a◌゙◌्◌഻◌ְb; a◌゙◌्◌഻◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, MALAYALAM SIGN VERTICAL BAR VIRAMA, LATIN SMALL LETTER B
+0061 0D3B 05B0 094D 3099 0062;0061 3099 0D3B 094D 05B0 0062;0061 3099 0D3B 094D 05B0 0062;0061 3099 0D3B 094D 05B0 0062;0061 3099 0D3B 094D 05B0 0062; # (a◌഻◌ְ◌्◌゙b; a◌゙◌഻◌्◌ְb; a◌゙◌഻◌्◌ְb; a◌゙◌഻◌्◌ְb; a◌゙◌഻◌्◌ְb; ) LATIN SMALL LETTER A, MALAYALAM SIGN VERTICAL BAR VIRAMA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B
+0061 05B0 094D 3099 0D3C 0062;0061 3099 094D 0D3C 05B0 0062;0061 3099 094D 0D3C 05B0 0062;0061 3099 094D 0D3C 05B0 0062;0061 3099 094D 0D3C 05B0 0062; # (a◌ְ◌्◌゙◌഼b; a◌゙◌्◌഼◌ְb; a◌゙◌्◌഼◌ְb; a◌゙◌्◌഼◌ְb; a◌゙◌्◌഼◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, MALAYALAM SIGN CIRCULAR VIRAMA, LATIN SMALL LETTER B
+0061 0D3C 05B0 094D 3099 0062;0061 3099 0D3C 094D 05B0 0062;0061 3099 0D3C 094D 05B0 0062;0061 3099 0D3C 094D 05B0 0062;0061 3099 0D3C 094D 05B0 0062; # (a◌഼◌ְ◌्◌゙b; a◌゙◌഼◌्◌ְb; a◌゙◌഼◌्◌ְb; a◌゙◌഼◌्◌ְb; a◌゙◌഼◌्◌ְb; ) LATIN SMALL LETTER A, MALAYALAM SIGN CIRCULAR VIRAMA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B
0061 05B0 094D 3099 0D4D 0062;0061 3099 094D 0D4D 05B0 0062;0061 3099 094D 0D4D 05B0 0062;0061 3099 094D 0D4D 05B0 0062;0061 3099 094D 0D4D 05B0 0062; # (a◌ְ◌्◌゙◌്b; a◌゙◌्◌്◌ְb; a◌゙◌्◌്◌ְb; a◌゙◌्◌്◌ְb; a◌゙◌्◌്◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, MALAYALAM SIGN VIRAMA, LATIN SMALL LETTER B
0061 0D4D 05B0 094D 3099 0062;0061 3099 0D4D 094D 05B0 0062;0061 3099 0D4D 094D 05B0 0062;0061 3099 0D4D 094D 05B0 0062;0061 3099 0D4D 094D 05B0 0062; # (a◌്◌ְ◌्◌゙b; a◌゙◌്◌्◌ְb; a◌゙◌്◌्◌ְb; a◌゙◌്◌्◌ְb; a◌゙◌്◌्◌ְb; ) LATIN SMALL LETTER A, MALAYALAM SIGN VIRAMA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B
0061 05B0 094D 3099 0DCA 0062;0061 3099 094D 0DCA 05B0 0062;0061 3099 094D 0DCA 05B0 0062;0061 3099 094D 0DCA 05B0 0062;0061 3099 094D 0DCA 05B0 0062; # (a◌ְ◌्◌゙◌්b; a◌゙◌्◌්◌ְb; a◌゙◌्◌්◌ְb; a◌゙◌्◌්◌ְb; a◌゙◌्◌්◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, SINHALA SIGN AL-LAKUNA, LATIN SMALL LETTER B
@@ -17999,6 +18003,14 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE
0061 1DF4 0315 0300 05AE 0062;0061 05AE 1DF4 0300 0315 0062;0061 05AE 1DF4 0300 0315 0062;0061 05AE 1DF4 0300 0315 0062;0061 05AE 1DF4 0300 0315 0062; # (a◌ᷴ◌̕◌̀◌֮b; a◌֮◌ᷴ◌̀◌̕b; a◌֮◌ᷴ◌̀◌̕b; a◌֮◌ᷴ◌̀◌̕b; a◌֮◌ᷴ◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING LATIN SMALL LETTER U WITH DIAERESIS, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B
0061 0315 0300 05AE 1DF5 0062;00E0 05AE 1DF5 0315 0062;0061 05AE 0300 1DF5 0315 0062;00E0 05AE 1DF5 0315 0062;0061 05AE 0300 1DF5 0315 0062; # (a◌̕◌̀◌֮◌᷵b; à◌֮◌᷵◌̕b; a◌֮◌̀◌᷵◌̕b; à◌֮◌᷵◌̕b; a◌֮◌̀◌᷵◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING UP TACK ABOVE, LATIN SMALL LETTER B
0061 1DF5 0315 0300 05AE 0062;0061 05AE 1DF5 0300 0315 0062;0061 05AE 1DF5 0300 0315 0062;0061 05AE 1DF5 0300 0315 0062;0061 05AE 1DF5 0300 0315 0062; # (a◌᷵◌̕◌̀◌֮b; a◌֮◌᷵◌̀◌̕b; a◌֮◌᷵◌̀◌̕b; a◌֮◌᷵◌̀◌̕b; a◌֮◌᷵◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING UP TACK ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B
+0061 035C 0315 0300 1DF6 0062;00E0 0315 1DF6 035C 0062;0061 0300 0315 1DF6 035C 0062;00E0 0315 1DF6 035C 0062;0061 0300 0315 1DF6 035C 0062; # (a◌͜◌̕◌̀◌᷶b; à◌̕◌᷶◌͜b; a◌̀◌̕◌᷶◌͜b; à◌̕◌᷶◌͜b; a◌̀◌̕◌᷶◌͜b; ) LATIN SMALL LETTER A, COMBINING DOUBLE BREVE BELOW, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, COMBINING KAVYKA ABOVE RIGHT, LATIN SMALL LETTER B
+0061 1DF6 035C 0315 0300 0062;00E0 1DF6 0315 035C 0062;0061 0300 1DF6 0315 035C 0062;00E0 1DF6 0315 035C 0062;0061 0300 1DF6 0315 035C 0062; # (a◌᷶◌͜◌̕◌̀b; à◌᷶◌̕◌͜b; a◌̀◌᷶◌̕◌͜b; à◌᷶◌̕◌͜b; a◌̀◌᷶◌̕◌͜b; ) LATIN SMALL LETTER A, COMBINING KAVYKA ABOVE RIGHT, COMBINING DOUBLE BREVE BELOW, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, LATIN SMALL LETTER B
+0061 0300 05AE 1D16D 1DF7 0062;00E0 1D16D 05AE 1DF7 0062;0061 1D16D 05AE 1DF7 0300 0062;00E0 1D16D 05AE 1DF7 0062;0061 1D16D 05AE 1DF7 0300 0062; # (a◌̀◌𝅭֮◌᷷b; à𝅭◌֮◌᷷b; a𝅭◌֮◌᷷◌̀b; à𝅭◌֮◌᷷b; a𝅭◌֮◌᷷◌̀b; ) LATIN SMALL LETTER A, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, MUSICAL SYMBOL COMBINING AUGMENTATION DOT, COMBINING KAVYKA ABOVE LEFT, LATIN SMALL LETTER B
+0061 1DF7 0300 05AE 1D16D 0062;00E0 1D16D 1DF7 05AE 0062;0061 1D16D 1DF7 05AE 0300 0062;00E0 1D16D 1DF7 05AE 0062;0061 1D16D 1DF7 05AE 0300 0062; # (a◌᷷◌̀◌𝅭֮b; à𝅭◌᷷◌֮b; a𝅭◌᷷◌֮◌̀b; à𝅭◌᷷◌֮b; a𝅭◌᷷◌֮◌̀b; ) LATIN SMALL LETTER A, COMBINING KAVYKA ABOVE LEFT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, MUSICAL SYMBOL COMBINING AUGMENTATION DOT, LATIN SMALL LETTER B
+0061 0300 05AE 1D16D 1DF8 0062;00E0 1D16D 05AE 1DF8 0062;0061 1D16D 05AE 1DF8 0300 0062;00E0 1D16D 05AE 1DF8 0062;0061 1D16D 05AE 1DF8 0300 0062; # (a◌̀◌𝅭֮◌᷸b; à𝅭◌֮◌᷸b; a𝅭◌֮◌᷸◌̀b; à𝅭◌֮◌᷸b; a𝅭◌֮◌᷸◌̀b; ) LATIN SMALL LETTER A, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, MUSICAL SYMBOL COMBINING AUGMENTATION DOT, COMBINING DOT ABOVE LEFT, LATIN SMALL LETTER B
+0061 1DF8 0300 05AE 1D16D 0062;00E0 1D16D 1DF8 05AE 0062;0061 1D16D 1DF8 05AE 0300 0062;00E0 1D16D 1DF8 05AE 0062;0061 1D16D 1DF8 05AE 0300 0062; # (a◌᷸◌̀◌𝅭֮b; à𝅭◌᷸◌֮b; a𝅭◌᷸◌֮◌̀b; à𝅭◌᷸◌֮b; a𝅭◌᷸◌֮◌̀b; ) LATIN SMALL LETTER A, COMBINING DOT ABOVE LEFT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, MUSICAL SYMBOL COMBINING AUGMENTATION DOT, LATIN SMALL LETTER B
+0061 059A 0316 302A 1DF9 0062;0061 302A 0316 1DF9 059A 0062;0061 302A 0316 1DF9 059A 0062;0061 302A 0316 1DF9 059A 0062;0061 302A 0316 1DF9 059A 0062; # (a◌֚◌̖◌〪◌᷹b; a◌〪◌̖◌᷹◌֚b; a◌〪◌̖◌᷹◌֚b; a◌〪◌̖◌᷹◌֚b; a◌〪◌̖◌᷹◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, IDEOGRAPHIC LEVEL TONE MARK, COMBINING WIDE INVERTED BRIDGE BELOW, LATIN SMALL LETTER B
+0061 1DF9 059A 0316 302A 0062;0061 302A 1DF9 0316 059A 0062;0061 302A 1DF9 0316 059A 0062;0061 302A 1DF9 0316 059A 0062;0061 302A 1DF9 0316 059A 0062; # (a◌᷹◌֚◌̖◌〪b; a◌〪◌᷹◌̖◌֚b; a◌〪◌᷹◌̖◌֚b; a◌〪◌᷹◌̖◌֚b; a◌〪◌᷹◌̖◌֚b; ) LATIN SMALL LETTER A, COMBINING WIDE INVERTED BRIDGE BELOW, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, IDEOGRAPHIC LEVEL TONE MARK, LATIN SMALL LETTER B
0061 0315 0300 05AE 1DFB 0062;00E0 05AE 1DFB 0315 0062;0061 05AE 0300 1DFB 0315 0062;00E0 05AE 1DFB 0315 0062;0061 05AE 0300 1DFB 0315 0062; # (a◌̕◌̀◌֮◌᷻b; à◌֮◌᷻◌̕b; a◌֮◌̀◌᷻◌̕b; à◌֮◌᷻◌̕b; a◌֮◌̀◌᷻◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING DELETION MARK, LATIN SMALL LETTER B
0061 1DFB 0315 0300 05AE 0062;0061 05AE 1DFB 0300 0315 0062;0061 05AE 1DFB 0300 0315 0062;0061 05AE 1DFB 0300 0315 0062;0061 05AE 1DFB 0300 0315 0062; # (a◌᷻◌̕◌̀◌֮b; a◌֮◌᷻◌̀◌̕b; a◌֮◌᷻◌̀◌̕b; a◌֮◌᷻◌̀◌̕b; a◌֮◌᷻◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING DELETION MARK, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B
0061 035D 035C 0315 1DFC 0062;0061 0315 035C 1DFC 035D 0062;0061 0315 035C 1DFC 035D 0062;0061 0315 035C 1DFC 035D 0062;0061 0315 035C 1DFC 035D 0062; # (a◌͝◌͜◌̕◌᷼b; a◌̕◌͜◌᷼◌͝b; a◌̕◌͜◌᷼◌͝b; a◌̕◌͜◌᷼◌͝b; a◌̕◌͜◌᷼◌͝b; ) LATIN SMALL LETTER A, COMBINING DOUBLE BREVE, COMBINING DOUBLE BREVE BELOW, COMBINING COMMA ABOVE RIGHT, COMBINING DOUBLE INVERTED BREVE BELOW, LATIN SMALL LETTER B
@@ -18397,8 +18409,20 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE
0061 116B7 3099 093C 0334 0062;0061 0334 116B7 093C 3099 0062;0061 0334 116B7 093C 3099 0062;0061 0334 116B7 093C 3099 0062;0061 0334 116B7 093C 3099 0062; # (a◌𑚷◌゙◌़◌̴b; a◌̴◌𑚷◌़◌゙b; a◌̴◌𑚷◌़◌゙b; a◌̴◌𑚷◌़◌゙b; a◌̴◌𑚷◌़◌゙b; ) LATIN SMALL LETTER A, TAKRI SIGN NUKTA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, LATIN SMALL LETTER B
0061 05B0 094D 3099 1172B 0062;0061 3099 094D 1172B 05B0 0062;0061 3099 094D 1172B 05B0 0062;0061 3099 094D 1172B 05B0 0062;0061 3099 094D 1172B 05B0 0062; # (a◌ְ◌्◌゙◌𑜫b; a◌゙◌्◌𑜫◌ְb; a◌゙◌्◌𑜫◌ְb; a◌゙◌्◌𑜫◌ְb; a◌゙◌्◌𑜫◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, AHOM SIGN KILLER, LATIN SMALL LETTER B
0061 1172B 05B0 094D 3099 0062;0061 3099 1172B 094D 05B0 0062;0061 3099 1172B 094D 05B0 0062;0061 3099 1172B 094D 05B0 0062;0061 3099 1172B 094D 05B0 0062; # (a◌𑜫◌ְ◌्◌゙b; a◌゙◌𑜫◌्◌ְb; a◌゙◌𑜫◌्◌ְb; a◌゙◌𑜫◌्◌ְb; a◌゙◌𑜫◌्◌ְb; ) LATIN SMALL LETTER A, AHOM SIGN KILLER, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B
+0061 05B0 094D 3099 11A34 0062;0061 3099 094D 11A34 05B0 0062;0061 3099 094D 11A34 05B0 0062;0061 3099 094D 11A34 05B0 0062;0061 3099 094D 11A34 05B0 0062; # (a◌ְ◌्◌゙◌𑨴b; a◌゙◌्◌𑨴◌ְb; a◌゙◌्◌𑨴◌ְb; a◌゙◌्◌𑨴◌ְb; a◌゙◌्◌𑨴◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, ZANABAZAR SQUARE SIGN VIRAMA, LATIN SMALL LETTER B
+0061 11A34 05B0 094D 3099 0062;0061 3099 11A34 094D 05B0 0062;0061 3099 11A34 094D 05B0 0062;0061 3099 11A34 094D 05B0 0062;0061 3099 11A34 094D 05B0 0062; # (a◌𑨴◌ְ◌्◌゙b; a◌゙◌𑨴◌्◌ְb; a◌゙◌𑨴◌्◌ְb; a◌゙◌𑨴◌्◌ְb; a◌゙◌𑨴◌्◌ְb; ) LATIN SMALL LETTER A, ZANABAZAR SQUARE SIGN VIRAMA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B
+0061 05B0 094D 3099 11A47 0062;0061 3099 094D 11A47 05B0 0062;0061 3099 094D 11A47 05B0 0062;0061 3099 094D 11A47 05B0 0062;0061 3099 094D 11A47 05B0 0062; # (a◌ְ◌्◌゙◌𑩇b; a◌゙◌्◌𑩇◌ְb; a◌゙◌्◌𑩇◌ְb; a◌゙◌्◌𑩇◌ְb; a◌゙◌्◌𑩇◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, ZANABAZAR SQUARE SUBJOINER, LATIN SMALL LETTER B
+0061 11A47 05B0 094D 3099 0062;0061 3099 11A47 094D 05B0 0062;0061 3099 11A47 094D 05B0 0062;0061 3099 11A47 094D 05B0 0062;0061 3099 11A47 094D 05B0 0062; # (a◌𑩇◌ְ◌्◌゙b; a◌゙◌𑩇◌्◌ְb; a◌゙◌𑩇◌्◌ְb; a◌゙◌𑩇◌्◌ְb; a◌゙◌𑩇◌्◌ְb; ) LATIN SMALL LETTER A, ZANABAZAR SQUARE SUBJOINER, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B
+0061 05B0 094D 3099 11A99 0062;0061 3099 094D 11A99 05B0 0062;0061 3099 094D 11A99 05B0 0062;0061 3099 094D 11A99 05B0 0062;0061 3099 094D 11A99 05B0 0062; # (a◌ְ◌्◌゙◌𑪙b; a◌゙◌्◌𑪙◌ְb; a◌゙◌्◌𑪙◌ְb; a◌゙◌्◌𑪙◌ְb; a◌゙◌्◌𑪙◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, SOYOMBO SUBJOINER, LATIN SMALL LETTER B
+0061 11A99 05B0 094D 3099 0062;0061 3099 11A99 094D 05B0 0062;0061 3099 11A99 094D 05B0 0062;0061 3099 11A99 094D 05B0 0062;0061 3099 11A99 094D 05B0 0062; # (a◌𑪙◌ְ◌्◌゙b; a◌゙◌𑪙◌्◌ְb; a◌゙◌𑪙◌्◌ְb; a◌゙◌𑪙◌्◌ְb; a◌゙◌𑪙◌्◌ְb; ) LATIN SMALL LETTER A, SOYOMBO SUBJOINER, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B
0061 05B0 094D 3099 11C3F 0062;0061 3099 094D 11C3F 05B0 0062;0061 3099 094D 11C3F 05B0 0062;0061 3099 094D 11C3F 05B0 0062;0061 3099 094D 11C3F 05B0 0062; # (a◌ְ◌्◌゙◌𑰿b; a◌゙◌्◌𑰿◌ְb; a◌゙◌्◌𑰿◌ְb; a◌゙◌्◌𑰿◌ְb; a◌゙◌्◌𑰿◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, BHAIKSUKI SIGN VIRAMA, LATIN SMALL LETTER B
0061 11C3F 05B0 094D 3099 0062;0061 3099 11C3F 094D 05B0 0062;0061 3099 11C3F 094D 05B0 0062;0061 3099 11C3F 094D 05B0 0062;0061 3099 11C3F 094D 05B0 0062; # (a◌𑰿◌ְ◌्◌゙b; a◌゙◌𑰿◌्◌ְb; a◌゙◌𑰿◌्◌ְb; a◌゙◌𑰿◌्◌ְb; a◌゙◌𑰿◌्◌ְb; ) LATIN SMALL LETTER A, BHAIKSUKI SIGN VIRAMA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B
+0061 3099 093C 0334 11D42 0062;0061 0334 093C 11D42 3099 0062;0061 0334 093C 11D42 3099 0062;0061 0334 093C 11D42 3099 0062;0061 0334 093C 11D42 3099 0062; # (a◌゙◌़◌̴◌𑵂b; a◌̴◌़◌𑵂◌゙b; a◌̴◌़◌𑵂◌゙b; a◌̴◌़◌𑵂◌゙b; a◌̴◌़◌𑵂◌゙b; ) LATIN SMALL LETTER A, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, MASARAM GONDI SIGN NUKTA, LATIN SMALL LETTER B
+0061 11D42 3099 093C 0334 0062;0061 0334 11D42 093C 3099 0062;0061 0334 11D42 093C 3099 0062;0061 0334 11D42 093C 3099 0062;0061 0334 11D42 093C 3099 0062; # (a◌𑵂◌゙◌़◌̴b; a◌̴◌𑵂◌़◌゙b; a◌̴◌𑵂◌़◌゙b; a◌̴◌𑵂◌़◌゙b; a◌̴◌𑵂◌़◌゙b; ) LATIN SMALL LETTER A, MASARAM GONDI SIGN NUKTA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, LATIN SMALL LETTER B
+0061 05B0 094D 3099 11D44 0062;0061 3099 094D 11D44 05B0 0062;0061 3099 094D 11D44 05B0 0062;0061 3099 094D 11D44 05B0 0062;0061 3099 094D 11D44 05B0 0062; # (a◌ְ◌्◌゙◌𑵄b; a◌゙◌्◌𑵄◌ְb; a◌゙◌्◌𑵄◌ְb; a◌゙◌्◌𑵄◌ְb; a◌゙◌्◌𑵄◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, MASARAM GONDI SIGN HALANTA, LATIN SMALL LETTER B
+0061 11D44 05B0 094D 3099 0062;0061 3099 11D44 094D 05B0 0062;0061 3099 11D44 094D 05B0 0062;0061 3099 11D44 094D 05B0 0062;0061 3099 11D44 094D 05B0 0062; # (a◌𑵄◌ְ◌्◌゙b; a◌゙◌𑵄◌्◌ְb; a◌゙◌𑵄◌्◌ְb; a◌゙◌𑵄◌्◌ְb; a◌゙◌𑵄◌्◌ְb; ) LATIN SMALL LETTER A, MASARAM GONDI SIGN HALANTA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B
+0061 05B0 094D 3099 11D45 0062;0061 3099 094D 11D45 05B0 0062;0061 3099 094D 11D45 05B0 0062;0061 3099 094D 11D45 05B0 0062;0061 3099 094D 11D45 05B0 0062; # (a◌ְ◌्◌゙◌𑵅b; a◌゙◌्◌𑵅◌ְb; a◌゙◌्◌𑵅◌ְb; a◌゙◌्◌𑵅◌ְb; a◌゙◌्◌𑵅◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, MASARAM GONDI VIRAMA, LATIN SMALL LETTER B
+0061 11D45 05B0 094D 3099 0062;0061 3099 11D45 094D 05B0 0062;0061 3099 11D45 094D 05B0 0062;0061 3099 11D45 094D 05B0 0062;0061 3099 11D45 094D 05B0 0062; # (a◌𑵅◌ְ◌्◌゙b; a◌゙◌𑵅◌्◌ְb; a◌゙◌𑵅◌्◌ְb; a◌゙◌𑵅◌्◌ְb; a◌゙◌𑵅◌्◌ְb; ) LATIN SMALL LETTER A, MASARAM GONDI VIRAMA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B
0061 093C 0334 16AF0 0062;0061 0334 16AF0 093C 0062;0061 0334 16AF0 093C 0062;0061 0334 16AF0 093C 0062;0061 0334 16AF0 093C 0062; # (a◌़◌̴◌𖫰b; a◌̴◌𖫰◌़b; a◌̴◌𖫰◌़b; a◌̴◌𖫰◌़b; a◌̴◌𖫰◌़b; ) LATIN SMALL LETTER A, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, BASSA VAH COMBINING HIGH TONE, LATIN SMALL LETTER B
0061 16AF0 093C 0334 0062;0061 16AF0 0334 093C 0062;0061 16AF0 0334 093C 0062;0061 16AF0 0334 093C 0062;0061 16AF0 0334 093C 0062; # (a◌𖫰◌़◌̴b; a◌𖫰◌̴◌़b; a◌𖫰◌̴◌़b; a◌𖫰◌̴◌़b; a◌𖫰◌̴◌़b; ) LATIN SMALL LETTER A, BASSA VAH COMBINING HIGH TONE, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, LATIN SMALL LETTER B
0061 093C 0334 16AF1 0062;0061 0334 16AF1 093C 0062;0061 0334 16AF1 093C 0062;0061 0334 16AF1 093C 0062;0061 0334 16AF1 093C 0062; # (a◌़◌̴◌𖫱b; a◌̴◌𖫱◌़b; a◌̴◌𖫱◌़b; a◌̴◌𖫱◌़b; a◌̴◌𖫱◌़b; ) LATIN SMALL LETTER A, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, BASSA VAH COMBINING LOW TONE, LATIN SMALL LETTER B
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
new file mode 100644
index 0000000000..c625da56c6
--- /dev/null
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -0,0 +1,844 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+-module(uri_string_SUITE).
+
+-include_lib("common_test/include/ct.hrl").
+
+-export([all/0, suite/0,groups/0,
+ normalize/1,
+ parse_binary_fragment/1, parse_binary_host/1, parse_binary_host_ipv4/1,
+ parse_binary_host_ipv6/1,
+ parse_binary_path/1, parse_binary_pct_encoded_fragment/1, parse_binary_pct_encoded_query/1,
+ parse_binary_pct_encoded_userinfo/1, parse_binary_port/1,
+ parse_binary_query/1, parse_binary_scheme/1, parse_binary_userinfo/1,
+ parse_fragment/1, parse_host/1, parse_host_ipv4/1, parse_host_ipv6/1,
+ parse_path/1, parse_pct_encoded_fragment/1, parse_pct_encoded_query/1,
+ parse_pct_encoded_userinfo/1, parse_port/1,
+ parse_query/1, parse_scheme/1, parse_userinfo/1,
+ parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1,
+ parse_special/1, parse_special2/1, parse_negative/1,
+ recompose_fragment/1, recompose_parse_fragment/1,
+ recompose_query/1, recompose_parse_query/1,
+ recompose_path/1, recompose_parse_path/1,
+ recompose_autogen/1, parse_recompose_autogen/1,
+ transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1
+ ]).
+
+
+-define(SCHEME, "foo").
+-define(USERINFO, "åsa").
+-define(USERINFO_ENC, "%C3%A5sa").
+-define(HOST, "älvsjö").
+-define(HOST_ENC, "%C3%A4lvsj%C3%B6").
+-define(IPV6, "::127.0.0.1").
+-define(IPV6_ENC, "[::127.0.0.1]").
+-define(PORT, 8042).
+-define(PORT_ENC, ":8042").
+-define(PATH, "/där").
+-define(PATH_ENC, "/d%C3%A4r").
+-define(QUERY, "name=örn").
+-define(QUERY_ENC, "?name=%C3%B6rn").
+-define(FRAGMENT, "näsa").
+-define(FRAGMENT_ENC, "#n%C3%A4sa").
+
+
+suite() ->
+ [{timetrap,{minutes,1}}].
+
+all() ->
+ [
+ normalize,
+ parse_binary_scheme,
+ parse_binary_userinfo,
+ parse_binary_pct_encoded_userinfo,
+ parse_binary_host,
+ parse_binary_host_ipv4,
+ parse_binary_host_ipv6,
+ parse_binary_port,
+ parse_binary_path,
+ parse_binary_query,
+ parse_binary_pct_encoded_query,
+ parse_binary_fragment,
+ parse_binary_pct_encoded_fragment,
+ parse_scheme,
+ parse_userinfo,
+ parse_pct_encoded_userinfo,
+ parse_host,
+ parse_host_ipv4,
+ parse_host_ipv6,
+ parse_port,
+ parse_path,
+ parse_query,
+ parse_pct_encoded_query,
+ parse_fragment,
+ parse_pct_encoded_fragment,
+ parse_list,
+ parse_binary,
+ parse_mixed,
+ parse_relative,
+ parse_special,
+ parse_special2,
+ parse_negative,
+ recompose_fragment,
+ recompose_parse_fragment,
+ recompose_query,
+ recompose_parse_query,
+ recompose_path,
+ recompose_parse_path,
+ recompose_autogen,
+ parse_recompose_autogen,
+ transcode_basic,
+ transcode_options,
+ transcode_mixed,
+ transcode_negative
+ ].
+
+groups() ->
+ [].
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions
+%%-------------------------------------------------------------------------
+uri_combinations() ->
+ [[Sch,Usr,Hst,Prt,Pat,Qry,Frg] ||
+ Sch <- [fun update_scheme/1, fun update_scheme_binary/1, none],
+ Usr <- [fun update_userinfo/1, fun update_userinfo_binary/1, none],
+ Hst <- [fun update_host/1, fun update_host_binary/1,
+ fun update_ipv6/1, fun update_ipv6_binary/1, none],
+ Prt <- [fun update_port/1, none],
+ Pat <- [fun update_path/1, fun update_path_binary/1],
+ Qry <- [fun update_query/1,fun update_query_binary/1, none],
+ Frg <- [fun update_fragment/1, fun update_fragment_binary/1, none],
+ not (Usr =:= none andalso Hst =:= none andalso Prt =/= none),
+ not (Usr =/= none andalso Hst =:= none andalso Prt =:= none),
+ not (Usr =/= none andalso Hst =:= none andalso Prt =/= none)].
+
+
+generate_test_vector(Comb) ->
+ Fun = fun (F, {Map, URI}) when is_function(F) -> F({Map, URI});
+ (_, Map) -> Map
+ end,
+ lists:foldl(Fun, {#{}, empty}, Comb).
+
+generate_test_vectors(L) ->
+ lists:map(fun generate_test_vector/1, L).
+
+update_fragment({In, empty}) ->
+ {In#{fragment => ?FRAGMENT}, ?FRAGMENT_ENC};
+update_fragment({In, Out}) when is_list(Out) ->
+ {In#{fragment => ?FRAGMENT}, Out ++ ?FRAGMENT_ENC};
+update_fragment({In, Out}) when is_binary(Out) ->
+ {In#{fragment => ?FRAGMENT}, binary_to_list(Out) ++ ?FRAGMENT_ENC}.
+
+update_fragment_binary({In, empty}) ->
+ {In#{fragment => <<?FRAGMENT/utf8>>}, <<?FRAGMENT_ENC>>};
+update_fragment_binary({In, Out}) when is_list(Out) ->
+ {In#{fragment => <<?FRAGMENT/utf8>>}, Out ++ ?FRAGMENT_ENC};
+update_fragment_binary({In, Out}) when is_binary(Out) ->
+ {In#{fragment => <<?FRAGMENT/utf8>>}, <<Out/binary,?FRAGMENT_ENC>>}.
+
+
+update_query({In, empty}) ->
+ {In#{query => ?QUERY}, ?QUERY_ENC};
+update_query({In, Out}) when is_list(Out) ->
+ {In#{query => ?QUERY}, Out ++ ?QUERY_ENC};
+update_query({In, Out}) when is_binary(Out) ->
+ {In#{query => ?QUERY}, binary_to_list(Out) ++ ?QUERY_ENC}.
+
+update_query_binary({In, empty}) ->
+ {In#{query => <<?QUERY/utf8>>}, <<?QUERY_ENC>>};
+update_query_binary({In, Out}) when is_list(Out) ->
+ {In#{query => <<?QUERY/utf8>>}, Out ++ ?QUERY_ENC};
+update_query_binary({In, Out}) when is_binary(Out) ->
+ {In#{query => <<?QUERY/utf8>>}, <<Out/binary,?QUERY_ENC>>}.
+
+update_path({In, empty}) ->
+ {In#{path => ?PATH}, ?PATH_ENC};
+update_path({In, Out}) when is_list(Out) ->
+ {In#{path => ?PATH}, Out ++ ?PATH_ENC};
+update_path({In, Out}) when is_binary(Out) ->
+ {In#{path => ?PATH}, binary_to_list(Out) ++ ?PATH_ENC}.
+
+update_path_binary({In, empty}) ->
+ {In#{path => <<?PATH/utf8>>}, <<?PATH_ENC>>};
+update_path_binary({In, Out}) when is_list(Out) ->
+ {In#{path => <<?PATH/utf8>>}, Out ++ ?PATH_ENC};
+update_path_binary({In, Out}) when is_binary(Out) ->
+ {In#{path => <<?PATH/utf8>>}, <<Out/binary,?PATH_ENC>>}.
+
+update_port({In, Out}) when is_list(Out) ->
+ {In#{port => ?PORT}, Out ++ ?PORT_ENC};
+update_port({In, Out}) when is_binary(Out) ->
+ {In#{port => ?PORT}, <<Out/binary,?PORT_ENC>>}.
+
+update_host({In, empty}) ->
+ {In#{host => ?HOST}, "//" ++ ?HOST_ENC};
+update_host({In, Out}) when is_list(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => ?HOST}, Out ++ [$@|?HOST_ENC]};
+ false -> {In#{host => ?HOST}, Out ++ [$/,$/|?HOST_ENC]}
+ end;
+update_host({In, Out}) when is_binary(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => ?HOST}, binary_to_list(Out) ++ [$@|?HOST_ENC]};
+ false -> {In#{host => ?HOST}, binary_to_list(Out) ++ [$/,$/|?HOST_ENC]}
+ end.
+
+update_host_binary({In, empty}) ->
+ {In#{host => <<?HOST/utf8>>}, <<"//",?HOST_ENC>>};
+update_host_binary({In, Out}) when is_list(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => <<?HOST/utf8>>}, Out ++ [$@|?HOST_ENC]};
+ false -> {In#{host => <<?HOST/utf8>>}, Out ++ [$/,$/|?HOST_ENC]}
+ end;
+update_host_binary({In, Out}) when is_binary(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => <<?HOST/utf8>>}, <<Out/binary,$@,?HOST_ENC>>};
+ false-> {In#{host => <<?HOST/utf8>>}, <<Out/binary,"//",?HOST_ENC>>}
+ end.
+
+update_ipv6({In, empty}) ->
+ {In#{host => ?IPV6}, "//" ++ ?IPV6_ENC};
+update_ipv6({In, Out}) when is_list(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => ?IPV6}, Out ++ [$@|?IPV6_ENC]};
+ false -> {In#{host => ?IPV6}, Out ++ [$/,$/|?IPV6_ENC]}
+ end;
+update_ipv6({In, Out}) when is_binary(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => ?IPV6}, binary_to_list(Out) ++ [$@|?IPV6_ENC]};
+ false -> {In#{host => ?IPV6}, binary_to_list(Out) ++ [$/,$/|?IPV6_ENC]}
+ end.
+
+update_ipv6_binary({In, empty}) ->
+ {In#{host => <<?IPV6/utf8>>}, <<"//",?IPV6_ENC>>};
+update_ipv6_binary({In, Out}) when is_list(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => <<?IPV6/utf8>>}, Out ++ [$@|?IPV6_ENC]};
+ false -> {In#{host => <<?IPV6/utf8>>}, Out ++ [$/,$/|?IPV6_ENC]}
+ end;
+update_ipv6_binary({In, Out}) when is_binary(Out) ->
+ case maps:is_key(userinfo, In) of
+ true -> {In#{host => <<?IPV6/utf8>>}, <<Out/binary,$@,?IPV6_ENC>>};
+ false-> {In#{host => <<?IPV6/utf8>>}, <<Out/binary,"//",?IPV6_ENC>>}
+ end.
+
+update_userinfo({In, empty}) ->
+ {In#{userinfo => ?USERINFO}, "//" ++ ?USERINFO_ENC};
+update_userinfo({In, Out}) when is_list(Out) ->
+ {In#{userinfo => ?USERINFO}, Out ++ "//" ++ ?USERINFO_ENC};
+update_userinfo({In, Out}) when is_binary(Out) ->
+ {In#{userinfo => ?USERINFO}, binary_to_list(Out) ++ "//" ++ ?USERINFO_ENC}.
+
+update_userinfo_binary({In, empty}) ->
+ {In#{userinfo => <<?USERINFO/utf8>>}, <<"//",?USERINFO_ENC>>};
+update_userinfo_binary({In, Out}) when is_list(Out) ->
+ {In#{userinfo => <<?USERINFO/utf8>>}, Out ++ "//" ++ ?USERINFO_ENC};
+update_userinfo_binary({In, Out}) when is_binary(Out) ->
+ {In#{userinfo => <<?USERINFO/utf8>>}, <<Out/binary,"//",?USERINFO_ENC>>}.
+
+update_scheme({In, empty}) ->
+ {In#{scheme => ?SCHEME}, ?SCHEME ++ ":"}.
+
+update_scheme_binary({In, empty}) ->
+ {In#{scheme => <<?SCHEME/utf8>>}, <<?SCHEME,$:>>}.
+
+
+%% Test recompose on a generated test vector
+run_test_recompose({#{}, empty}) ->
+ try "" = uri_string:recompose(#{}) of
+ _ -> ok
+ catch
+ _:_ -> error({test_failed, #{}, ""})
+ end;
+run_test_recompose({Map, URI}) ->
+ try URI = uri_string:recompose(Map) of
+ URI -> ok
+ catch
+ _:_ -> error({test_failed, Map, URI})
+ end.
+
+%% Test parse - recompose on a generated test vector
+run_test_parse_recompose({#{}, empty}) ->
+ try "" = uri_string:recompose(uri_string:parse("")) of
+ _ -> ok
+ catch
+ _:_ -> error({test_failed, #{}, ""})
+ end;
+run_test_parse_recompose({Map, URI}) ->
+ try URI = uri_string:recompose(uri_string:parse(URI)) of
+ URI -> ok
+ catch
+ _:_ -> error({test_failed, Map, URI})
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Parse tests
+%%-------------------------------------------------------------------------
+
+parse_binary_scheme(_Config) ->
+ #{} = uri_string:parse(<<>>),
+ #{path := <<"foo">>} = uri_string:parse(<<"foo">>),
+ #{scheme := <<"foo">>} = uri_string:parse(<<"foo:">>),
+ #{scheme := <<"foo">>, path := <<"bar:nisse">>} = uri_string:parse(<<"foo:bar:nisse">>),
+ #{scheme := <<"foo">>, host := <<"">>} = uri_string:parse(<<"foo://">>),
+ #{scheme := <<"foo">>, host := <<"">>, path := <<"/">>} = uri_string:parse(<<"foo:///">>),
+ #{scheme := <<"foo">>, host := <<"">>, path := <<"//">>} = uri_string:parse(<<"foo:////">>),
+
+ #{path := <<"/">>} = uri_string:parse(<<"/">>),
+ #{host := <<>>} = uri_string:parse(<<"//">>),
+ #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>).
+
+parse_binary_userinfo(_Config) ->
+ #{scheme := <<"user">>, path := <<"password@localhost">>} =
+ uri_string:parse(<<"user:password@localhost">>),
+ #{path := <<"user@">>} = uri_string:parse(<<"user@">>),
+ #{path := <<"/user@">>} = uri_string:parse(<<"/user@">>),
+ #{path := <<"user@localhost">>} = uri_string:parse(<<"user@localhost">>),
+ #{userinfo := <<"user">>, host := <<"localhost">>} = uri_string:parse(<<"//user@localhost">>),
+ #{userinfo := <<"user:password">>, host := <<"localhost">>} =
+ uri_string:parse(<<"//user:password@localhost">>),
+ #{scheme := <<"foo">>, path := <<"/user@">>} =
+ uri_string:parse(<<"foo:/user@">>),
+ #{scheme := <<"foo">>, userinfo := <<"user">>, host := <<"localhost">>} =
+ uri_string:parse(<<"foo://user@localhost">>),
+ #{scheme := <<"foo">>, userinfo := <<"user:password">>, host := <<"localhost">>} =
+ uri_string:parse(<<"foo://user:password@localhost">>).
+
+parse_binary_pct_encoded_userinfo(_Config) ->
+ #{scheme := <<"user">>, path := <<"合@気道"/utf8>>} =
+ uri_string:parse(<<"user:%E5%90%88@%E6%B0%97%E9%81%93">>),
+ #{path := <<"合気道@"/utf8>>} = uri_string:parse(<<"%E5%90%88%E6%B0%97%E9%81%93@">>),
+ #{path := <<"/合気道@"/utf8>>} = uri_string:parse(<<"/%E5%90%88%E6%B0%97%E9%81%93@">>),
+ #{path := <<"合@気道"/utf8>>} = uri_string:parse(<<"%E5%90%88@%E6%B0%97%E9%81%93">>),
+ #{userinfo := <<"合"/utf8>>, host := <<"気道"/utf8>>} =
+ uri_string:parse(<<"//%E5%90%88@%E6%B0%97%E9%81%93">>),
+ #{userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} =
+ uri_string:parse(<<"//%E5%90%88:%E6%B0%97@%E9%81%93">>),
+ #{scheme := <<"foo">>, path := <<"/合気道@"/utf8>>} =
+ uri_string:parse(<<"foo:/%E5%90%88%E6%B0%97%E9%81%93@">>),
+ #{scheme := <<"foo">>, userinfo := <<"合"/utf8>>, host := <<"気道"/utf8>>} =
+ uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93">>),
+ #{scheme := <<"foo">>, userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} =
+ uri_string:parse(<<"foo://%E5%90%88:%E6%B0%97@%E9%81%93">>),
+ {error,invalid_uri,"@"} = uri_string:parse(<<"//%E5%90%88@%E6%B0%97%E9%81%93@">>),
+ {error,invalid_uri,":"} = uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93@">>).
+
+parse_binary_host(_Config) ->
+ #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>),
+ #{host := <<"hostname">>,scheme := <<"foo">>} = uri_string:parse(<<"foo://hostname">>),
+ #{host := <<"hostname">>,scheme := <<"foo">>, userinfo := <<"user">>} =
+ uri_string:parse(<<"foo://user@hostname">>).
+
+parse_binary_host_ipv4(_Config) ->
+ #{host := <<"127.0.0.1">>} = uri_string:parse(<<"//127.0.0.1">>),
+ #{host := <<"127.0.0.1">>, path := <<"/over/there">>} =
+ uri_string:parse(<<"//127.0.0.1/over/there">>),
+ #{host := <<"127.0.0.1">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"//127.0.0.1?name=ferret">>),
+ #{host := <<"127.0.0.1">>, fragment := <<"nose">>} = uri_string:parse(<<"//127.0.0.1#nose">>),
+ {error,invalid_uri,"x"} = uri_string:parse(<<"//127.0.0.x">>),
+ {error,invalid_uri,"1227.0.0.1"} = uri_string:parse(<<"//1227.0.0.1">>).
+
+parse_binary_host_ipv6(_Config) ->
+ #{host := <<"::127.0.0.1">>} = uri_string:parse(<<"//[::127.0.0.1]">>),
+ #{host := <<"2001:0db8:0000:0000:0000:0000:1428:07ab">>} =
+ uri_string:parse(<<"//[2001:0db8:0000:0000:0000:0000:1428:07ab]">>),
+ #{host := <<"::127.0.0.1">>, path := <<"/over/there">>} =
+ uri_string:parse(<<"//[::127.0.0.1]/over/there">>),
+ #{host := <<"::127.0.0.1">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"//[::127.0.0.1]?name=ferret">>),
+ #{host := <<"::127.0.0.1">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"//[::127.0.0.1]#nose">>),
+ {error,invalid_uri,"x"} = uri_string:parse(<<"//[::127.0.0.x]">>),
+ {error,invalid_uri,"::1227.0.0.1"} = uri_string:parse(<<"//[::1227.0.0.1]">>),
+ {error,invalid_uri,"G"} = uri_string:parse(<<"//[2001:0db8:0000:0000:0000:0000:1428:G7ab]">>).
+
+parse_binary_port(_Config) ->
+ #{path:= <<"/:8042">>} =
+ uri_string:parse(<<"/:8042">>),
+ #{host:= <<>>, port := 8042} =
+ uri_string:parse(<<"//:8042">>),
+ #{host := <<"example.com">>, port:= 8042} =
+ uri_string:parse(<<"//example.com:8042">>),
+ #{scheme := <<"foo">>, path := <<"/:8042">>} =
+ uri_string:parse(<<"foo:/:8042">>),
+ #{scheme := <<"foo">>, host := <<>>, port := 8042} =
+ uri_string:parse(<<"foo://:8042">>),
+ #{scheme := <<"foo">>, host := <<"example.com">>, port := 8042} =
+ uri_string:parse(<<"foo://example.com:8042">>),
+ {error,invalid_uri,":"} = uri_string:parse(":600"),
+ {error,invalid_uri,"x"} = uri_string:parse("//:8042x").
+
+parse_binary_path(_Config) ->
+ #{path := <<"over/there">>} = uri_string:parse(<<"over/there">>),
+ #{path := <<"/over/there">>} = uri_string:parse(<<"/over/there">>),
+ #{scheme := <<"foo">>, path := <<"/over/there">>} =
+ uri_string:parse(<<"foo:/over/there">>),
+ #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/over/there">>} =
+ uri_string:parse(<<"foo://example.com/over/there">>),
+ #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/over/there">>, port := 8042} =
+ uri_string:parse(<<"foo://example.com:8042/over/there">>).
+
+parse_binary_query(_Config) ->
+ #{scheme := <<"foo">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"foo:?name=ferret">>),
+ #{scheme := <<"foo">>, path:= <<"over/there">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"foo:over/there?name=ferret">>),
+ #{scheme := <<"foo">>, path:= <<"/over/there">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"foo:/over/there?name=ferret">>),
+ #{scheme := <<"foo">>, host := <<"example.com">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"foo://example.com?name=ferret">>),
+ #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"foo://example.com/?name=ferret">>),
+
+ #{path := <<>>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"?name=ferret">>),
+ #{path := <<"over/there">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"over/there?name=ferret">>),
+ #{path := <<"/">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"/?name=ferret">>),
+ #{path := <<"/over/there">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"/over/there?name=ferret">>),
+ #{host := <<"example.com">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"//example.com?name=ferret">>),
+ #{host := <<"example.com">>, path := <<"/">>, query := <<"name=ferret">>} =
+ uri_string:parse(<<"//example.com/?name=ferret">>).
+
+parse_binary_pct_encoded_query(_Config) ->
+ #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>,
+ query := <<"name=合気道"/utf8>>} =
+ uri_string:parse(<<"foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>),
+ #{host := <<"example.com">>, path := <<"/">>, query := <<"name=合気道"/utf8>>} =
+ uri_string:parse(<<"//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>).
+
+parse_binary_fragment(_Config) ->
+ #{scheme := <<"foo">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"foo:#nose">>),
+ #{scheme := <<"foo">>, path:= <<"over/there">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"foo:over/there#nose">>),
+ #{scheme := <<"foo">>, path:= <<"/over/there">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"foo:/over/there#nose">>),
+ #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"foo://example.com#nose">>),
+ #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"foo://example.com/#nose">>),
+ #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"foo://example.com#nose">>),
+
+ #{fragment := <<"nose">>} =
+ uri_string:parse(<<"#nose">>),
+ #{path := <<"over/there">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"over/there#nose">>),
+ #{path := <<"/">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"/#nose">>),
+ #{path := <<"/over/there">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"/over/there#nose">>),
+ #{host := <<"example.com">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"//example.com#nose">>),
+ #{host := <<"example.com">>, path := <<"/">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"//example.com/#nose">>).
+
+parse_binary_pct_encoded_fragment(_Config) ->
+ #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"合気道"/utf8>>} =
+ uri_string:parse(<<"foo://example.com#%E5%90%88%E6%B0%97%E9%81%93">>),
+ #{host := <<"example.com">>, path := <<"/">>, fragment := <<"合気道"/utf8>>} =
+ uri_string:parse(<<"//example.com/#%E5%90%88%E6%B0%97%E9%81%93">>).
+
+parse_scheme(_Config) ->
+ #{} = uri_string:parse(""),
+ #{path := "foo"} = uri_string:parse("foo"),
+ #{scheme := "foo"} = uri_string:parse("foo:"),
+ #{scheme := "foo", path := "bar:nisse"} = uri_string:parse("foo:bar:nisse"),
+ #{scheme := "foo", host := ""} = uri_string:parse("foo://"),
+ #{scheme := "foo", host := "", path := "/"} = uri_string:parse("foo:///"),
+ #{scheme := "foo", host := "", path := "//"} = uri_string:parse("foo:////"),
+
+ #{path := "/"} = uri_string:parse("/"),
+ #{host := ""} = uri_string:parse("//"),
+ #{host := "", path := "/"} = uri_string:parse("///").
+
+parse_userinfo(_Config) ->
+ #{scheme := "user", path := "password@localhost"} = uri_string:parse("user:password@localhost"),
+ #{path := "user@"} = uri_string:parse("user@"),
+ #{path := "/user@"} = uri_string:parse("/user@"),
+ #{path := "user@localhost"} = uri_string:parse("user@localhost"),
+ #{userinfo := "user", host := "localhost"} = uri_string:parse("//user@localhost"),
+ #{userinfo := "user:password", host := "localhost"} =
+ uri_string:parse("//user:password@localhost"),
+ #{scheme := "foo", path := "/user@"} =
+ uri_string:parse("foo:/user@"),
+ #{scheme := "foo", userinfo := "user", host := "localhost"} =
+ uri_string:parse("foo://user@localhost"),
+ #{scheme := "foo", userinfo := "user:password", host := "localhost"} =
+ uri_string:parse("foo://user:password@localhost").
+
+parse_pct_encoded_userinfo(_Config) ->
+ #{scheme := "user", path := "合@気道"} =
+ uri_string:parse("user:%E5%90%88@%E6%B0%97%E9%81%93"),
+ #{path := "合気道@"} = uri_string:parse("%E5%90%88%E6%B0%97%E9%81%93@"),
+ #{path := "/合気道@"} = uri_string:parse("/%E5%90%88%E6%B0%97%E9%81%93@"),
+ #{path := "合@気道"} = uri_string:parse("%E5%90%88@%E6%B0%97%E9%81%93"),
+ #{userinfo := "合", host := "気道"} =
+ uri_string:parse("//%E5%90%88@%E6%B0%97%E9%81%93"),
+ #{userinfo := "合:気", host := "道"} =
+ uri_string:parse("//%E5%90%88:%E6%B0%97@%E9%81%93"),
+ #{scheme := "foo", path := "/合気道@"} =
+ uri_string:parse("foo:/%E5%90%88%E6%B0%97%E9%81%93@"),
+ #{scheme := "foo", userinfo := "合", host := "気道"} =
+ uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93"),
+ #{scheme := "foo", userinfo := "合:気", host := "道"} =
+ uri_string:parse("foo://%E5%90%88:%E6%B0%97@%E9%81%93"),
+ {error,invalid_uri,"@"} = uri_string:parse("//%E5%90%88@%E6%B0%97%E9%81%93@"),
+ {error,invalid_uri,":"} = uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93@").
+
+
+parse_host(_Config) ->
+ #{host := "hostname"} = uri_string:parse("//hostname"),
+ #{host := "hostname",scheme := "foo"} = uri_string:parse("foo://hostname"),
+ #{host := "hostname",scheme := "foo", userinfo := "user"} =
+ uri_string:parse("foo://user@hostname").
+
+parse_host_ipv4(_Config) ->
+ #{host := "127.0.0.1"} = uri_string:parse("//127.0.0.1"),
+ #{host := "2001:0db8:0000:0000:0000:0000:1428:07ab"} =
+ uri_string:parse("//[2001:0db8:0000:0000:0000:0000:1428:07ab]"),
+ #{host := "127.0.0.1", path := "/over/there"} = uri_string:parse("//127.0.0.1/over/there"),
+ #{host := "127.0.0.1", query := "name=ferret"} = uri_string:parse("//127.0.0.1?name=ferret"),
+ #{host := "127.0.0.1", fragment := "nose"} = uri_string:parse("//127.0.0.1#nose"),
+ {error,invalid_uri,"x"} = uri_string:parse("//127.0.0.x"),
+ {error,invalid_uri,"1227.0.0.1"} = uri_string:parse("//1227.0.0.1").
+
+parse_host_ipv6(_Config) ->
+ #{host := "::127.0.0.1"} = uri_string:parse("//[::127.0.0.1]"),
+ #{host := "::127.0.0.1", path := "/over/there"} = uri_string:parse("//[::127.0.0.1]/over/there"),
+ #{host := "::127.0.0.1", query := "name=ferret"} =
+ uri_string:parse("//[::127.0.0.1]?name=ferret"),
+ #{host := "::127.0.0.1", fragment := "nose"} = uri_string:parse("//[::127.0.0.1]#nose"),
+ {error,invalid_uri,"x"} = uri_string:parse("//[::127.0.0.x]"),
+ {error,invalid_uri,"::1227.0.0.1"} = uri_string:parse("//[::1227.0.0.1]"),
+ {error,invalid_uri,"G"} = uri_string:parse("//[2001:0db8:0000:0000:0000:0000:1428:G7ab]").
+
+parse_port(_Config) ->
+ #{path:= "/:8042"} =
+ uri_string:parse("/:8042"),
+ #{host:= "", port := 8042} =
+ uri_string:parse("//:8042"),
+ #{host := "example.com", port:= 8042} =
+ uri_string:parse("//example.com:8042"),
+ #{scheme := "foo", path := "/:8042"} =
+ uri_string:parse("foo:/:8042"),
+ #{scheme := "foo", host := "", port := 8042} =
+ uri_string:parse("foo://:8042"),
+ #{scheme := "foo", host := "example.com", port := 8042} =
+ uri_string:parse("foo://example.com:8042").
+
+parse_path(_Config) ->
+ #{path := "over/there"} = uri_string:parse("over/there"),
+ #{path := "/over/there"} = uri_string:parse("/over/there"),
+ #{scheme := "foo", path := "/over/there"} =
+ uri_string:parse("foo:/over/there"),
+ #{scheme := "foo", host := "example.com", path := "/over/there"} =
+ uri_string:parse("foo://example.com/over/there"),
+ #{scheme := "foo", host := "example.com", path := "/over/there", port := 8042} =
+ uri_string:parse("foo://example.com:8042/over/there").
+
+parse_query(_Config) ->
+ #{scheme := "foo", query := "name=ferret"} =
+ uri_string:parse("foo:?name=ferret"),
+ #{scheme := "foo", path:= "over/there", query := "name=ferret"} =
+ uri_string:parse("foo:over/there?name=ferret"),
+ #{scheme := "foo", path:= "/over/there", query := "name=ferret"} =
+ uri_string:parse("foo:/over/there?name=ferret"),
+ #{scheme := "foo", host := "example.com", query := "name=ferret"} =
+ uri_string:parse("foo://example.com?name=ferret"),
+ #{scheme := "foo", host := "example.com", path := "/", query := "name=ferret"} =
+ uri_string:parse("foo://example.com/?name=ferret"),
+
+ #{path := "", query := "name=ferret"} =
+ uri_string:parse("?name=ferret"),
+ #{path := "over/there", query := "name=ferret"} =
+ uri_string:parse("over/there?name=ferret"),
+ #{path := "/", query := "name=ferret"} =
+ uri_string:parse("/?name=ferret"),
+ #{path := "/over/there", query := "name=ferret"} =
+ uri_string:parse("/over/there?name=ferret"),
+ #{host := "example.com", query := "name=ferret"} =
+ uri_string:parse("//example.com?name=ferret"),
+ #{host := "example.com", path := "/", query := "name=ferret"} =
+ uri_string:parse("//example.com/?name=ferret").
+
+parse_pct_encoded_query(_Config) ->
+ #{scheme := "foo", host := "example.com", path := "/",
+ query := "name=合気道"} =
+ uri_string:parse("foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93"),
+ #{host := "example.com", path := "/", query := "name=合気道"} =
+ uri_string:parse("//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93").
+
+parse_fragment(_Config) ->
+ #{scheme := "foo", fragment := "nose"} =
+ uri_string:parse("foo:#nose"),
+ #{scheme := "foo", path:= "over/there", fragment := "nose"} =
+ uri_string:parse("foo:over/there#nose"),
+ #{scheme := "foo", path:= "/over/there", fragment := "nose"} =
+ uri_string:parse("foo:/over/there#nose"),
+ #{scheme := "foo", host := "example.com", fragment := "nose"} =
+ uri_string:parse("foo://example.com#nose"),
+ #{scheme := "foo", host := "example.com", path := "/", fragment := "nose"} =
+ uri_string:parse("foo://example.com/#nose"),
+ #{scheme := "foo", host := "example.com", fragment := "nose"} =
+ uri_string:parse("foo://example.com#nose"),
+
+ #{fragment := "nose"} =
+ uri_string:parse("#nose"),
+ #{path := "over/there", fragment := "nose"} =
+ uri_string:parse("over/there#nose"),
+ #{path := "/", fragment := "nose"} =
+ uri_string:parse("/#nose"),
+ #{path := "/over/there", fragment := "nose"} =
+ uri_string:parse("/over/there#nose"),
+ #{host := "example.com", fragment := "nose"} =
+ uri_string:parse("//example.com#nose"),
+ #{host := "example.com", path := "/", fragment := "nose"} =
+ uri_string:parse("//example.com/#nose").
+
+parse_pct_encoded_fragment(_Config) ->
+ #{scheme := "foo", host := "example.com", fragment := "合気道"} =
+ uri_string:parse("foo://example.com#%E5%90%88%E6%B0%97%E9%81%93"),
+ #{host := "example.com", path := "/", fragment := "合気道"} =
+ uri_string:parse("//example.com/#%E5%90%88%E6%B0%97%E9%81%93").
+
+parse_list(_Config) ->
+ #{scheme := "foo", path := "bar:nisse"} = uri_string:parse("foo:bar:nisse"),
+ #{scheme := "foo", host := "example.com", port := 8042,
+ path := "/over/there", query := "name=ferret", fragment := "nose"} =
+ uri_string:parse("foo://example.com:8042/over/there?name=ferret#nose"),
+ #{scheme := "foo", userinfo := "admin:admin", host := "example.com", port := 8042,
+ path := "/over/there", query := "name=ferret", fragment := "nose"} =
+ uri_string:parse("foo://admin:[email protected]:8042/over/there?name=ferret#nose").
+
+parse_binary(_Config) ->
+ #{scheme := <<"foo">>, path := <<"bar:nisse">>} = uri_string:parse(<<"foo:bar:nisse">>),
+ #{scheme := <<"foo">>, host := <<"example.com">>, port := 8042,
+ path := <<"/over/there">>, query := <<"name=ferret">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"foo://example.com:8042/over/there?name=ferret#nose">>),
+ #{scheme := <<"foo">>, userinfo := <<"admin:admin">>, host := <<"example.com">>, port := 8042,
+ path := <<"/over/there">>, query := <<"name=ferret">>, fragment := <<"nose">>} =
+ uri_string:parse(<<"foo://admin:[email protected]:8042/over/there?name=ferret#nose">>).
+
+
+parse_mixed(_Config) ->
+ #{scheme := "foo", path := "bar"} =
+ uri_string:parse(lists:append("fo",<<"o:bar">>)),
+ #{scheme := "foo", path := "bar"} =
+ uri_string:parse(lists:append("foo:b",<<"ar">>)),
+ #{scheme := "foo", path := "bar:bar"} =
+ uri_string:parse([[102],[111,111],<<":bar">>,58,98,97,114]).
+
+parse_relative(_Config) ->
+ #{path := "/path"} =
+ uri_string:parse(lists:append("/pa",<<"th">>)),
+ #{path := "foo"} =
+ uri_string:parse(lists:append("fo",<<"o">>)).
+
+parse_special(_Config) ->
+ #{host := [],query := []} = uri_string:parse("//?"),
+ #{fragment := [],host := []} = uri_string:parse("//#"),
+ #{host := [],query := [],scheme := "foo"} = uri_string:parse("foo://?"),
+ #{fragment := [],host := [],scheme := "foo"} = uri_string:parse("foo://#"),
+ #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>),
+ #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>),
+ #{host := <<>>, path := <<"/hostname">>} = uri_string:parse(<<"///hostname">>),
+ #{host := [],path := "/",query := []} = uri_string:parse("///?"),
+ #{fragment := [],host := [],path := "/"} = uri_string:parse("///#"),
+ #{host := "foo",query := []} = uri_string:parse("//foo?"),
+ #{fragment := [],host := "foo"} = uri_string:parse("//foo#"),
+ #{host := "foo",path := "/"} = uri_string:parse("//foo/"),
+ #{host := "foo",query := [],scheme := "http"} = uri_string:parse("http://foo?"),
+ #{fragment := [],host := "foo",scheme := "http"} = uri_string:parse("http://foo#"),
+ #{host := "foo",path := "/",scheme := "http"} = uri_string:parse("http://foo/"),
+ #{fragment := [],host := "host",port := 80,scheme := "http"} = uri_string:parse("http://host:80#"),
+ #{host := "host",port := 80,query := [],scheme := "http"} = uri_string:parse("http://host:80?"),
+ #{path := [],query := []} = uri_string:parse("?"),
+ #{path := [],query := "?"} = uri_string:parse("??"),
+ #{path := [],query := "??"} = uri_string:parse("???").
+
+parse_special2(_Config) ->
+ #{host := [],path := "/",port := 1,scheme := "a"} = uri_string:parse("a://:1/"),
+ #{path := "/a/",scheme := "a"} = uri_string:parse("a:/a/"),
+ #{host := [],path := [],userinfo := []} = uri_string:parse("//@"),
+ #{host := [],path := [],scheme := "foo",userinfo := []} = uri_string:parse("foo://@"),
+ #{host := [],path := "/",userinfo := []} = uri_string:parse("//@/"),
+ #{host := [],path := "/",scheme := "foo",userinfo := []} = uri_string:parse("foo://@/"),
+ #{host := "localhost",path := "/",port := undefined} = uri_string:parse("//localhost:/"),
+ #{host := [],path := [],port := undefined} = uri_string:parse("//:").
+
+parse_negative(_Config) ->
+ {error,invalid_uri,"å"} = uri_string:parse("å"),
+ {error,invalid_uri,"å"} = uri_string:parse("aå:/foo"),
+ {error,invalid_uri,":"} = uri_string:parse("foo://usär@host"),
+ {error,invalid_uri,"ö"} = uri_string:parse("//host/path?foö=bar"),
+ {error,invalid_uri,"ö"} = uri_string:parse("//host/path#foö"),
+ {error,invalid_uri,"127.256.0.1"} = uri_string:parse("//127.256.0.1"),
+ {error,invalid_uri,":::127.0.0.1"} = uri_string:parse("//[:::127.0.0.1]"),
+ {error,invalid_utf8,<<0,0,0,246>>} = uri_string:parse("//%00%00%00%F6"),
+ {error,invalid_uri,"A"} = uri_string:parse("//localhost:A8").
+
+
+%%-------------------------------------------------------------------------
+%% Recompose tests
+%%-------------------------------------------------------------------------
+recompose_fragment(_Config) ->
+ <<?FRAGMENT_ENC>> = uri_string:recompose(#{fragment => <<?FRAGMENT/utf8>>, path => <<>>}),
+ ?FRAGMENT_ENC = uri_string:recompose(#{fragment => ?FRAGMENT, path => ""}).
+
+recompose_parse_fragment(_Config) ->
+ <<?FRAGMENT_ENC>> = uri_string:recompose(uri_string:parse(<<?FRAGMENT_ENC>>)),
+ ?FRAGMENT_ENC = uri_string:recompose(uri_string:parse(?FRAGMENT_ENC)).
+
+recompose_query(_Config) ->
+ <<?QUERY_ENC>> =
+ uri_string:recompose(#{query => <<?QUERY/utf8>>, path => <<>>}),
+ <<?QUERY_ENC?FRAGMENT_ENC>> =
+ uri_string:recompose(#{query => <<?QUERY/utf8>>,
+ fragment => <<?FRAGMENT/utf8>>,
+ path => <<>>}),
+ "?name=%C3%B6rn" =
+ uri_string:recompose(#{query => "name=örn", path => ""}),
+ "?name=%C3%B6rn#n%C3%A4sa" =
+ uri_string:recompose(#{query => "name=örn",
+ fragment => "näsa",
+ path => ""}).
+
+recompose_parse_query(_Config) ->
+ <<"?name=%C3%B6rn">> = uri_string:recompose(uri_string:parse(<<"?name=%C3%B6rn">>)),
+ <<"?name=%C3%B6rn#n%C3%A4sa">> =
+ uri_string:recompose(uri_string:parse(<<"?name=%C3%B6rn#n%C3%A4sa">>)),
+ "?name=%C3%B6rn" = uri_string:recompose(uri_string:parse("?name=%C3%B6rn")),
+ "?name=%C3%B6rn#n%C3%A4sa" = uri_string:recompose(uri_string:parse("?name=%C3%B6rn#n%C3%A4sa")).
+
+recompose_path(_Config) ->
+ <<"/d%C3%A4r">> =
+ uri_string:recompose(#{path => <<"/där"/utf8>>}),
+ <<"/d%C3%A4r#n%C3%A4sa">> =
+ uri_string:recompose(#{path => <<"/där"/utf8>>,
+ fragment => <<"näsa"/utf8>>}),
+ <<"/d%C3%A4r?name=%C3%B6rn">> =
+ uri_string:recompose(#{path => <<"/där"/utf8>>,
+ query => <<"name=örn"/utf8>>}),
+ <<"/d%C3%A4r?name=%C3%B6rn#n%C3%A4sa">> =
+ uri_string:recompose(#{path => <<"/där"/utf8>>,
+ query => <<"name=örn"/utf8>>,
+ fragment => <<"näsa"/utf8>>}),
+
+
+ "/d%C3%A4r" =
+ uri_string:recompose(#{path => "/där"}),
+ "/d%C3%A4r#n%C3%A4sa" =
+ uri_string:recompose(#{path => "/där",
+ fragment => "näsa"}),
+ "/d%C3%A4r?name=%C3%B6rn" =
+ uri_string:recompose(#{path => "/där",
+ query => "name=örn"}),
+ "/d%C3%A4r?name=%C3%B6rn#n%C3%A4sa" =
+ uri_string:recompose(#{path => "/där",
+ query => "name=örn",
+ fragment => "näsa"}).
+
+
+recompose_parse_path(_Config) ->
+ <<"/d%C3%A4r">> =
+ uri_string:recompose(uri_string:parse(<<"/d%C3%A4r">>)),
+ <<"/d%C3%A4r#n%C3%A4sa">> =
+ uri_string:recompose(uri_string:parse(<<"/d%C3%A4r#n%C3%A4sa">>)),
+ <<"/d%C3%A4r?name=%C3%B6rn">> =
+ uri_string:recompose(uri_string:parse(<<"/d%C3%A4r?name=%C3%B6rn">>)),
+
+ "/d%C3%A4r" =
+ uri_string:recompose(uri_string:parse("/d%C3%A4r")),
+ "/d%C3%A4r#n%C3%A4sa" =
+ uri_string:recompose(uri_string:parse("/d%C3%A4r#n%C3%A4sa")),
+ "/d%C3%A4r?name=%C3%B6rn" =
+ uri_string:recompose(uri_string:parse("/d%C3%A4r?name=%C3%B6rn")).
+
+
+recompose_autogen(_Config) ->
+ Tests = generate_test_vectors(uri_combinations()),
+ lists:map(fun run_test_recompose/1, Tests).
+
+parse_recompose_autogen(_Config) ->
+ Tests = generate_test_vectors(uri_combinations()),
+ lists:map(fun run_test_parse_recompose/1, Tests).
+
+transcode_basic(_Config) ->
+ <<"foo%C3%B6bar"/utf8>> =
+ uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>, [{in_encoding, utf32},{out_encoding, utf8}]),
+ "foo%C3%B6bar" =
+ uri_string:transcode("foo%00%00%00%F6bar", [{in_encoding, utf32},{out_encoding, utf8}]),
+ <<"foo%00%00%00%F6bar"/utf32>> =
+ uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]),
+ "foo%00%00%00%F6bar" =
+ uri_string:transcode("foo%C3%B6bar", [{in_encoding, utf8},{out_encoding, utf32}]),
+ "foo%C3%B6bar" =
+ uri_string:transcode("foo%F6bar", [{in_encoding, latin1},{out_encoding, utf8}]).
+
+transcode_options(_Config) ->
+ <<"foo%C3%B6bar"/utf8>> =
+ uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, []),
+ <<"foo%C3%B6bar"/utf8>> =
+ uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>, [{in_encoding, utf32}]),
+ <<"foo%00%00%00%F6bar"/utf32>> =
+ uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, [{out_encoding, utf32}]).
+
+transcode_mixed(_Config) ->
+ "foo%00%00%00%F6bar" =
+ uri_string:transcode(["foo",<<"%C3%B6"/utf8>>,<<"ba"/utf8>>,"r"], [{out_encoding, utf32}]),
+ "foo%00%00%00%F6bar" =
+ uri_string:transcode(["foo",<<"%C3%"/utf8>>,<<"B6ba"/utf8>>,"r"], [{out_encoding, utf32}]),
+ "foo%C3%B6bar" =
+ uri_string:transcode(["foo%00", <<"%00%0"/utf32>>,<<"0%F"/utf32>>,"6bar"], [{in_encoding, utf32},{out_encoding, utf8}]).
+
+transcode_negative(_Config) ->
+ {error,invalid_percent_encoding,"%BXbar"} =
+ uri_string:transcode(<<"foo%C3%BXbar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]),
+ {error,invalid_input,<<"ö">>} =
+ uri_string:transcode("foo%F6bar", [{in_encoding, utf8},{out_encoding, utf8}]).
+
+normalize(_Config) ->
+ "/a/g" = uri_string:normalize("/a/b/c/./../../g"),
+ <<"mid/6">> = uri_string:normalize(<<"mid/content=5/../6">>),
+ "http://localhost-%C3%B6rebro/a/g" =
+ uri_string:normalize("http://localhos%74-%c3%b6rebro:80/a/b/c/./../../g"),
+ <<"http://localhost-%C3%B6rebro/a/g">> =
+ uri_string:normalize(<<"http://localhos%74-%c3%b6rebro:80/a/b/c/./../../g">>),
+ <<"https://localhost/">> =
+ uri_string:normalize(<<"https://localhost:443">>),
+ <<"https://localhost:445/">> =
+ uri_string:normalize(<<"https://localhost:445">>),
+ <<"ftp://localhost">> =
+ uri_string:normalize(<<"ftp://localhost:21">>),
+ <<"ssh://localhost">> =
+ uri_string:normalize(<<"ssh://localhost:22">>),
+ <<"sftp://localhost">> =
+ uri_string:normalize(<<"sftp://localhost:22">>),
+ <<"tftp://localhost">> =
+ uri_string:normalize(<<"tftp://localhost:69">>).
diff --git a/lib/stdlib/test/uri_string_property_test_SUITE.erl b/lib/stdlib/test/uri_string_property_test_SUITE.erl
new file mode 100644
index 0000000000..ae2c61c7aa
--- /dev/null
+++ b/lib/stdlib/test/uri_string_property_test_SUITE.erl
@@ -0,0 +1,39 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+-module(uri_string_property_test_SUITE).
+
+-include_lib("common_test/include/ct.hrl").
+-compile(export_all).
+
+all() -> [recompose].
+
+init_per_suite(Config) ->
+ ct_property_test:init_per_suite(Config).
+
+end_per_suite(Config) ->
+ Config.
+
+%%%========================================================================
+%%% Test suites
+%%%========================================================================
+recompose(Config) ->
+ ct_property_test:quickcheck(
+ uri_string_recompose:prop_recompose(),
+ Config).
diff --git a/lib/stdlib/uc_spec/CaseFolding.txt b/lib/stdlib/uc_spec/CaseFolding.txt
index 372ee68bd8..efdf18e441 100644
--- a/lib/stdlib/uc_spec/CaseFolding.txt
+++ b/lib/stdlib/uc_spec/CaseFolding.txt
@@ -1,6 +1,6 @@
-# CaseFolding-9.0.0.txt
-# Date: 2016-03-02, 18:54:54 GMT
-# © 2016 Unicode®, Inc.
+# CaseFolding-10.0.0.txt
+# Date: 2017-04-14, 05:40:18 GMT
+# © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@@ -24,7 +24,7 @@
#
# NOTE: case folding does not preserve normalization formats!
#
-# For information on case folding, including how to have case folding
+# For information on case folding, including how to have case folding
# preserve normalization formats, see Section 3.13 Default Case Algorithms in
# The Unicode Standard.
#
diff --git a/lib/stdlib/uc_spec/CompositionExclusions.txt b/lib/stdlib/uc_spec/CompositionExclusions.txt
index 1999ed1328..ff42508686 100644
--- a/lib/stdlib/uc_spec/CompositionExclusions.txt
+++ b/lib/stdlib/uc_spec/CompositionExclusions.txt
@@ -1,6 +1,6 @@
-# CompositionExclusions-9.0.0.txt
-# Date: 2016-01-21, 22:00:00 GMT [KW, LI]
-# © 2016 Unicode®, Inc.
+# CompositionExclusions-10.0.0.txt
+# Date: 2017-02-15, 00:00:00 GMT [KW, LI]
+# © 2017 Unicode®, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
diff --git a/lib/stdlib/uc_spec/GraphemeBreakProperty.txt b/lib/stdlib/uc_spec/GraphemeBreakProperty.txt
index c5e94a3762..32bb12e47e 100644
--- a/lib/stdlib/uc_spec/GraphemeBreakProperty.txt
+++ b/lib/stdlib/uc_spec/GraphemeBreakProperty.txt
@@ -1,6 +1,6 @@
-# GraphemeBreakProperty-9.0.0.txt
-# Date: 2016-06-03, 22:23:55 GMT
-# © 2016 Unicode®, Inc.
+# GraphemeBreakProperty-10.0.0.txt
+# Date: 2017-03-12, 07:03:41 GMT
+# © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@@ -25,8 +25,11 @@
0D4E ; Prepend # Lo MALAYALAM LETTER DOT REPH
110BD ; Prepend # Cf KAITHI NUMBER SIGN
111C2..111C3 ; Prepend # Lo [2] SHARADA SIGN JIHVAMULIYA..SHARADA SIGN UPADHMANIYA
+11A3A ; Prepend # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA
+11A86..11A89 ; Prepend # Lo [4] SOYOMBO CLUSTER-INITIAL LETTER RA..SOYOMBO CLUSTER-INITIAL LETTER SA
+11D46 ; Prepend # Lo MASARAM GONDI REPHA
-# Total code points: 13
+# Total code points: 19
# ================================================
@@ -126,6 +129,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0AC7..0AC8 ; Extend # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
0ACD ; Extend # Mn GUJARATI SIGN VIRAMA
0AE2..0AE3 ; Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
+0AFA..0AFF ; Extend # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
0B01 ; Extend # Mn ORIYA SIGN CANDRABINDU
0B3C ; Extend # Mn ORIYA SIGN NUKTA
0B3E ; Extend # Mc ORIYA VOWEL SIGN AA
@@ -154,7 +158,8 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
-0D01 ; Extend # Mn MALAYALAM SIGN CANDRABINDU
+0D00..0D01 ; Extend # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
+0D3B..0D3C ; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
0D3E ; Extend # Mc MALAYALAM VOWEL SIGN AA
0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
0D4D ; Extend # Mn MALAYALAM SIGN VIRAMA
@@ -243,7 +248,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
1CED ; Extend # Mn VEDIC SIGN TIRYAK
1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE
1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
-1DC0..1DF5 ; Extend # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE
+1DC0..1DF9 ; Extend # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW
1DFB..1DFF ; Extend # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200C ; Extend # Cf ZERO WIDTH NON-JOINER
20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
@@ -353,6 +358,15 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
1171D..1171F ; Extend # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
11722..11725 ; Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
11727..1172B ; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER
+11A01..11A06 ; Extend # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O
+11A09..11A0A ; Extend # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK
+11A33..11A38 ; Extend # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA
+11A3B..11A3E ; Extend # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA
+11A47 ; Extend # Mn ZANABAZAR SQUARE SUBJOINER
+11A51..11A56 ; Extend # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE
+11A59..11A5B ; Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK
+11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA
+11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER
11C30..11C36 ; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L
11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA
11C3F ; Extend # Mn BHAIKSUKI SIGN VIRAMA
@@ -360,6 +374,11 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
11CAA..11CB0 ; Extend # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA
11CB2..11CB3 ; Extend # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E
11CB5..11CB6 ; Extend # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU
+11D31..11D36 ; Extend # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R
+11D3A ; Extend # Mn MASARAM GONDI VOWEL SIGN E
+11D3C..11D3D ; Extend # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O
+11D3F..11D45 ; Extend # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA
+11D47 ; Extend # Mn MASARAM GONDI RA-KARA
16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
@@ -387,7 +406,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
-# Total code points: 1828
+# Total code points: 1901
# ================================================
@@ -472,6 +491,7 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
1C34..1C35 ; SpacingMark # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
1CE1 ; SpacingMark # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
1CF2..1CF3 ; SpacingMark # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA
+1CF7 ; SpacingMark # Mc VEDIC SIGN ATIKRAMA
A823..A824 ; SpacingMark # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
A827 ; SpacingMark # Mc SYLOTI NAGRI VOWEL SIGN OO
A880..A881 ; SpacingMark # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
@@ -529,6 +549,10 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
116B6 ; SpacingMark # Mc TAKRI SIGN VIRAMA
11720..11721 ; SpacingMark # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
11726 ; SpacingMark # Mc AHOM VOWEL SIGN E
+11A07..11A08 ; SpacingMark # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU
+11A39 ; SpacingMark # Mc ZANABAZAR SQUARE SIGN VISARGA
+11A57..11A58 ; SpacingMark # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU
+11A97 ; SpacingMark # Mc SOYOMBO SIGN VISARGA
11C2F ; SpacingMark # Mc BHAIKSUKI VOWEL SIGN AA
11C3E ; SpacingMark # Mc BHAIKSUKI SIGN VISARGA
11CA9 ; SpacingMark # Mc MARCHEN SUBJOINED LETTER YA
@@ -538,7 +562,7 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT
-# Total code points: 341
+# Total code points: 348
# ================================================
@@ -1375,8 +1399,9 @@ D789..D7A3 ; LVT # Lo [27] HANGUL SYLLABLE HIG..HANGUL SYLLABLE HIH
26F9 ; E_Base # So PERSON WITH BALL
270A..270D ; E_Base # So [4] RAISED FIST..WRITING HAND
1F385 ; E_Base # So FATHER CHRISTMAS
-1F3C3..1F3C4 ; E_Base # So [2] RUNNER..SURFER
-1F3CA..1F3CB ; E_Base # So [2] SWIMMER..WEIGHT LIFTER
+1F3C2..1F3C4 ; E_Base # So [3] SNOWBOARDER..SURFER
+1F3C7 ; E_Base # So HORSE RACING
+1F3CA..1F3CC ; E_Base # So [3] SWIMMER..GOLFER
1F442..1F443 ; E_Base # So [2] EAR..NOSE
1F446..1F450 ; E_Base # So [11] WHITE UP POINTING BACKHAND INDEX..OPEN HANDS SIGN
1F46E ; E_Base # So POLICE OFFICER
@@ -1385,7 +1410,7 @@ D789..D7A3 ; LVT # Lo [27] HANGUL SYLLABLE HIG..HANGUL SYLLABLE HIH
1F481..1F483 ; E_Base # So [3] INFORMATION DESK PERSON..DANCER
1F485..1F487 ; E_Base # So [3] NAIL POLISH..HAIRCUT
1F4AA ; E_Base # So FLEXED BICEPS
-1F575 ; E_Base # So SLEUTH OR SPY
+1F574..1F575 ; E_Base # So [2] MAN IN BUSINESS SUIT LEVITATING..SLEUTH OR SPY
1F57A ; E_Base # So MAN DANCING
1F590 ; E_Base # So RAISED HAND WITH FINGERS SPLAYED
1F595..1F596 ; E_Base # So [2] REVERSED HAND WITH MIDDLE FINGER EXTENDED..RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS
@@ -1394,13 +1419,15 @@ D789..D7A3 ; LVT # Lo [27] HANGUL SYLLABLE HIG..HANGUL SYLLABLE HIH
1F6A3 ; E_Base # So ROWBOAT
1F6B4..1F6B6 ; E_Base # So [3] BICYCLIST..PEDESTRIAN
1F6C0 ; E_Base # So BATH
-1F918..1F91E ; E_Base # So [7] SIGN OF THE HORNS..HAND WITH INDEX AND MIDDLE FINGERS CROSSED
+1F6CC ; E_Base # So SLEEPING ACCOMMODATION
+1F918..1F91C ; E_Base # So [5] SIGN OF THE HORNS..RIGHT-FACING FIST
+1F91E..1F91F ; E_Base # So [2] HAND WITH INDEX AND MIDDLE FINGERS CROSSED..I LOVE YOU HAND SIGN
1F926 ; E_Base # So FACE PALM
-1F930 ; E_Base # So PREGNANT WOMAN
-1F933..1F939 ; E_Base # So [7] SELFIE..JUGGLING
-1F93C..1F93E ; E_Base # So [3] WRESTLERS..HANDBALL
+1F930..1F939 ; E_Base # So [10] PREGNANT WOMAN..JUGGLING
+1F93D..1F93E ; E_Base # So [2] WATER POLO..HANDBALL
+1F9D1..1F9DD ; E_Base # So [13] ADULT..ELF
-# Total code points: 79
+# Total code points: 98
# ================================================
@@ -1416,11 +1443,28 @@ D789..D7A3 ; LVT # Lo [27] HANGUL SYLLABLE HIG..HANGUL SYLLABLE HIH
# ================================================
+2640 ; Glue_After_Zwj # So FEMALE SIGN
+2642 ; Glue_After_Zwj # So MALE SIGN
+2695..2696 ; Glue_After_Zwj # So [2] STAFF OF AESCULAPIUS..SCALES
+2708 ; Glue_After_Zwj # So AIRPLANE
2764 ; Glue_After_Zwj # So HEAVY BLACK HEART
+1F308 ; Glue_After_Zwj # So RAINBOW
+1F33E ; Glue_After_Zwj # So EAR OF RICE
+1F373 ; Glue_After_Zwj # So COOKING
+1F393 ; Glue_After_Zwj # So GRADUATION CAP
+1F3A4 ; Glue_After_Zwj # So MICROPHONE
+1F3A8 ; Glue_After_Zwj # So ARTIST PALETTE
+1F3EB ; Glue_After_Zwj # So SCHOOL
+1F3ED ; Glue_After_Zwj # So FACTORY
1F48B ; Glue_After_Zwj # So KISS MARK
+1F4BB..1F4BC ; Glue_After_Zwj # So [2] PERSONAL COMPUTER..BRIEFCASE
+1F527 ; Glue_After_Zwj # So WRENCH
+1F52C ; Glue_After_Zwj # So MICROSCOPE
1F5E8 ; Glue_After_Zwj # So LEFT SPEECH BUBBLE
+1F680 ; Glue_After_Zwj # So ROCKET
+1F692 ; Glue_After_Zwj # So FIRE ENGINE
-# Total code points: 3
+# Total code points: 22
# ================================================
diff --git a/lib/stdlib/uc_spec/PropList.txt b/lib/stdlib/uc_spec/PropList.txt
index a8c0da7135..9a2d0e4b1c 100644
--- a/lib/stdlib/uc_spec/PropList.txt
+++ b/lib/stdlib/uc_spec/PropList.txt
@@ -1,6 +1,6 @@
-# PropList-9.0.0.txt
-# Date: 2016-06-01, 10:34:30 GMT
-# © 2016 Unicode®, Inc.
+# PropList-10.0.0.txt
+# Date: 2017-03-10, 08:25:30 GMT
+# © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@@ -199,6 +199,9 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA
115C9..115D7 ; Terminal_Punctuation # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES
11641..11642 ; Terminal_Punctuation # Po [2] MODI DANDA..MODI DOUBLE DANDA
1173C..1173E ; Terminal_Punctuation # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
+11A42..11A43 ; Terminal_Punctuation # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD
+11A9B..11A9C ; Terminal_Punctuation # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD
+11AA1..11AA2 ; Terminal_Punctuation # Po [2] SOYOMBO TERMINAL MARK-1..SOYOMBO TERMINAL MARK-2
11C41..11C43 ; Terminal_Punctuation # Po [3] BHAIKSUKI DANDA..BHAIKSUKI WORD SEPARATOR
11C71 ; Terminal_Punctuation # Po MARCHEN MARK SHAD
12470..12474 ; Terminal_Punctuation # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON
@@ -209,7 +212,7 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA
1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
1DA87..1DA8A ; Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON
-# Total code points: 246
+# Total code points: 252
# ================================================
@@ -471,6 +474,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
0AC9 ; Other_Alphabetic # Mc GUJARATI VOWEL SIGN CANDRA O
0ACB..0ACC ; Other_Alphabetic # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
0AE2..0AE3 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
+0AFA..0AFC ; Other_Alphabetic # Mn [3] GUJARATI SIGN SUKUN..GUJARATI SIGN MADDAH
0B01 ; Other_Alphabetic # Mn ORIYA SIGN CANDRABINDU
0B02..0B03 ; Other_Alphabetic # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
0B3E ; Other_Alphabetic # Mc ORIYA VOWEL SIGN AA
@@ -508,7 +512,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
0CCC ; Other_Alphabetic # Mn KANNADA VOWEL SIGN AU
0CD5..0CD6 ; Other_Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
0CE2..0CE3 ; Other_Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
-0D01 ; Other_Alphabetic # Mn MALAYALAM SIGN CANDRABINDU
+0D00..0D01 ; Other_Alphabetic # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
0D02..0D03 ; Other_Alphabetic # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
0D3E..0D40 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
0D41..0D44 ; Other_Alphabetic # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
@@ -726,6 +730,17 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
11722..11725 ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
11726 ; Other_Alphabetic # Mc AHOM VOWEL SIGN E
11727..1172A ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN AW..AHOM VOWEL SIGN AM
+11A01..11A06 ; Other_Alphabetic # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O
+11A07..11A08 ; Other_Alphabetic # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU
+11A09..11A0A ; Other_Alphabetic # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK
+11A35..11A38 ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE SIGN CANDRABINDU..ZANABAZAR SQUARE SIGN ANUSVARA
+11A39 ; Other_Alphabetic # Mc ZANABAZAR SQUARE SIGN VISARGA
+11A3B..11A3E ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA
+11A51..11A56 ; Other_Alphabetic # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE
+11A57..11A58 ; Other_Alphabetic # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU
+11A59..11A5B ; Other_Alphabetic # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK
+11A8A..11A96 ; Other_Alphabetic # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA
+11A97 ; Other_Alphabetic # Mc SOYOMBO SIGN VISARGA
11C2F ; Other_Alphabetic # Mc BHAIKSUKI VOWEL SIGN AA
11C30..11C36 ; Other_Alphabetic # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L
11C38..11C3D ; Other_Alphabetic # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA
@@ -737,6 +752,12 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
11CB2..11CB3 ; Other_Alphabetic # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E
11CB4 ; Other_Alphabetic # Mc MARCHEN VOWEL SIGN O
11CB5..11CB6 ; Other_Alphabetic # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU
+11D31..11D36 ; Other_Alphabetic # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R
+11D3A ; Other_Alphabetic # Mn MASARAM GONDI VOWEL SIGN E
+11D3C..11D3D ; Other_Alphabetic # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O
+11D3F..11D41 ; Other_Alphabetic # Mn [3] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI SIGN VISARGA
+11D43 ; Other_Alphabetic # Mn MASARAM GONDI SIGN CANDRA
+11D47 ; Other_Alphabetic # Mn MASARAM GONDI RA-KARA
16B30..16B36 ; Other_Alphabetic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
16F51..16F7E ; Other_Alphabetic # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG
1BC9E ; Other_Alphabetic # Mn DUPLOYAN DOUBLE MARK
@@ -750,7 +771,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
-# Total code points: 1238
+# Total code points: 1300
# ================================================
@@ -759,18 +780,20 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
3400..4DB5 ; Ideographic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
-4E00..9FD5 ; Ideographic # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5
+4E00..9FEA ; Ideographic # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA
F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
17000..187EC ; Ideographic # Lo [6125] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187EC
18800..18AF2 ; Ideographic # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755
+1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
20000..2A6D6 ; Ideographic # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
2A700..2B734 ; Ideographic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
+2CEB0..2EBE0 ; Ideographic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
-# Total code points: 88284
+# Total code points: 96174
# ================================================
@@ -826,12 +849,14 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
0A4D ; Diacritic # Mn GURMUKHI SIGN VIRAMA
0ABC ; Diacritic # Mn GUJARATI SIGN NUKTA
0ACD ; Diacritic # Mn GUJARATI SIGN VIRAMA
+0AFD..0AFF ; Diacritic # Mn [3] GUJARATI SIGN THREE-DOT NUKTA ABOVE..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
0B3C ; Diacritic # Mn ORIYA SIGN NUKTA
0B4D ; Diacritic # Mn ORIYA SIGN VIRAMA
0BCD ; Diacritic # Mn TAMIL SIGN VIRAMA
0C4D ; Diacritic # Mn TELUGU SIGN VIRAMA
0CBC ; Diacritic # Mn KANNADA SIGN NUKTA
0CCD ; Diacritic # Mn KANNADA SIGN VIRAMA
+0D3B..0D3C ; Diacritic # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
0D4D ; Diacritic # Mn MALAYALAM SIGN VIRAMA
0DCA ; Diacritic # Mn SINHALA SIGN AL-LAKUNA
0E47..0E4C ; Diacritic # Mn [6] THAI CHARACTER MAITAIKHU..THAI CHARACTER THANTHAKHAT
@@ -871,10 +896,11 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
1CE2..1CE8 ; Diacritic # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
1CED ; Diacritic # Mn VEDIC SIGN TIRYAK
1CF4 ; Diacritic # Mn VEDIC TONE CANDRA ABOVE
+1CF7 ; Diacritic # Mc VEDIC SIGN ATIKRAMA
1CF8..1CF9 ; Diacritic # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
1D2C..1D6A ; Diacritic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI
1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW
-1DF5 ; Diacritic # Mn COMBINING UP TACK ABOVE
+1DF5..1DF9 ; Diacritic # Mn [5] COMBINING UP TACK ABOVE..COMBINING WIDE INVERTED BRIDGE BELOW
1DFD..1DFF ; Diacritic # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
1FBD ; Diacritic # Sk GREEK KORONIS
1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
@@ -947,7 +973,12 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON
116B6 ; Diacritic # Mc TAKRI SIGN VIRAMA
116B7 ; Diacritic # Mn TAKRI SIGN NUKTA
1172B ; Diacritic # Mn AHOM SIGN KILLER
+11A34 ; Diacritic # Mn ZANABAZAR SQUARE SIGN VIRAMA
+11A47 ; Diacritic # Mn ZANABAZAR SQUARE SUBJOINER
+11A99 ; Diacritic # Mn SOYOMBO SUBJOINER
11C3F ; Diacritic # Mn BHAIKSUKI SIGN VIRAMA
+11D42 ; Diacritic # Mn MASARAM GONDI SIGN NUKTA
+11D44..11D45 ; Diacritic # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA
16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8
@@ -960,7 +991,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON
1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK
1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA
-# Total code points: 782
+# Total code points: 798
# ================================================
@@ -989,11 +1020,12 @@ AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETE
FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
1135D ; Extender # Lo GRANTHA SIGN PLUTA
115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3
+11A98 ; Extender # Mn SOYOMBO GEMINATION MARK
16B42..16B43 ; Extender # Lm [2] PAHAWH HMONG SIGN VOS NRUA..PAHAWH HMONG SIGN IB YAM
-16FE0 ; Extender # Lm TANGUT ITERATION MARK
+16FE0..16FE1 ; Extender # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK
1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK
-# Total code points: 42
+# Total code points: 44
# ================================================
@@ -1105,7 +1137,7 @@ E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG
# ================================================
3400..4DB5 ; Unified_Ideograph # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
-4E00..9FD5 ; Unified_Ideograph # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5
+4E00..9FEA ; Unified_Ideograph # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA
FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F
FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11
FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14
@@ -1117,8 +1149,9 @@ FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..C
2A700..2B734 ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
+2CEB0..2EBE0 ; Unified_Ideograph # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
-# Total code points: 80388
+# Total code points: 87882
# ================================================
@@ -1277,6 +1310,8 @@ FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP
115C9..115D7 ; Sentence_Terminal # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES
11641..11642 ; Sentence_Terminal # Po [2] MODI DANDA..MODI DOUBLE DANDA
1173C..1173E ; Sentence_Terminal # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
+11A42..11A43 ; Sentence_Terminal # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD
+11A9B..11A9C ; Sentence_Terminal # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD
11C41..11C42 ; Sentence_Terminal # Po [2] BHAIKSUKI DANDA..BHAIKSUKI DOUBLE DANDA
16A6E..16A6F ; Sentence_Terminal # Po [2] MRO DANDA..MRO DOUBLE DANDA
16AF5 ; Sentence_Terminal # Po BASSA VAH FULL STOP
@@ -1285,7 +1320,7 @@ FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP
1BC9F ; Sentence_Terminal # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
1DA88 ; Sentence_Terminal # Po SIGNWRITING FULL STOP
-# Total code points: 124
+# Total code points: 128
# ================================================
@@ -1402,9 +1437,7 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
239B..23B3 ; Pattern_Syntax # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
23B4..23DB ; Pattern_Syntax # So [40] TOP SQUARE BRACKET..FUSE
23DC..23E1 ; Pattern_Syntax # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
-23E2..23FE ; Pattern_Syntax # So [29] WHITE TRAPEZIUM..POWER SLEEP SYMBOL
-23FF ; Pattern_Syntax # Cn <reserved-23FF>
-2400..2426 ; Pattern_Syntax # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
+23E2..2426 ; Pattern_Syntax # So [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO
2427..243F ; Pattern_Syntax # Cn [25] <reserved-2427>..<reserved-243F>
2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
244B..245F ; Pattern_Syntax # Cn [21] <reserved-244B>..<reserved-245F>
@@ -1492,8 +1525,8 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
2BBA..2BBC ; Pattern_Syntax # Cn [3] <reserved-2BBA>..<reserved-2BBC>
2BBD..2BC8 ; Pattern_Syntax # So [12] BALLOT BOX WITH LIGHT X..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED
2BC9 ; Pattern_Syntax # Cn <reserved-2BC9>
-2BCA..2BD1 ; Pattern_Syntax # So [8] TOP HALF BLACK CIRCLE..UNCERTAINTY SIGN
-2BD2..2BEB ; Pattern_Syntax # Cn [26] <reserved-2BD2>..<reserved-2BEB>
+2BCA..2BD2 ; Pattern_Syntax # So [9] TOP HALF BLACK CIRCLE..GROUP MARK
+2BD3..2BEB ; Pattern_Syntax # Cn [25] <reserved-2BD3>..<reserved-2BEB>
2BEC..2BEF ; Pattern_Syntax # So [4] LEFTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS..DOWNWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS
2BF0..2BFF ; Pattern_Syntax # Cn [16] <reserved-2BF0>..<reserved-2BFF>
2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
@@ -1533,8 +1566,8 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
2E40 ; Pattern_Syntax # Pd DOUBLE HYPHEN
2E41 ; Pattern_Syntax # Po REVERSED COMMA
2E42 ; Pattern_Syntax # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
-2E43..2E44 ; Pattern_Syntax # Po [2] DASH WITH LEFT UPTURN..DOUBLE SUSPENSION MARK
-2E45..2E7F ; Pattern_Syntax # Cn [59] <reserved-2E45>..<reserved-2E7F>
+2E43..2E49 ; Pattern_Syntax # Po [7] DASH WITH LEFT UPTURN..DOUBLE STACKED COMMA
+2E4A..2E7F ; Pattern_Syntax # Cn [54] <reserved-2E4A>..<reserved-2E7F>
3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET
3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET
@@ -1576,4 +1609,10 @@ FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT
# Total code points: 10
+# ================================================
+
+1F1E6..1F1FF ; Regional_Indicator # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
+
+# Total code points: 26
+
# EOF
diff --git a/lib/stdlib/uc_spec/SpecialCasing.txt b/lib/stdlib/uc_spec/SpecialCasing.txt
index b23fa7f768..b9ba0d81c1 100644
--- a/lib/stdlib/uc_spec/SpecialCasing.txt
+++ b/lib/stdlib/uc_spec/SpecialCasing.txt
@@ -1,6 +1,6 @@
-# SpecialCasing-9.0.0.txt
-# Date: 2016-03-02, 18:55:13 GMT
-# © 2016 Unicode®, Inc.
+# SpecialCasing-10.0.0.txt
+# Date: 2017-04-14, 05:40:43 GMT
+# © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@@ -197,7 +197,7 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
# ================================================================================
# Conditional Mappings
-# The remainder of this file provides conditional casing data used to produce
+# The remainder of this file provides conditional casing data used to produce
# full case mappings.
# ================================================================================
# Language-Insensitive Mappings
diff --git a/lib/stdlib/uc_spec/UnicodeData.txt b/lib/stdlib/uc_spec/UnicodeData.txt
index a756976461..d89c64f526 100644
--- a/lib/stdlib/uc_spec/UnicodeData.txt
+++ b/lib/stdlib/uc_spec/UnicodeData.txt
@@ -2072,6 +2072,17 @@
085A;MANDAIC VOCALIZATION MARK;Mn;220;NSM;;;;;N;;;;;
085B;MANDAIC GEMINATION MARK;Mn;220;NSM;;;;;N;;;;;
085E;MANDAIC PUNCTUATION;Po;0;R;;;;;N;;;;;
+0860;SYRIAC LETTER MALAYALAM NGA;Lo;0;AL;;;;;N;;;;;
+0861;SYRIAC LETTER MALAYALAM JA;Lo;0;AL;;;;;N;;;;;
+0862;SYRIAC LETTER MALAYALAM NYA;Lo;0;AL;;;;;N;;;;;
+0863;SYRIAC LETTER MALAYALAM TTA;Lo;0;AL;;;;;N;;;;;
+0864;SYRIAC LETTER MALAYALAM NNA;Lo;0;AL;;;;;N;;;;;
+0865;SYRIAC LETTER MALAYALAM NNNA;Lo;0;AL;;;;;N;;;;;
+0866;SYRIAC LETTER MALAYALAM BHA;Lo;0;AL;;;;;N;;;;;
+0867;SYRIAC LETTER MALAYALAM RA;Lo;0;AL;;;;;N;;;;;
+0868;SYRIAC LETTER MALAYALAM LLA;Lo;0;AL;;;;;N;;;;;
+0869;SYRIAC LETTER MALAYALAM LLLA;Lo;0;AL;;;;;N;;;;;
+086A;SYRIAC LETTER MALAYALAM SSA;Lo;0;AL;;;;;N;;;;;
08A0;ARABIC LETTER BEH WITH SMALL V BELOW;Lo;0;AL;;;;;N;;;;;
08A1;ARABIC LETTER BEH WITH HAMZA ABOVE;Lo;0;AL;;;;;N;;;;;
08A2;ARABIC LETTER JEEM WITH TWO DOTS ABOVE;Lo;0;AL;;;;;N;;;;;
@@ -2366,6 +2377,8 @@
09F9;BENGALI CURRENCY DENOMINATOR SIXTEEN;No;0;L;;;;16;N;;;;;
09FA;BENGALI ISSHAR;So;0;L;;;;;N;;;;;
09FB;BENGALI GANDA MARK;Sc;0;ET;;;;;N;;;;;
+09FC;BENGALI LETTER VEDIC ANUSVARA;Lo;0;L;;;;;N;;;;;
+09FD;BENGALI ABBREVIATION SIGN;Po;0;L;;;;;N;;;;;
0A01;GURMUKHI SIGN ADAK BINDI;Mn;0;NSM;;;;;N;;;;;
0A02;GURMUKHI SIGN BINDI;Mn;0;NSM;;;;;N;;;;;
0A03;GURMUKHI SIGN VISARGA;Mc;0;L;;;;;N;;;;;
@@ -2530,6 +2543,12 @@
0AF0;GUJARATI ABBREVIATION SIGN;Po;0;L;;;;;N;;;;;
0AF1;GUJARATI RUPEE SIGN;Sc;0;ET;;;;;N;;;;;
0AF9;GUJARATI LETTER ZHA;Lo;0;L;;;;;N;;;;;
+0AFA;GUJARATI SIGN SUKUN;Mn;0;NSM;;;;;N;;;;;
+0AFB;GUJARATI SIGN SHADDA;Mn;0;NSM;;;;;N;;;;;
+0AFC;GUJARATI SIGN MADDAH;Mn;0;NSM;;;;;N;;;;;
+0AFD;GUJARATI SIGN THREE-DOT NUKTA ABOVE;Mn;0;NSM;;;;;N;;;;;
+0AFE;GUJARATI SIGN CIRCLE NUKTA ABOVE;Mn;0;NSM;;;;;N;;;;;
+0AFF;GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE;Mn;0;NSM;;;;;N;;;;;
0B01;ORIYA SIGN CANDRABINDU;Mn;0;NSM;;;;;N;;;;;
0B02;ORIYA SIGN ANUSVARA;Mc;0;L;;;;;N;;;;;
0B03;ORIYA SIGN VISARGA;Mc;0;L;;;;;N;;;;;
@@ -2876,6 +2895,7 @@
0CEF;KANNADA DIGIT NINE;Nd;0;L;;9;9;9;N;;;;;
0CF1;KANNADA SIGN JIHVAMULIYA;Lo;0;L;;;;;N;;;;;
0CF2;KANNADA SIGN UPADHMANIYA;Lo;0;L;;;;;N;;;;;
+0D00;MALAYALAM SIGN COMBINING ANUSVARA ABOVE;Mn;0;NSM;;;;;N;;;;;
0D01;MALAYALAM SIGN CANDRABINDU;Mn;0;NSM;;;;;N;;;;;
0D02;MALAYALAM SIGN ANUSVARA;Mc;0;L;;;;;N;;;;;
0D03;MALAYALAM SIGN VISARGA;Mc;0;L;;;;;N;;;;;
@@ -2931,6 +2951,8 @@
0D38;MALAYALAM LETTER SA;Lo;0;L;;;;;N;;;;;
0D39;MALAYALAM LETTER HA;Lo;0;L;;;;;N;;;;;
0D3A;MALAYALAM LETTER TTTA;Lo;0;L;;;;;N;;;;;
+0D3B;MALAYALAM SIGN VERTICAL BAR VIRAMA;Mn;9;NSM;;;;;N;;;;;
+0D3C;MALAYALAM SIGN CIRCULAR VIRAMA;Mn;9;NSM;;;;;N;;;;;
0D3D;MALAYALAM SIGN AVAGRAHA;Lo;0;L;;;;;N;;;;;
0D3E;MALAYALAM VOWEL SIGN AA;Mc;0;L;;;;;N;;;;;
0D3F;MALAYALAM VOWEL SIGN I;Mc;0;L;;;;;N;;;;;
@@ -6413,6 +6435,7 @@
1CF4;VEDIC TONE CANDRA ABOVE;Mn;230;NSM;;;;;N;;;;;
1CF5;VEDIC SIGN JIHVAMULIYA;Lo;0;L;;;;;N;;;;;
1CF6;VEDIC SIGN UPADHMANIYA;Lo;0;L;;;;;N;;;;;
+1CF7;VEDIC SIGN ATIKRAMA;Mc;0;L;;;;;N;;;;;
1CF8;VEDIC TONE RING ABOVE;Mn;230;NSM;;;;;N;;;;;
1CF9;VEDIC TONE DOUBLE RING ABOVE;Mn;230;NSM;;;;;N;;;;;
1D00;LATIN LETTER SMALL CAPITAL A;Ll;0;L;;;;;N;;;;;
@@ -6661,6 +6684,10 @@
1DF3;COMBINING LATIN SMALL LETTER O WITH DIAERESIS;Mn;230;NSM;;;;;N;;;;;
1DF4;COMBINING LATIN SMALL LETTER U WITH DIAERESIS;Mn;230;NSM;;;;;N;;;;;
1DF5;COMBINING UP TACK ABOVE;Mn;230;NSM;;;;;N;;;;;
+1DF6;COMBINING KAVYKA ABOVE RIGHT;Mn;232;NSM;;;;;N;;;;;
+1DF7;COMBINING KAVYKA ABOVE LEFT;Mn;228;NSM;;;;;N;;;;;
+1DF8;COMBINING DOT ABOVE LEFT;Mn;228;NSM;;;;;N;;;;;
+1DF9;COMBINING WIDE INVERTED BRIDGE BELOW;Mn;220;NSM;;;;;N;;;;;
1DFB;COMBINING DELETION MARK;Mn;230;NSM;;;;;N;;;;;
1DFC;COMBINING DOUBLE INVERTED BREVE BELOW;Mn;233;NSM;;;;;N;;;;;
1DFD;COMBINING ALMOST EQUAL TO BELOW;Mn;220;NSM;;;;;N;;;;;
@@ -7339,6 +7366,7 @@
20BC;MANAT SIGN;Sc;0;ET;;;;;N;;;;;
20BD;RUBLE SIGN;Sc;0;ET;;;;;N;;;;;
20BE;LARI SIGN;Sc;0;ET;;;;;N;;;;;
+20BF;BITCOIN SIGN;Sc;0;ET;;;;;N;;;;;
20D0;COMBINING LEFT HARPOON ABOVE;Mn;230;NSM;;;;;N;NON-SPACING LEFT HARPOON ABOVE;;;;
20D1;COMBINING RIGHT HARPOON ABOVE;Mn;230;NSM;;;;;N;NON-SPACING RIGHT HARPOON ABOVE;;;;
20D2;COMBINING LONG VERTICAL LINE OVERLAY;Mn;1;NSM;;;;;N;NON-SPACING LONG VERTICAL BAR OVERLAY;;;;
@@ -8135,6 +8163,7 @@
23FC;POWER ON-OFF SYMBOL;So;0;ON;;;;;N;;;;;
23FD;POWER ON SYMBOL;So;0;ON;;;;;N;;;;;
23FE;POWER SLEEP SYMBOL;So;0;ON;;;;;N;;;;;
+23FF;OBSERVER EYE SYMBOL;So;0;ON;;;;;N;;;;;
2400;SYMBOL FOR NULL;So;0;ON;;;;;N;GRAPHIC FOR NULL;;;;
2401;SYMBOL FOR START OF HEADING;So;0;ON;;;;;N;GRAPHIC FOR START OF HEADING;;;;
2402;SYMBOL FOR START OF TEXT;So;0;ON;;;;;N;GRAPHIC FOR START OF TEXT;;;;
@@ -10083,6 +10112,7 @@
2BCF;ROTATED WHITE FOUR POINTED CUSP;So;0;ON;;;;;N;;;;;
2BD0;SQUARE POSITION INDICATOR;So;0;ON;;;;;N;;;;;
2BD1;UNCERTAINTY SIGN;So;0;ON;;;;;N;;;;;
+2BD2;GROUP MARK;So;0;ON;;;;;N;;;;;
2BEC;LEFTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS;So;0;ON;;;;;N;;;;;
2BED;UPWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS;So;0;ON;;;;;N;;;;;
2BEE;RIGHTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS;So;0;ON;;;;;N;;;;;
@@ -10615,6 +10645,11 @@
2E42;DOUBLE LOW-REVERSED-9 QUOTATION MARK;Ps;0;ON;;;;;N;;;;;
2E43;DASH WITH LEFT UPTURN;Po;0;ON;;;;;N;;;;;
2E44;DOUBLE SUSPENSION MARK;Po;0;ON;;;;;N;;;;;
+2E45;INVERTED LOW KAVYKA;Po;0;ON;;;;;N;;;;;
+2E46;INVERTED LOW KAVYKA WITH KAVYKA ABOVE;Po;0;ON;;;;;N;;;;;
+2E47;LOW KAVYKA;Po;0;ON;;;;;N;;;;;
+2E48;LOW KAVYKA WITH DOT;Po;0;ON;;;;;N;;;;;
+2E49;DOUBLE STACKED COMMA;Po;0;ON;;;;;N;;;;;
2E80;CJK RADICAL REPEAT;So;0;ON;;;;;N;;;;;
2E81;CJK RADICAL CLIFF;So;0;ON;;;;;N;;;;;
2E82;CJK RADICAL SECOND ONE;So;0;ON;;;;;N;;;;;
@@ -11250,6 +11285,7 @@
312B;BOPOMOFO LETTER NG;Lo;0;L;;;;;N;;;;;
312C;BOPOMOFO LETTER GN;Lo;0;L;;;;;N;;;;;
312D;BOPOMOFO LETTER IH;Lo;0;L;;;;;N;;;;;
+312E;BOPOMOFO LETTER O WITH DOT ABOVE;Lo;0;L;;;;;N;;;;;
3131;HANGUL LETTER KIYEOK;Lo;0;L;<compat> 1100;;;;N;HANGUL LETTER GIYEOG;;;;
3132;HANGUL LETTER SSANGKIYEOK;Lo;0;L;<compat> 1101;;;;N;HANGUL LETTER SSANG GIYEOG;;;;
3133;HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<compat> 11AA;;;;N;HANGUL LETTER GIYEOG SIOS;;;;
@@ -12016,7 +12052,7 @@
4DFE;HEXAGRAM FOR AFTER COMPLETION;So;0;ON;;;;;N;;;;;
4DFF;HEXAGRAM FOR BEFORE COMPLETION;So;0;ON;;;;;N;;;;;
4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
-9FD5;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
+9FEA;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
A000;YI SYLLABLE IT;Lo;0;L;;;;;N;;;;;
A001;YI SYLLABLE IX;Lo;0;L;;;;;N;;;;;
A002;YI SYLLABLE I;Lo;0;L;;;;;N;;;;;
@@ -17093,6 +17129,9 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
10321;OLD ITALIC NUMERAL FIVE;No;0;L;;;;5;N;;;;;
10322;OLD ITALIC NUMERAL TEN;No;0;L;;;;10;N;;;;;
10323;OLD ITALIC NUMERAL FIFTY;No;0;L;;;;50;N;;;;;
+1032D;OLD ITALIC LETTER YE;Lo;0;L;;;;;N;;;;;
+1032E;OLD ITALIC LETTER NORTHERN TSE;Lo;0;L;;;;;N;;;;;
+1032F;OLD ITALIC LETTER SOUTHERN TSE;Lo;0;L;;;;;N;;;;;
10330;GOTHIC LETTER AHSA;Lo;0;L;;;;;N;;;;;
10331;GOTHIC LETTER BAIRKAN;Lo;0;L;;;;;N;;;;;
10332;GOTHIC LETTER GIBA;Lo;0;L;;;;;N;;;;;
@@ -20068,6 +20107,158 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
118F1;WARANG CITI NUMBER EIGHTY;No;0;L;;;;80;N;;;;;
118F2;WARANG CITI NUMBER NINETY;No;0;L;;;;90;N;;;;;
118FF;WARANG CITI OM;Lo;0;L;;;;;N;;;;;
+11A00;ZANABAZAR SQUARE LETTER A;Lo;0;L;;;;;N;;;;;
+11A01;ZANABAZAR SQUARE VOWEL SIGN I;Mn;0;NSM;;;;;N;;;;;
+11A02;ZANABAZAR SQUARE VOWEL SIGN UE;Mn;0;NSM;;;;;N;;;;;
+11A03;ZANABAZAR SQUARE VOWEL SIGN U;Mn;0;NSM;;;;;N;;;;;
+11A04;ZANABAZAR SQUARE VOWEL SIGN E;Mn;0;NSM;;;;;N;;;;;
+11A05;ZANABAZAR SQUARE VOWEL SIGN OE;Mn;0;NSM;;;;;N;;;;;
+11A06;ZANABAZAR SQUARE VOWEL SIGN O;Mn;0;NSM;;;;;N;;;;;
+11A07;ZANABAZAR SQUARE VOWEL SIGN AI;Mc;0;L;;;;;N;;;;;
+11A08;ZANABAZAR SQUARE VOWEL SIGN AU;Mc;0;L;;;;;N;;;;;
+11A09;ZANABAZAR SQUARE VOWEL SIGN REVERSED I;Mn;0;NSM;;;;;N;;;;;
+11A0A;ZANABAZAR SQUARE VOWEL LENGTH MARK;Mn;0;NSM;;;;;N;;;;;
+11A0B;ZANABAZAR SQUARE LETTER KA;Lo;0;L;;;;;N;;;;;
+11A0C;ZANABAZAR SQUARE LETTER KHA;Lo;0;L;;;;;N;;;;;
+11A0D;ZANABAZAR SQUARE LETTER GA;Lo;0;L;;;;;N;;;;;
+11A0E;ZANABAZAR SQUARE LETTER GHA;Lo;0;L;;;;;N;;;;;
+11A0F;ZANABAZAR SQUARE LETTER NGA;Lo;0;L;;;;;N;;;;;
+11A10;ZANABAZAR SQUARE LETTER CA;Lo;0;L;;;;;N;;;;;
+11A11;ZANABAZAR SQUARE LETTER CHA;Lo;0;L;;;;;N;;;;;
+11A12;ZANABAZAR SQUARE LETTER JA;Lo;0;L;;;;;N;;;;;
+11A13;ZANABAZAR SQUARE LETTER NYA;Lo;0;L;;;;;N;;;;;
+11A14;ZANABAZAR SQUARE LETTER TTA;Lo;0;L;;;;;N;;;;;
+11A15;ZANABAZAR SQUARE LETTER TTHA;Lo;0;L;;;;;N;;;;;
+11A16;ZANABAZAR SQUARE LETTER DDA;Lo;0;L;;;;;N;;;;;
+11A17;ZANABAZAR SQUARE LETTER DDHA;Lo;0;L;;;;;N;;;;;
+11A18;ZANABAZAR SQUARE LETTER NNA;Lo;0;L;;;;;N;;;;;
+11A19;ZANABAZAR SQUARE LETTER TA;Lo;0;L;;;;;N;;;;;
+11A1A;ZANABAZAR SQUARE LETTER THA;Lo;0;L;;;;;N;;;;;
+11A1B;ZANABAZAR SQUARE LETTER DA;Lo;0;L;;;;;N;;;;;
+11A1C;ZANABAZAR SQUARE LETTER DHA;Lo;0;L;;;;;N;;;;;
+11A1D;ZANABAZAR SQUARE LETTER NA;Lo;0;L;;;;;N;;;;;
+11A1E;ZANABAZAR SQUARE LETTER PA;Lo;0;L;;;;;N;;;;;
+11A1F;ZANABAZAR SQUARE LETTER PHA;Lo;0;L;;;;;N;;;;;
+11A20;ZANABAZAR SQUARE LETTER BA;Lo;0;L;;;;;N;;;;;
+11A21;ZANABAZAR SQUARE LETTER BHA;Lo;0;L;;;;;N;;;;;
+11A22;ZANABAZAR SQUARE LETTER MA;Lo;0;L;;;;;N;;;;;
+11A23;ZANABAZAR SQUARE LETTER TSA;Lo;0;L;;;;;N;;;;;
+11A24;ZANABAZAR SQUARE LETTER TSHA;Lo;0;L;;;;;N;;;;;
+11A25;ZANABAZAR SQUARE LETTER DZA;Lo;0;L;;;;;N;;;;;
+11A26;ZANABAZAR SQUARE LETTER DZHA;Lo;0;L;;;;;N;;;;;
+11A27;ZANABAZAR SQUARE LETTER ZHA;Lo;0;L;;;;;N;;;;;
+11A28;ZANABAZAR SQUARE LETTER ZA;Lo;0;L;;;;;N;;;;;
+11A29;ZANABAZAR SQUARE LETTER -A;Lo;0;L;;;;;N;;;;;
+11A2A;ZANABAZAR SQUARE LETTER YA;Lo;0;L;;;;;N;;;;;
+11A2B;ZANABAZAR SQUARE LETTER RA;Lo;0;L;;;;;N;;;;;
+11A2C;ZANABAZAR SQUARE LETTER LA;Lo;0;L;;;;;N;;;;;
+11A2D;ZANABAZAR SQUARE LETTER VA;Lo;0;L;;;;;N;;;;;
+11A2E;ZANABAZAR SQUARE LETTER SHA;Lo;0;L;;;;;N;;;;;
+11A2F;ZANABAZAR SQUARE LETTER SSA;Lo;0;L;;;;;N;;;;;
+11A30;ZANABAZAR SQUARE LETTER SA;Lo;0;L;;;;;N;;;;;
+11A31;ZANABAZAR SQUARE LETTER HA;Lo;0;L;;;;;N;;;;;
+11A32;ZANABAZAR SQUARE LETTER KSSA;Lo;0;L;;;;;N;;;;;
+11A33;ZANABAZAR SQUARE FINAL CONSONANT MARK;Mn;0;NSM;;;;;N;;;;;
+11A34;ZANABAZAR SQUARE SIGN VIRAMA;Mn;9;NSM;;;;;N;;;;;
+11A35;ZANABAZAR SQUARE SIGN CANDRABINDU;Mn;0;NSM;;;;;N;;;;;
+11A36;ZANABAZAR SQUARE SIGN CANDRABINDU WITH ORNAMENT;Mn;0;NSM;;;;;N;;;;;
+11A37;ZANABAZAR SQUARE SIGN CANDRA WITH ORNAMENT;Mn;0;NSM;;;;;N;;;;;
+11A38;ZANABAZAR SQUARE SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;;
+11A39;ZANABAZAR SQUARE SIGN VISARGA;Mc;0;L;;;;;N;;;;;
+11A3A;ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA;Lo;0;L;;;;;N;;;;;
+11A3B;ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA;Mn;0;NSM;;;;;N;;;;;
+11A3C;ZANABAZAR SQUARE CLUSTER-FINAL LETTER RA;Mn;0;NSM;;;;;N;;;;;
+11A3D;ZANABAZAR SQUARE CLUSTER-FINAL LETTER LA;Mn;0;NSM;;;;;N;;;;;
+11A3E;ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA;Mn;0;NSM;;;;;N;;;;;
+11A3F;ZANABAZAR SQUARE INITIAL HEAD MARK;Po;0;L;;;;;N;;;;;
+11A40;ZANABAZAR SQUARE CLOSING HEAD MARK;Po;0;L;;;;;N;;;;;
+11A41;ZANABAZAR SQUARE MARK TSHEG;Po;0;L;;;;;N;;;;;
+11A42;ZANABAZAR SQUARE MARK SHAD;Po;0;L;;;;;N;;;;;
+11A43;ZANABAZAR SQUARE MARK DOUBLE SHAD;Po;0;L;;;;;N;;;;;
+11A44;ZANABAZAR SQUARE MARK LONG TSHEG;Po;0;L;;;;;N;;;;;
+11A45;ZANABAZAR SQUARE INITIAL DOUBLE-LINED HEAD MARK;Po;0;L;;;;;N;;;;;
+11A46;ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK;Po;0;L;;;;;N;;;;;
+11A47;ZANABAZAR SQUARE SUBJOINER;Mn;9;NSM;;;;;N;;;;;
+11A50;SOYOMBO LETTER A;Lo;0;L;;;;;N;;;;;
+11A51;SOYOMBO VOWEL SIGN I;Mn;0;NSM;;;;;N;;;;;
+11A52;SOYOMBO VOWEL SIGN UE;Mn;0;NSM;;;;;N;;;;;
+11A53;SOYOMBO VOWEL SIGN U;Mn;0;NSM;;;;;N;;;;;
+11A54;SOYOMBO VOWEL SIGN E;Mn;0;NSM;;;;;N;;;;;
+11A55;SOYOMBO VOWEL SIGN O;Mn;0;NSM;;;;;N;;;;;
+11A56;SOYOMBO VOWEL SIGN OE;Mn;0;NSM;;;;;N;;;;;
+11A57;SOYOMBO VOWEL SIGN AI;Mc;0;L;;;;;N;;;;;
+11A58;SOYOMBO VOWEL SIGN AU;Mc;0;L;;;;;N;;;;;
+11A59;SOYOMBO VOWEL SIGN VOCALIC R;Mn;0;NSM;;;;;N;;;;;
+11A5A;SOYOMBO VOWEL SIGN VOCALIC L;Mn;0;NSM;;;;;N;;;;;
+11A5B;SOYOMBO VOWEL LENGTH MARK;Mn;0;NSM;;;;;N;;;;;
+11A5C;SOYOMBO LETTER KA;Lo;0;L;;;;;N;;;;;
+11A5D;SOYOMBO LETTER KHA;Lo;0;L;;;;;N;;;;;
+11A5E;SOYOMBO LETTER GA;Lo;0;L;;;;;N;;;;;
+11A5F;SOYOMBO LETTER GHA;Lo;0;L;;;;;N;;;;;
+11A60;SOYOMBO LETTER NGA;Lo;0;L;;;;;N;;;;;
+11A61;SOYOMBO LETTER CA;Lo;0;L;;;;;N;;;;;
+11A62;SOYOMBO LETTER CHA;Lo;0;L;;;;;N;;;;;
+11A63;SOYOMBO LETTER JA;Lo;0;L;;;;;N;;;;;
+11A64;SOYOMBO LETTER JHA;Lo;0;L;;;;;N;;;;;
+11A65;SOYOMBO LETTER NYA;Lo;0;L;;;;;N;;;;;
+11A66;SOYOMBO LETTER TTA;Lo;0;L;;;;;N;;;;;
+11A67;SOYOMBO LETTER TTHA;Lo;0;L;;;;;N;;;;;
+11A68;SOYOMBO LETTER DDA;Lo;0;L;;;;;N;;;;;
+11A69;SOYOMBO LETTER DDHA;Lo;0;L;;;;;N;;;;;
+11A6A;SOYOMBO LETTER NNA;Lo;0;L;;;;;N;;;;;
+11A6B;SOYOMBO LETTER TA;Lo;0;L;;;;;N;;;;;
+11A6C;SOYOMBO LETTER THA;Lo;0;L;;;;;N;;;;;
+11A6D;SOYOMBO LETTER DA;Lo;0;L;;;;;N;;;;;
+11A6E;SOYOMBO LETTER DHA;Lo;0;L;;;;;N;;;;;
+11A6F;SOYOMBO LETTER NA;Lo;0;L;;;;;N;;;;;
+11A70;SOYOMBO LETTER PA;Lo;0;L;;;;;N;;;;;
+11A71;SOYOMBO LETTER PHA;Lo;0;L;;;;;N;;;;;
+11A72;SOYOMBO LETTER BA;Lo;0;L;;;;;N;;;;;
+11A73;SOYOMBO LETTER BHA;Lo;0;L;;;;;N;;;;;
+11A74;SOYOMBO LETTER MA;Lo;0;L;;;;;N;;;;;
+11A75;SOYOMBO LETTER TSA;Lo;0;L;;;;;N;;;;;
+11A76;SOYOMBO LETTER TSHA;Lo;0;L;;;;;N;;;;;
+11A77;SOYOMBO LETTER DZA;Lo;0;L;;;;;N;;;;;
+11A78;SOYOMBO LETTER ZHA;Lo;0;L;;;;;N;;;;;
+11A79;SOYOMBO LETTER ZA;Lo;0;L;;;;;N;;;;;
+11A7A;SOYOMBO LETTER -A;Lo;0;L;;;;;N;;;;;
+11A7B;SOYOMBO LETTER YA;Lo;0;L;;;;;N;;;;;
+11A7C;SOYOMBO LETTER RA;Lo;0;L;;;;;N;;;;;
+11A7D;SOYOMBO LETTER LA;Lo;0;L;;;;;N;;;;;
+11A7E;SOYOMBO LETTER VA;Lo;0;L;;;;;N;;;;;
+11A7F;SOYOMBO LETTER SHA;Lo;0;L;;;;;N;;;;;
+11A80;SOYOMBO LETTER SSA;Lo;0;L;;;;;N;;;;;
+11A81;SOYOMBO LETTER SA;Lo;0;L;;;;;N;;;;;
+11A82;SOYOMBO LETTER HA;Lo;0;L;;;;;N;;;;;
+11A83;SOYOMBO LETTER KSSA;Lo;0;L;;;;;N;;;;;
+11A86;SOYOMBO CLUSTER-INITIAL LETTER RA;Lo;0;L;;;;;N;;;;;
+11A87;SOYOMBO CLUSTER-INITIAL LETTER LA;Lo;0;L;;;;;N;;;;;
+11A88;SOYOMBO CLUSTER-INITIAL LETTER SHA;Lo;0;L;;;;;N;;;;;
+11A89;SOYOMBO CLUSTER-INITIAL LETTER SA;Lo;0;L;;;;;N;;;;;
+11A8A;SOYOMBO FINAL CONSONANT SIGN G;Mn;0;NSM;;;;;N;;;;;
+11A8B;SOYOMBO FINAL CONSONANT SIGN K;Mn;0;NSM;;;;;N;;;;;
+11A8C;SOYOMBO FINAL CONSONANT SIGN NG;Mn;0;NSM;;;;;N;;;;;
+11A8D;SOYOMBO FINAL CONSONANT SIGN D;Mn;0;NSM;;;;;N;;;;;
+11A8E;SOYOMBO FINAL CONSONANT SIGN N;Mn;0;NSM;;;;;N;;;;;
+11A8F;SOYOMBO FINAL CONSONANT SIGN B;Mn;0;NSM;;;;;N;;;;;
+11A90;SOYOMBO FINAL CONSONANT SIGN M;Mn;0;NSM;;;;;N;;;;;
+11A91;SOYOMBO FINAL CONSONANT SIGN R;Mn;0;NSM;;;;;N;;;;;
+11A92;SOYOMBO FINAL CONSONANT SIGN L;Mn;0;NSM;;;;;N;;;;;
+11A93;SOYOMBO FINAL CONSONANT SIGN SH;Mn;0;NSM;;;;;N;;;;;
+11A94;SOYOMBO FINAL CONSONANT SIGN S;Mn;0;NSM;;;;;N;;;;;
+11A95;SOYOMBO FINAL CONSONANT SIGN -A;Mn;0;NSM;;;;;N;;;;;
+11A96;SOYOMBO SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;;
+11A97;SOYOMBO SIGN VISARGA;Mc;0;L;;;;;N;;;;;
+11A98;SOYOMBO GEMINATION MARK;Mn;0;NSM;;;;;N;;;;;
+11A99;SOYOMBO SUBJOINER;Mn;9;NSM;;;;;N;;;;;
+11A9A;SOYOMBO MARK TSHEG;Po;0;L;;;;;N;;;;;
+11A9B;SOYOMBO MARK SHAD;Po;0;L;;;;;N;;;;;
+11A9C;SOYOMBO MARK DOUBLE SHAD;Po;0;L;;;;;N;;;;;
+11A9E;SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME;Po;0;L;;;;;N;;;;;
+11A9F;SOYOMBO HEAD MARK WITH MOON AND SUN AND FLAME;Po;0;L;;;;;N;;;;;
+11AA0;SOYOMBO HEAD MARK WITH MOON AND SUN;Po;0;L;;;;;N;;;;;
+11AA1;SOYOMBO TERMINAL MARK-1;Po;0;L;;;;;N;;;;;
+11AA2;SOYOMBO TERMINAL MARK-2;Po;0;L;;;;;N;;;;;
11AC0;PAU CIN HAU LETTER PA;Lo;0;L;;;;;N;;;;;
11AC1;PAU CIN HAU LETTER KA;Lo;0;L;;;;;N;;;;;
11AC2;PAU CIN HAU LETTER LA;Lo;0;L;;;;;N;;;;;
@@ -20290,6 +20481,81 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
11CB4;MARCHEN VOWEL SIGN O;Mc;0;L;;;;;N;;;;;
11CB5;MARCHEN SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;;
11CB6;MARCHEN SIGN CANDRABINDU;Mn;0;NSM;;;;;N;;;;;
+11D00;MASARAM GONDI LETTER A;Lo;0;L;;;;;N;;;;;
+11D01;MASARAM GONDI LETTER AA;Lo;0;L;;;;;N;;;;;
+11D02;MASARAM GONDI LETTER I;Lo;0;L;;;;;N;;;;;
+11D03;MASARAM GONDI LETTER II;Lo;0;L;;;;;N;;;;;
+11D04;MASARAM GONDI LETTER U;Lo;0;L;;;;;N;;;;;
+11D05;MASARAM GONDI LETTER UU;Lo;0;L;;;;;N;;;;;
+11D06;MASARAM GONDI LETTER E;Lo;0;L;;;;;N;;;;;
+11D08;MASARAM GONDI LETTER AI;Lo;0;L;;;;;N;;;;;
+11D09;MASARAM GONDI LETTER O;Lo;0;L;;;;;N;;;;;
+11D0B;MASARAM GONDI LETTER AU;Lo;0;L;;;;;N;;;;;
+11D0C;MASARAM GONDI LETTER KA;Lo;0;L;;;;;N;;;;;
+11D0D;MASARAM GONDI LETTER KHA;Lo;0;L;;;;;N;;;;;
+11D0E;MASARAM GONDI LETTER GA;Lo;0;L;;;;;N;;;;;
+11D0F;MASARAM GONDI LETTER GHA;Lo;0;L;;;;;N;;;;;
+11D10;MASARAM GONDI LETTER NGA;Lo;0;L;;;;;N;;;;;
+11D11;MASARAM GONDI LETTER CA;Lo;0;L;;;;;N;;;;;
+11D12;MASARAM GONDI LETTER CHA;Lo;0;L;;;;;N;;;;;
+11D13;MASARAM GONDI LETTER JA;Lo;0;L;;;;;N;;;;;
+11D14;MASARAM GONDI LETTER JHA;Lo;0;L;;;;;N;;;;;
+11D15;MASARAM GONDI LETTER NYA;Lo;0;L;;;;;N;;;;;
+11D16;MASARAM GONDI LETTER TTA;Lo;0;L;;;;;N;;;;;
+11D17;MASARAM GONDI LETTER TTHA;Lo;0;L;;;;;N;;;;;
+11D18;MASARAM GONDI LETTER DDA;Lo;0;L;;;;;N;;;;;
+11D19;MASARAM GONDI LETTER DDHA;Lo;0;L;;;;;N;;;;;
+11D1A;MASARAM GONDI LETTER NNA;Lo;0;L;;;;;N;;;;;
+11D1B;MASARAM GONDI LETTER TA;Lo;0;L;;;;;N;;;;;
+11D1C;MASARAM GONDI LETTER THA;Lo;0;L;;;;;N;;;;;
+11D1D;MASARAM GONDI LETTER DA;Lo;0;L;;;;;N;;;;;
+11D1E;MASARAM GONDI LETTER DHA;Lo;0;L;;;;;N;;;;;
+11D1F;MASARAM GONDI LETTER NA;Lo;0;L;;;;;N;;;;;
+11D20;MASARAM GONDI LETTER PA;Lo;0;L;;;;;N;;;;;
+11D21;MASARAM GONDI LETTER PHA;Lo;0;L;;;;;N;;;;;
+11D22;MASARAM GONDI LETTER BA;Lo;0;L;;;;;N;;;;;
+11D23;MASARAM GONDI LETTER BHA;Lo;0;L;;;;;N;;;;;
+11D24;MASARAM GONDI LETTER MA;Lo;0;L;;;;;N;;;;;
+11D25;MASARAM GONDI LETTER YA;Lo;0;L;;;;;N;;;;;
+11D26;MASARAM GONDI LETTER RA;Lo;0;L;;;;;N;;;;;
+11D27;MASARAM GONDI LETTER LA;Lo;0;L;;;;;N;;;;;
+11D28;MASARAM GONDI LETTER VA;Lo;0;L;;;;;N;;;;;
+11D29;MASARAM GONDI LETTER SHA;Lo;0;L;;;;;N;;;;;
+11D2A;MASARAM GONDI LETTER SSA;Lo;0;L;;;;;N;;;;;
+11D2B;MASARAM GONDI LETTER SA;Lo;0;L;;;;;N;;;;;
+11D2C;MASARAM GONDI LETTER HA;Lo;0;L;;;;;N;;;;;
+11D2D;MASARAM GONDI LETTER LLA;Lo;0;L;;;;;N;;;;;
+11D2E;MASARAM GONDI LETTER KSSA;Lo;0;L;;;;;N;;;;;
+11D2F;MASARAM GONDI LETTER JNYA;Lo;0;L;;;;;N;;;;;
+11D30;MASARAM GONDI LETTER TRA;Lo;0;L;;;;;N;;;;;
+11D31;MASARAM GONDI VOWEL SIGN AA;Mn;0;NSM;;;;;N;;;;;
+11D32;MASARAM GONDI VOWEL SIGN I;Mn;0;NSM;;;;;N;;;;;
+11D33;MASARAM GONDI VOWEL SIGN II;Mn;0;NSM;;;;;N;;;;;
+11D34;MASARAM GONDI VOWEL SIGN U;Mn;0;NSM;;;;;N;;;;;
+11D35;MASARAM GONDI VOWEL SIGN UU;Mn;0;NSM;;;;;N;;;;;
+11D36;MASARAM GONDI VOWEL SIGN VOCALIC R;Mn;0;NSM;;;;;N;;;;;
+11D3A;MASARAM GONDI VOWEL SIGN E;Mn;0;NSM;;;;;N;;;;;
+11D3C;MASARAM GONDI VOWEL SIGN AI;Mn;0;NSM;;;;;N;;;;;
+11D3D;MASARAM GONDI VOWEL SIGN O;Mn;0;NSM;;;;;N;;;;;
+11D3F;MASARAM GONDI VOWEL SIGN AU;Mn;0;NSM;;;;;N;;;;;
+11D40;MASARAM GONDI SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;;
+11D41;MASARAM GONDI SIGN VISARGA;Mn;0;NSM;;;;;N;;;;;
+11D42;MASARAM GONDI SIGN NUKTA;Mn;7;NSM;;;;;N;;;;;
+11D43;MASARAM GONDI SIGN CANDRA;Mn;0;NSM;;;;;N;;;;;
+11D44;MASARAM GONDI SIGN HALANTA;Mn;9;NSM;;;;;N;;;;;
+11D45;MASARAM GONDI VIRAMA;Mn;9;NSM;;;;;N;;;;;
+11D46;MASARAM GONDI REPHA;Lo;0;L;;;;;N;;;;;
+11D47;MASARAM GONDI RA-KARA;Mn;0;NSM;;;;;N;;;;;
+11D50;MASARAM GONDI DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;;
+11D51;MASARAM GONDI DIGIT ONE;Nd;0;L;;1;1;1;N;;;;;
+11D52;MASARAM GONDI DIGIT TWO;Nd;0;L;;2;2;2;N;;;;;
+11D53;MASARAM GONDI DIGIT THREE;Nd;0;L;;3;3;3;N;;;;;
+11D54;MASARAM GONDI DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;;
+11D55;MASARAM GONDI DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;;
+11D56;MASARAM GONDI DIGIT SIX;Nd;0;L;;6;6;6;N;;;;;
+11D57;MASARAM GONDI DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;;
+11D58;MASARAM GONDI DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;;
+11D59;MASARAM GONDI DIGIT NINE;Nd;0;L;;9;9;9;N;;;;;
12000;CUNEIFORM SIGN A;Lo;0;L;;;;;N;;;;;
12001;CUNEIFORM SIGN A TIMES A;Lo;0;L;;;;;N;;;;;
12002;CUNEIFORM SIGN A TIMES BAD;Lo;0;L;;;;;N;;;;;
@@ -24087,6 +24353,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
16F9E;MIAO LETTER REFORMED TONE-6;Lm;0;L;;;;;N;;;;;
16F9F;MIAO LETTER REFORMED TONE-8;Lm;0;L;;;;;N;;;;;
16FE0;TANGUT ITERATION MARK;Lm;0;L;;;;;N;;;;;
+16FE1;NUSHU ITERATION MARK;Lm;0;L;;;;;N;;;;;
17000;<Tangut Ideograph, First>;Lo;0;L;;;;;N;;;;;
187EC;<Tangut Ideograph, Last>;Lo;0;L;;;;;N;;;;;
18800;TANGUT COMPONENT-001;Lo;0;L;;;;;N;;;;;
@@ -24846,6 +25113,687 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
18AF2;TANGUT COMPONENT-755;Lo;0;L;;;;;N;;;;;
1B000;KATAKANA LETTER ARCHAIC E;Lo;0;L;;;;;N;;;;;
1B001;HIRAGANA LETTER ARCHAIC YE;Lo;0;L;;;;;N;;;;;
+1B002;HENTAIGANA LETTER A-1;Lo;0;L;;;;;N;;;;;
+1B003;HENTAIGANA LETTER A-2;Lo;0;L;;;;;N;;;;;
+1B004;HENTAIGANA LETTER A-3;Lo;0;L;;;;;N;;;;;
+1B005;HENTAIGANA LETTER A-WO;Lo;0;L;;;;;N;;;;;
+1B006;HENTAIGANA LETTER I-1;Lo;0;L;;;;;N;;;;;
+1B007;HENTAIGANA LETTER I-2;Lo;0;L;;;;;N;;;;;
+1B008;HENTAIGANA LETTER I-3;Lo;0;L;;;;;N;;;;;
+1B009;HENTAIGANA LETTER I-4;Lo;0;L;;;;;N;;;;;
+1B00A;HENTAIGANA LETTER U-1;Lo;0;L;;;;;N;;;;;
+1B00B;HENTAIGANA LETTER U-2;Lo;0;L;;;;;N;;;;;
+1B00C;HENTAIGANA LETTER U-3;Lo;0;L;;;;;N;;;;;
+1B00D;HENTAIGANA LETTER U-4;Lo;0;L;;;;;N;;;;;
+1B00E;HENTAIGANA LETTER U-5;Lo;0;L;;;;;N;;;;;
+1B00F;HENTAIGANA LETTER E-2;Lo;0;L;;;;;N;;;;;
+1B010;HENTAIGANA LETTER E-3;Lo;0;L;;;;;N;;;;;
+1B011;HENTAIGANA LETTER E-4;Lo;0;L;;;;;N;;;;;
+1B012;HENTAIGANA LETTER E-5;Lo;0;L;;;;;N;;;;;
+1B013;HENTAIGANA LETTER E-6;Lo;0;L;;;;;N;;;;;
+1B014;HENTAIGANA LETTER O-1;Lo;0;L;;;;;N;;;;;
+1B015;HENTAIGANA LETTER O-2;Lo;0;L;;;;;N;;;;;
+1B016;HENTAIGANA LETTER O-3;Lo;0;L;;;;;N;;;;;
+1B017;HENTAIGANA LETTER KA-1;Lo;0;L;;;;;N;;;;;
+1B018;HENTAIGANA LETTER KA-2;Lo;0;L;;;;;N;;;;;
+1B019;HENTAIGANA LETTER KA-3;Lo;0;L;;;;;N;;;;;
+1B01A;HENTAIGANA LETTER KA-4;Lo;0;L;;;;;N;;;;;
+1B01B;HENTAIGANA LETTER KA-5;Lo;0;L;;;;;N;;;;;
+1B01C;HENTAIGANA LETTER KA-6;Lo;0;L;;;;;N;;;;;
+1B01D;HENTAIGANA LETTER KA-7;Lo;0;L;;;;;N;;;;;
+1B01E;HENTAIGANA LETTER KA-8;Lo;0;L;;;;;N;;;;;
+1B01F;HENTAIGANA LETTER KA-9;Lo;0;L;;;;;N;;;;;
+1B020;HENTAIGANA LETTER KA-10;Lo;0;L;;;;;N;;;;;
+1B021;HENTAIGANA LETTER KA-11;Lo;0;L;;;;;N;;;;;
+1B022;HENTAIGANA LETTER KA-KE;Lo;0;L;;;;;N;;;;;
+1B023;HENTAIGANA LETTER KI-1;Lo;0;L;;;;;N;;;;;
+1B024;HENTAIGANA LETTER KI-2;Lo;0;L;;;;;N;;;;;
+1B025;HENTAIGANA LETTER KI-3;Lo;0;L;;;;;N;;;;;
+1B026;HENTAIGANA LETTER KI-4;Lo;0;L;;;;;N;;;;;
+1B027;HENTAIGANA LETTER KI-5;Lo;0;L;;;;;N;;;;;
+1B028;HENTAIGANA LETTER KI-6;Lo;0;L;;;;;N;;;;;
+1B029;HENTAIGANA LETTER KI-7;Lo;0;L;;;;;N;;;;;
+1B02A;HENTAIGANA LETTER KI-8;Lo;0;L;;;;;N;;;;;
+1B02B;HENTAIGANA LETTER KU-1;Lo;0;L;;;;;N;;;;;
+1B02C;HENTAIGANA LETTER KU-2;Lo;0;L;;;;;N;;;;;
+1B02D;HENTAIGANA LETTER KU-3;Lo;0;L;;;;;N;;;;;
+1B02E;HENTAIGANA LETTER KU-4;Lo;0;L;;;;;N;;;;;
+1B02F;HENTAIGANA LETTER KU-5;Lo;0;L;;;;;N;;;;;
+1B030;HENTAIGANA LETTER KU-6;Lo;0;L;;;;;N;;;;;
+1B031;HENTAIGANA LETTER KU-7;Lo;0;L;;;;;N;;;;;
+1B032;HENTAIGANA LETTER KE-1;Lo;0;L;;;;;N;;;;;
+1B033;HENTAIGANA LETTER KE-2;Lo;0;L;;;;;N;;;;;
+1B034;HENTAIGANA LETTER KE-3;Lo;0;L;;;;;N;;;;;
+1B035;HENTAIGANA LETTER KE-4;Lo;0;L;;;;;N;;;;;
+1B036;HENTAIGANA LETTER KE-5;Lo;0;L;;;;;N;;;;;
+1B037;HENTAIGANA LETTER KE-6;Lo;0;L;;;;;N;;;;;
+1B038;HENTAIGANA LETTER KO-1;Lo;0;L;;;;;N;;;;;
+1B039;HENTAIGANA LETTER KO-2;Lo;0;L;;;;;N;;;;;
+1B03A;HENTAIGANA LETTER KO-3;Lo;0;L;;;;;N;;;;;
+1B03B;HENTAIGANA LETTER KO-KI;Lo;0;L;;;;;N;;;;;
+1B03C;HENTAIGANA LETTER SA-1;Lo;0;L;;;;;N;;;;;
+1B03D;HENTAIGANA LETTER SA-2;Lo;0;L;;;;;N;;;;;
+1B03E;HENTAIGANA LETTER SA-3;Lo;0;L;;;;;N;;;;;
+1B03F;HENTAIGANA LETTER SA-4;Lo;0;L;;;;;N;;;;;
+1B040;HENTAIGANA LETTER SA-5;Lo;0;L;;;;;N;;;;;
+1B041;HENTAIGANA LETTER SA-6;Lo;0;L;;;;;N;;;;;
+1B042;HENTAIGANA LETTER SA-7;Lo;0;L;;;;;N;;;;;
+1B043;HENTAIGANA LETTER SA-8;Lo;0;L;;;;;N;;;;;
+1B044;HENTAIGANA LETTER SI-1;Lo;0;L;;;;;N;;;;;
+1B045;HENTAIGANA LETTER SI-2;Lo;0;L;;;;;N;;;;;
+1B046;HENTAIGANA LETTER SI-3;Lo;0;L;;;;;N;;;;;
+1B047;HENTAIGANA LETTER SI-4;Lo;0;L;;;;;N;;;;;
+1B048;HENTAIGANA LETTER SI-5;Lo;0;L;;;;;N;;;;;
+1B049;HENTAIGANA LETTER SI-6;Lo;0;L;;;;;N;;;;;
+1B04A;HENTAIGANA LETTER SU-1;Lo;0;L;;;;;N;;;;;
+1B04B;HENTAIGANA LETTER SU-2;Lo;0;L;;;;;N;;;;;
+1B04C;HENTAIGANA LETTER SU-3;Lo;0;L;;;;;N;;;;;
+1B04D;HENTAIGANA LETTER SU-4;Lo;0;L;;;;;N;;;;;
+1B04E;HENTAIGANA LETTER SU-5;Lo;0;L;;;;;N;;;;;
+1B04F;HENTAIGANA LETTER SU-6;Lo;0;L;;;;;N;;;;;
+1B050;HENTAIGANA LETTER SU-7;Lo;0;L;;;;;N;;;;;
+1B051;HENTAIGANA LETTER SU-8;Lo;0;L;;;;;N;;;;;
+1B052;HENTAIGANA LETTER SE-1;Lo;0;L;;;;;N;;;;;
+1B053;HENTAIGANA LETTER SE-2;Lo;0;L;;;;;N;;;;;
+1B054;HENTAIGANA LETTER SE-3;Lo;0;L;;;;;N;;;;;
+1B055;HENTAIGANA LETTER SE-4;Lo;0;L;;;;;N;;;;;
+1B056;HENTAIGANA LETTER SE-5;Lo;0;L;;;;;N;;;;;
+1B057;HENTAIGANA LETTER SO-1;Lo;0;L;;;;;N;;;;;
+1B058;HENTAIGANA LETTER SO-2;Lo;0;L;;;;;N;;;;;
+1B059;HENTAIGANA LETTER SO-3;Lo;0;L;;;;;N;;;;;
+1B05A;HENTAIGANA LETTER SO-4;Lo;0;L;;;;;N;;;;;
+1B05B;HENTAIGANA LETTER SO-5;Lo;0;L;;;;;N;;;;;
+1B05C;HENTAIGANA LETTER SO-6;Lo;0;L;;;;;N;;;;;
+1B05D;HENTAIGANA LETTER SO-7;Lo;0;L;;;;;N;;;;;
+1B05E;HENTAIGANA LETTER TA-1;Lo;0;L;;;;;N;;;;;
+1B05F;HENTAIGANA LETTER TA-2;Lo;0;L;;;;;N;;;;;
+1B060;HENTAIGANA LETTER TA-3;Lo;0;L;;;;;N;;;;;
+1B061;HENTAIGANA LETTER TA-4;Lo;0;L;;;;;N;;;;;
+1B062;HENTAIGANA LETTER TI-1;Lo;0;L;;;;;N;;;;;
+1B063;HENTAIGANA LETTER TI-2;Lo;0;L;;;;;N;;;;;
+1B064;HENTAIGANA LETTER TI-3;Lo;0;L;;;;;N;;;;;
+1B065;HENTAIGANA LETTER TI-4;Lo;0;L;;;;;N;;;;;
+1B066;HENTAIGANA LETTER TI-5;Lo;0;L;;;;;N;;;;;
+1B067;HENTAIGANA LETTER TI-6;Lo;0;L;;;;;N;;;;;
+1B068;HENTAIGANA LETTER TI-7;Lo;0;L;;;;;N;;;;;
+1B069;HENTAIGANA LETTER TU-1;Lo;0;L;;;;;N;;;;;
+1B06A;HENTAIGANA LETTER TU-2;Lo;0;L;;;;;N;;;;;
+1B06B;HENTAIGANA LETTER TU-3;Lo;0;L;;;;;N;;;;;
+1B06C;HENTAIGANA LETTER TU-4;Lo;0;L;;;;;N;;;;;
+1B06D;HENTAIGANA LETTER TU-TO;Lo;0;L;;;;;N;;;;;
+1B06E;HENTAIGANA LETTER TE-1;Lo;0;L;;;;;N;;;;;
+1B06F;HENTAIGANA LETTER TE-2;Lo;0;L;;;;;N;;;;;
+1B070;HENTAIGANA LETTER TE-3;Lo;0;L;;;;;N;;;;;
+1B071;HENTAIGANA LETTER TE-4;Lo;0;L;;;;;N;;;;;
+1B072;HENTAIGANA LETTER TE-5;Lo;0;L;;;;;N;;;;;
+1B073;HENTAIGANA LETTER TE-6;Lo;0;L;;;;;N;;;;;
+1B074;HENTAIGANA LETTER TE-7;Lo;0;L;;;;;N;;;;;
+1B075;HENTAIGANA LETTER TE-8;Lo;0;L;;;;;N;;;;;
+1B076;HENTAIGANA LETTER TE-9;Lo;0;L;;;;;N;;;;;
+1B077;HENTAIGANA LETTER TO-1;Lo;0;L;;;;;N;;;;;
+1B078;HENTAIGANA LETTER TO-2;Lo;0;L;;;;;N;;;;;
+1B079;HENTAIGANA LETTER TO-3;Lo;0;L;;;;;N;;;;;
+1B07A;HENTAIGANA LETTER TO-4;Lo;0;L;;;;;N;;;;;
+1B07B;HENTAIGANA LETTER TO-5;Lo;0;L;;;;;N;;;;;
+1B07C;HENTAIGANA LETTER TO-6;Lo;0;L;;;;;N;;;;;
+1B07D;HENTAIGANA LETTER TO-RA;Lo;0;L;;;;;N;;;;;
+1B07E;HENTAIGANA LETTER NA-1;Lo;0;L;;;;;N;;;;;
+1B07F;HENTAIGANA LETTER NA-2;Lo;0;L;;;;;N;;;;;
+1B080;HENTAIGANA LETTER NA-3;Lo;0;L;;;;;N;;;;;
+1B081;HENTAIGANA LETTER NA-4;Lo;0;L;;;;;N;;;;;
+1B082;HENTAIGANA LETTER NA-5;Lo;0;L;;;;;N;;;;;
+1B083;HENTAIGANA LETTER NA-6;Lo;0;L;;;;;N;;;;;
+1B084;HENTAIGANA LETTER NA-7;Lo;0;L;;;;;N;;;;;
+1B085;HENTAIGANA LETTER NA-8;Lo;0;L;;;;;N;;;;;
+1B086;HENTAIGANA LETTER NA-9;Lo;0;L;;;;;N;;;;;
+1B087;HENTAIGANA LETTER NI-1;Lo;0;L;;;;;N;;;;;
+1B088;HENTAIGANA LETTER NI-2;Lo;0;L;;;;;N;;;;;
+1B089;HENTAIGANA LETTER NI-3;Lo;0;L;;;;;N;;;;;
+1B08A;HENTAIGANA LETTER NI-4;Lo;0;L;;;;;N;;;;;
+1B08B;HENTAIGANA LETTER NI-5;Lo;0;L;;;;;N;;;;;
+1B08C;HENTAIGANA LETTER NI-6;Lo;0;L;;;;;N;;;;;
+1B08D;HENTAIGANA LETTER NI-7;Lo;0;L;;;;;N;;;;;
+1B08E;HENTAIGANA LETTER NI-TE;Lo;0;L;;;;;N;;;;;
+1B08F;HENTAIGANA LETTER NU-1;Lo;0;L;;;;;N;;;;;
+1B090;HENTAIGANA LETTER NU-2;Lo;0;L;;;;;N;;;;;
+1B091;HENTAIGANA LETTER NU-3;Lo;0;L;;;;;N;;;;;
+1B092;HENTAIGANA LETTER NE-1;Lo;0;L;;;;;N;;;;;
+1B093;HENTAIGANA LETTER NE-2;Lo;0;L;;;;;N;;;;;
+1B094;HENTAIGANA LETTER NE-3;Lo;0;L;;;;;N;;;;;
+1B095;HENTAIGANA LETTER NE-4;Lo;0;L;;;;;N;;;;;
+1B096;HENTAIGANA LETTER NE-5;Lo;0;L;;;;;N;;;;;
+1B097;HENTAIGANA LETTER NE-6;Lo;0;L;;;;;N;;;;;
+1B098;HENTAIGANA LETTER NE-KO;Lo;0;L;;;;;N;;;;;
+1B099;HENTAIGANA LETTER NO-1;Lo;0;L;;;;;N;;;;;
+1B09A;HENTAIGANA LETTER NO-2;Lo;0;L;;;;;N;;;;;
+1B09B;HENTAIGANA LETTER NO-3;Lo;0;L;;;;;N;;;;;
+1B09C;HENTAIGANA LETTER NO-4;Lo;0;L;;;;;N;;;;;
+1B09D;HENTAIGANA LETTER NO-5;Lo;0;L;;;;;N;;;;;
+1B09E;HENTAIGANA LETTER HA-1;Lo;0;L;;;;;N;;;;;
+1B09F;HENTAIGANA LETTER HA-2;Lo;0;L;;;;;N;;;;;
+1B0A0;HENTAIGANA LETTER HA-3;Lo;0;L;;;;;N;;;;;
+1B0A1;HENTAIGANA LETTER HA-4;Lo;0;L;;;;;N;;;;;
+1B0A2;HENTAIGANA LETTER HA-5;Lo;0;L;;;;;N;;;;;
+1B0A3;HENTAIGANA LETTER HA-6;Lo;0;L;;;;;N;;;;;
+1B0A4;HENTAIGANA LETTER HA-7;Lo;0;L;;;;;N;;;;;
+1B0A5;HENTAIGANA LETTER HA-8;Lo;0;L;;;;;N;;;;;
+1B0A6;HENTAIGANA LETTER HA-9;Lo;0;L;;;;;N;;;;;
+1B0A7;HENTAIGANA LETTER HA-10;Lo;0;L;;;;;N;;;;;
+1B0A8;HENTAIGANA LETTER HA-11;Lo;0;L;;;;;N;;;;;
+1B0A9;HENTAIGANA LETTER HI-1;Lo;0;L;;;;;N;;;;;
+1B0AA;HENTAIGANA LETTER HI-2;Lo;0;L;;;;;N;;;;;
+1B0AB;HENTAIGANA LETTER HI-3;Lo;0;L;;;;;N;;;;;
+1B0AC;HENTAIGANA LETTER HI-4;Lo;0;L;;;;;N;;;;;
+1B0AD;HENTAIGANA LETTER HI-5;Lo;0;L;;;;;N;;;;;
+1B0AE;HENTAIGANA LETTER HI-6;Lo;0;L;;;;;N;;;;;
+1B0AF;HENTAIGANA LETTER HI-7;Lo;0;L;;;;;N;;;;;
+1B0B0;HENTAIGANA LETTER HU-1;Lo;0;L;;;;;N;;;;;
+1B0B1;HENTAIGANA LETTER HU-2;Lo;0;L;;;;;N;;;;;
+1B0B2;HENTAIGANA LETTER HU-3;Lo;0;L;;;;;N;;;;;
+1B0B3;HENTAIGANA LETTER HE-1;Lo;0;L;;;;;N;;;;;
+1B0B4;HENTAIGANA LETTER HE-2;Lo;0;L;;;;;N;;;;;
+1B0B5;HENTAIGANA LETTER HE-3;Lo;0;L;;;;;N;;;;;
+1B0B6;HENTAIGANA LETTER HE-4;Lo;0;L;;;;;N;;;;;
+1B0B7;HENTAIGANA LETTER HE-5;Lo;0;L;;;;;N;;;;;
+1B0B8;HENTAIGANA LETTER HE-6;Lo;0;L;;;;;N;;;;;
+1B0B9;HENTAIGANA LETTER HE-7;Lo;0;L;;;;;N;;;;;
+1B0BA;HENTAIGANA LETTER HO-1;Lo;0;L;;;;;N;;;;;
+1B0BB;HENTAIGANA LETTER HO-2;Lo;0;L;;;;;N;;;;;
+1B0BC;HENTAIGANA LETTER HO-3;Lo;0;L;;;;;N;;;;;
+1B0BD;HENTAIGANA LETTER HO-4;Lo;0;L;;;;;N;;;;;
+1B0BE;HENTAIGANA LETTER HO-5;Lo;0;L;;;;;N;;;;;
+1B0BF;HENTAIGANA LETTER HO-6;Lo;0;L;;;;;N;;;;;
+1B0C0;HENTAIGANA LETTER HO-7;Lo;0;L;;;;;N;;;;;
+1B0C1;HENTAIGANA LETTER HO-8;Lo;0;L;;;;;N;;;;;
+1B0C2;HENTAIGANA LETTER MA-1;Lo;0;L;;;;;N;;;;;
+1B0C3;HENTAIGANA LETTER MA-2;Lo;0;L;;;;;N;;;;;
+1B0C4;HENTAIGANA LETTER MA-3;Lo;0;L;;;;;N;;;;;
+1B0C5;HENTAIGANA LETTER MA-4;Lo;0;L;;;;;N;;;;;
+1B0C6;HENTAIGANA LETTER MA-5;Lo;0;L;;;;;N;;;;;
+1B0C7;HENTAIGANA LETTER MA-6;Lo;0;L;;;;;N;;;;;
+1B0C8;HENTAIGANA LETTER MA-7;Lo;0;L;;;;;N;;;;;
+1B0C9;HENTAIGANA LETTER MI-1;Lo;0;L;;;;;N;;;;;
+1B0CA;HENTAIGANA LETTER MI-2;Lo;0;L;;;;;N;;;;;
+1B0CB;HENTAIGANA LETTER MI-3;Lo;0;L;;;;;N;;;;;
+1B0CC;HENTAIGANA LETTER MI-4;Lo;0;L;;;;;N;;;;;
+1B0CD;HENTAIGANA LETTER MI-5;Lo;0;L;;;;;N;;;;;
+1B0CE;HENTAIGANA LETTER MI-6;Lo;0;L;;;;;N;;;;;
+1B0CF;HENTAIGANA LETTER MI-7;Lo;0;L;;;;;N;;;;;
+1B0D0;HENTAIGANA LETTER MU-1;Lo;0;L;;;;;N;;;;;
+1B0D1;HENTAIGANA LETTER MU-2;Lo;0;L;;;;;N;;;;;
+1B0D2;HENTAIGANA LETTER MU-3;Lo;0;L;;;;;N;;;;;
+1B0D3;HENTAIGANA LETTER MU-4;Lo;0;L;;;;;N;;;;;
+1B0D4;HENTAIGANA LETTER ME-1;Lo;0;L;;;;;N;;;;;
+1B0D5;HENTAIGANA LETTER ME-2;Lo;0;L;;;;;N;;;;;
+1B0D6;HENTAIGANA LETTER ME-MA;Lo;0;L;;;;;N;;;;;
+1B0D7;HENTAIGANA LETTER MO-1;Lo;0;L;;;;;N;;;;;
+1B0D8;HENTAIGANA LETTER MO-2;Lo;0;L;;;;;N;;;;;
+1B0D9;HENTAIGANA LETTER MO-3;Lo;0;L;;;;;N;;;;;
+1B0DA;HENTAIGANA LETTER MO-4;Lo;0;L;;;;;N;;;;;
+1B0DB;HENTAIGANA LETTER MO-5;Lo;0;L;;;;;N;;;;;
+1B0DC;HENTAIGANA LETTER MO-6;Lo;0;L;;;;;N;;;;;
+1B0DD;HENTAIGANA LETTER YA-1;Lo;0;L;;;;;N;;;;;
+1B0DE;HENTAIGANA LETTER YA-2;Lo;0;L;;;;;N;;;;;
+1B0DF;HENTAIGANA LETTER YA-3;Lo;0;L;;;;;N;;;;;
+1B0E0;HENTAIGANA LETTER YA-4;Lo;0;L;;;;;N;;;;;
+1B0E1;HENTAIGANA LETTER YA-5;Lo;0;L;;;;;N;;;;;
+1B0E2;HENTAIGANA LETTER YA-YO;Lo;0;L;;;;;N;;;;;
+1B0E3;HENTAIGANA LETTER YU-1;Lo;0;L;;;;;N;;;;;
+1B0E4;HENTAIGANA LETTER YU-2;Lo;0;L;;;;;N;;;;;
+1B0E5;HENTAIGANA LETTER YU-3;Lo;0;L;;;;;N;;;;;
+1B0E6;HENTAIGANA LETTER YU-4;Lo;0;L;;;;;N;;;;;
+1B0E7;HENTAIGANA LETTER YO-1;Lo;0;L;;;;;N;;;;;
+1B0E8;HENTAIGANA LETTER YO-2;Lo;0;L;;;;;N;;;;;
+1B0E9;HENTAIGANA LETTER YO-3;Lo;0;L;;;;;N;;;;;
+1B0EA;HENTAIGANA LETTER YO-4;Lo;0;L;;;;;N;;;;;
+1B0EB;HENTAIGANA LETTER YO-5;Lo;0;L;;;;;N;;;;;
+1B0EC;HENTAIGANA LETTER YO-6;Lo;0;L;;;;;N;;;;;
+1B0ED;HENTAIGANA LETTER RA-1;Lo;0;L;;;;;N;;;;;
+1B0EE;HENTAIGANA LETTER RA-2;Lo;0;L;;;;;N;;;;;
+1B0EF;HENTAIGANA LETTER RA-3;Lo;0;L;;;;;N;;;;;
+1B0F0;HENTAIGANA LETTER RA-4;Lo;0;L;;;;;N;;;;;
+1B0F1;HENTAIGANA LETTER RI-1;Lo;0;L;;;;;N;;;;;
+1B0F2;HENTAIGANA LETTER RI-2;Lo;0;L;;;;;N;;;;;
+1B0F3;HENTAIGANA LETTER RI-3;Lo;0;L;;;;;N;;;;;
+1B0F4;HENTAIGANA LETTER RI-4;Lo;0;L;;;;;N;;;;;
+1B0F5;HENTAIGANA LETTER RI-5;Lo;0;L;;;;;N;;;;;
+1B0F6;HENTAIGANA LETTER RI-6;Lo;0;L;;;;;N;;;;;
+1B0F7;HENTAIGANA LETTER RI-7;Lo;0;L;;;;;N;;;;;
+1B0F8;HENTAIGANA LETTER RU-1;Lo;0;L;;;;;N;;;;;
+1B0F9;HENTAIGANA LETTER RU-2;Lo;0;L;;;;;N;;;;;
+1B0FA;HENTAIGANA LETTER RU-3;Lo;0;L;;;;;N;;;;;
+1B0FB;HENTAIGANA LETTER RU-4;Lo;0;L;;;;;N;;;;;
+1B0FC;HENTAIGANA LETTER RU-5;Lo;0;L;;;;;N;;;;;
+1B0FD;HENTAIGANA LETTER RU-6;Lo;0;L;;;;;N;;;;;
+1B0FE;HENTAIGANA LETTER RE-1;Lo;0;L;;;;;N;;;;;
+1B0FF;HENTAIGANA LETTER RE-2;Lo;0;L;;;;;N;;;;;
+1B100;HENTAIGANA LETTER RE-3;Lo;0;L;;;;;N;;;;;
+1B101;HENTAIGANA LETTER RE-4;Lo;0;L;;;;;N;;;;;
+1B102;HENTAIGANA LETTER RO-1;Lo;0;L;;;;;N;;;;;
+1B103;HENTAIGANA LETTER RO-2;Lo;0;L;;;;;N;;;;;
+1B104;HENTAIGANA LETTER RO-3;Lo;0;L;;;;;N;;;;;
+1B105;HENTAIGANA LETTER RO-4;Lo;0;L;;;;;N;;;;;
+1B106;HENTAIGANA LETTER RO-5;Lo;0;L;;;;;N;;;;;
+1B107;HENTAIGANA LETTER RO-6;Lo;0;L;;;;;N;;;;;
+1B108;HENTAIGANA LETTER WA-1;Lo;0;L;;;;;N;;;;;
+1B109;HENTAIGANA LETTER WA-2;Lo;0;L;;;;;N;;;;;
+1B10A;HENTAIGANA LETTER WA-3;Lo;0;L;;;;;N;;;;;
+1B10B;HENTAIGANA LETTER WA-4;Lo;0;L;;;;;N;;;;;
+1B10C;HENTAIGANA LETTER WA-5;Lo;0;L;;;;;N;;;;;
+1B10D;HENTAIGANA LETTER WI-1;Lo;0;L;;;;;N;;;;;
+1B10E;HENTAIGANA LETTER WI-2;Lo;0;L;;;;;N;;;;;
+1B10F;HENTAIGANA LETTER WI-3;Lo;0;L;;;;;N;;;;;
+1B110;HENTAIGANA LETTER WI-4;Lo;0;L;;;;;N;;;;;
+1B111;HENTAIGANA LETTER WI-5;Lo;0;L;;;;;N;;;;;
+1B112;HENTAIGANA LETTER WE-1;Lo;0;L;;;;;N;;;;;
+1B113;HENTAIGANA LETTER WE-2;Lo;0;L;;;;;N;;;;;
+1B114;HENTAIGANA LETTER WE-3;Lo;0;L;;;;;N;;;;;
+1B115;HENTAIGANA LETTER WE-4;Lo;0;L;;;;;N;;;;;
+1B116;HENTAIGANA LETTER WO-1;Lo;0;L;;;;;N;;;;;
+1B117;HENTAIGANA LETTER WO-2;Lo;0;L;;;;;N;;;;;
+1B118;HENTAIGANA LETTER WO-3;Lo;0;L;;;;;N;;;;;
+1B119;HENTAIGANA LETTER WO-4;Lo;0;L;;;;;N;;;;;
+1B11A;HENTAIGANA LETTER WO-5;Lo;0;L;;;;;N;;;;;
+1B11B;HENTAIGANA LETTER WO-6;Lo;0;L;;;;;N;;;;;
+1B11C;HENTAIGANA LETTER WO-7;Lo;0;L;;;;;N;;;;;
+1B11D;HENTAIGANA LETTER N-MU-MO-1;Lo;0;L;;;;;N;;;;;
+1B11E;HENTAIGANA LETTER N-MU-MO-2;Lo;0;L;;;;;N;;;;;
+1B170;NUSHU CHARACTER-1B170;Lo;0;L;;;;;N;;;;;
+1B171;NUSHU CHARACTER-1B171;Lo;0;L;;;;;N;;;;;
+1B172;NUSHU CHARACTER-1B172;Lo;0;L;;;;;N;;;;;
+1B173;NUSHU CHARACTER-1B173;Lo;0;L;;;;;N;;;;;
+1B174;NUSHU CHARACTER-1B174;Lo;0;L;;;;;N;;;;;
+1B175;NUSHU CHARACTER-1B175;Lo;0;L;;;;;N;;;;;
+1B176;NUSHU CHARACTER-1B176;Lo;0;L;;;;;N;;;;;
+1B177;NUSHU CHARACTER-1B177;Lo;0;L;;;;;N;;;;;
+1B178;NUSHU CHARACTER-1B178;Lo;0;L;;;;;N;;;;;
+1B179;NUSHU CHARACTER-1B179;Lo;0;L;;;;;N;;;;;
+1B17A;NUSHU CHARACTER-1B17A;Lo;0;L;;;;;N;;;;;
+1B17B;NUSHU CHARACTER-1B17B;Lo;0;L;;;;;N;;;;;
+1B17C;NUSHU CHARACTER-1B17C;Lo;0;L;;;;;N;;;;;
+1B17D;NUSHU CHARACTER-1B17D;Lo;0;L;;;;;N;;;;;
+1B17E;NUSHU CHARACTER-1B17E;Lo;0;L;;;;;N;;;;;
+1B17F;NUSHU CHARACTER-1B17F;Lo;0;L;;;;;N;;;;;
+1B180;NUSHU CHARACTER-1B180;Lo;0;L;;;;;N;;;;;
+1B181;NUSHU CHARACTER-1B181;Lo;0;L;;;;;N;;;;;
+1B182;NUSHU CHARACTER-1B182;Lo;0;L;;;;;N;;;;;
+1B183;NUSHU CHARACTER-1B183;Lo;0;L;;;;;N;;;;;
+1B184;NUSHU CHARACTER-1B184;Lo;0;L;;;;;N;;;;;
+1B185;NUSHU CHARACTER-1B185;Lo;0;L;;;;;N;;;;;
+1B186;NUSHU CHARACTER-1B186;Lo;0;L;;;;;N;;;;;
+1B187;NUSHU CHARACTER-1B187;Lo;0;L;;;;;N;;;;;
+1B188;NUSHU CHARACTER-1B188;Lo;0;L;;;;;N;;;;;
+1B189;NUSHU CHARACTER-1B189;Lo;0;L;;;;;N;;;;;
+1B18A;NUSHU CHARACTER-1B18A;Lo;0;L;;;;;N;;;;;
+1B18B;NUSHU CHARACTER-1B18B;Lo;0;L;;;;;N;;;;;
+1B18C;NUSHU CHARACTER-1B18C;Lo;0;L;;;;;N;;;;;
+1B18D;NUSHU CHARACTER-1B18D;Lo;0;L;;;;;N;;;;;
+1B18E;NUSHU CHARACTER-1B18E;Lo;0;L;;;;;N;;;;;
+1B18F;NUSHU CHARACTER-1B18F;Lo;0;L;;;;;N;;;;;
+1B190;NUSHU CHARACTER-1B190;Lo;0;L;;;;;N;;;;;
+1B191;NUSHU CHARACTER-1B191;Lo;0;L;;;;;N;;;;;
+1B192;NUSHU CHARACTER-1B192;Lo;0;L;;;;;N;;;;;
+1B193;NUSHU CHARACTER-1B193;Lo;0;L;;;;;N;;;;;
+1B194;NUSHU CHARACTER-1B194;Lo;0;L;;;;;N;;;;;
+1B195;NUSHU CHARACTER-1B195;Lo;0;L;;;;;N;;;;;
+1B196;NUSHU CHARACTER-1B196;Lo;0;L;;;;;N;;;;;
+1B197;NUSHU CHARACTER-1B197;Lo;0;L;;;;;N;;;;;
+1B198;NUSHU CHARACTER-1B198;Lo;0;L;;;;;N;;;;;
+1B199;NUSHU CHARACTER-1B199;Lo;0;L;;;;;N;;;;;
+1B19A;NUSHU CHARACTER-1B19A;Lo;0;L;;;;;N;;;;;
+1B19B;NUSHU CHARACTER-1B19B;Lo;0;L;;;;;N;;;;;
+1B19C;NUSHU CHARACTER-1B19C;Lo;0;L;;;;;N;;;;;
+1B19D;NUSHU CHARACTER-1B19D;Lo;0;L;;;;;N;;;;;
+1B19E;NUSHU CHARACTER-1B19E;Lo;0;L;;;;;N;;;;;
+1B19F;NUSHU CHARACTER-1B19F;Lo;0;L;;;;;N;;;;;
+1B1A0;NUSHU CHARACTER-1B1A0;Lo;0;L;;;;;N;;;;;
+1B1A1;NUSHU CHARACTER-1B1A1;Lo;0;L;;;;;N;;;;;
+1B1A2;NUSHU CHARACTER-1B1A2;Lo;0;L;;;;;N;;;;;
+1B1A3;NUSHU CHARACTER-1B1A3;Lo;0;L;;;;;N;;;;;
+1B1A4;NUSHU CHARACTER-1B1A4;Lo;0;L;;;;;N;;;;;
+1B1A5;NUSHU CHARACTER-1B1A5;Lo;0;L;;;;;N;;;;;
+1B1A6;NUSHU CHARACTER-1B1A6;Lo;0;L;;;;;N;;;;;
+1B1A7;NUSHU CHARACTER-1B1A7;Lo;0;L;;;;;N;;;;;
+1B1A8;NUSHU CHARACTER-1B1A8;Lo;0;L;;;;;N;;;;;
+1B1A9;NUSHU CHARACTER-1B1A9;Lo;0;L;;;;;N;;;;;
+1B1AA;NUSHU CHARACTER-1B1AA;Lo;0;L;;;;;N;;;;;
+1B1AB;NUSHU CHARACTER-1B1AB;Lo;0;L;;;;;N;;;;;
+1B1AC;NUSHU CHARACTER-1B1AC;Lo;0;L;;;;;N;;;;;
+1B1AD;NUSHU CHARACTER-1B1AD;Lo;0;L;;;;;N;;;;;
+1B1AE;NUSHU CHARACTER-1B1AE;Lo;0;L;;;;;N;;;;;
+1B1AF;NUSHU CHARACTER-1B1AF;Lo;0;L;;;;;N;;;;;
+1B1B0;NUSHU CHARACTER-1B1B0;Lo;0;L;;;;;N;;;;;
+1B1B1;NUSHU CHARACTER-1B1B1;Lo;0;L;;;;;N;;;;;
+1B1B2;NUSHU CHARACTER-1B1B2;Lo;0;L;;;;;N;;;;;
+1B1B3;NUSHU CHARACTER-1B1B3;Lo;0;L;;;;;N;;;;;
+1B1B4;NUSHU CHARACTER-1B1B4;Lo;0;L;;;;;N;;;;;
+1B1B5;NUSHU CHARACTER-1B1B5;Lo;0;L;;;;;N;;;;;
+1B1B6;NUSHU CHARACTER-1B1B6;Lo;0;L;;;;;N;;;;;
+1B1B7;NUSHU CHARACTER-1B1B7;Lo;0;L;;;;;N;;;;;
+1B1B8;NUSHU CHARACTER-1B1B8;Lo;0;L;;;;;N;;;;;
+1B1B9;NUSHU CHARACTER-1B1B9;Lo;0;L;;;;;N;;;;;
+1B1BA;NUSHU CHARACTER-1B1BA;Lo;0;L;;;;;N;;;;;
+1B1BB;NUSHU CHARACTER-1B1BB;Lo;0;L;;;;;N;;;;;
+1B1BC;NUSHU CHARACTER-1B1BC;Lo;0;L;;;;;N;;;;;
+1B1BD;NUSHU CHARACTER-1B1BD;Lo;0;L;;;;;N;;;;;
+1B1BE;NUSHU CHARACTER-1B1BE;Lo;0;L;;;;;N;;;;;
+1B1BF;NUSHU CHARACTER-1B1BF;Lo;0;L;;;;;N;;;;;
+1B1C0;NUSHU CHARACTER-1B1C0;Lo;0;L;;;;;N;;;;;
+1B1C1;NUSHU CHARACTER-1B1C1;Lo;0;L;;;;;N;;;;;
+1B1C2;NUSHU CHARACTER-1B1C2;Lo;0;L;;;;;N;;;;;
+1B1C3;NUSHU CHARACTER-1B1C3;Lo;0;L;;;;;N;;;;;
+1B1C4;NUSHU CHARACTER-1B1C4;Lo;0;L;;;;;N;;;;;
+1B1C5;NUSHU CHARACTER-1B1C5;Lo;0;L;;;;;N;;;;;
+1B1C6;NUSHU CHARACTER-1B1C6;Lo;0;L;;;;;N;;;;;
+1B1C7;NUSHU CHARACTER-1B1C7;Lo;0;L;;;;;N;;;;;
+1B1C8;NUSHU CHARACTER-1B1C8;Lo;0;L;;;;;N;;;;;
+1B1C9;NUSHU CHARACTER-1B1C9;Lo;0;L;;;;;N;;;;;
+1B1CA;NUSHU CHARACTER-1B1CA;Lo;0;L;;;;;N;;;;;
+1B1CB;NUSHU CHARACTER-1B1CB;Lo;0;L;;;;;N;;;;;
+1B1CC;NUSHU CHARACTER-1B1CC;Lo;0;L;;;;;N;;;;;
+1B1CD;NUSHU CHARACTER-1B1CD;Lo;0;L;;;;;N;;;;;
+1B1CE;NUSHU CHARACTER-1B1CE;Lo;0;L;;;;;N;;;;;
+1B1CF;NUSHU CHARACTER-1B1CF;Lo;0;L;;;;;N;;;;;
+1B1D0;NUSHU CHARACTER-1B1D0;Lo;0;L;;;;;N;;;;;
+1B1D1;NUSHU CHARACTER-1B1D1;Lo;0;L;;;;;N;;;;;
+1B1D2;NUSHU CHARACTER-1B1D2;Lo;0;L;;;;;N;;;;;
+1B1D3;NUSHU CHARACTER-1B1D3;Lo;0;L;;;;;N;;;;;
+1B1D4;NUSHU CHARACTER-1B1D4;Lo;0;L;;;;;N;;;;;
+1B1D5;NUSHU CHARACTER-1B1D5;Lo;0;L;;;;;N;;;;;
+1B1D6;NUSHU CHARACTER-1B1D6;Lo;0;L;;;;;N;;;;;
+1B1D7;NUSHU CHARACTER-1B1D7;Lo;0;L;;;;;N;;;;;
+1B1D8;NUSHU CHARACTER-1B1D8;Lo;0;L;;;;;N;;;;;
+1B1D9;NUSHU CHARACTER-1B1D9;Lo;0;L;;;;;N;;;;;
+1B1DA;NUSHU CHARACTER-1B1DA;Lo;0;L;;;;;N;;;;;
+1B1DB;NUSHU CHARACTER-1B1DB;Lo;0;L;;;;;N;;;;;
+1B1DC;NUSHU CHARACTER-1B1DC;Lo;0;L;;;;;N;;;;;
+1B1DD;NUSHU CHARACTER-1B1DD;Lo;0;L;;;;;N;;;;;
+1B1DE;NUSHU CHARACTER-1B1DE;Lo;0;L;;;;;N;;;;;
+1B1DF;NUSHU CHARACTER-1B1DF;Lo;0;L;;;;;N;;;;;
+1B1E0;NUSHU CHARACTER-1B1E0;Lo;0;L;;;;;N;;;;;
+1B1E1;NUSHU CHARACTER-1B1E1;Lo;0;L;;;;;N;;;;;
+1B1E2;NUSHU CHARACTER-1B1E2;Lo;0;L;;;;;N;;;;;
+1B1E3;NUSHU CHARACTER-1B1E3;Lo;0;L;;;;;N;;;;;
+1B1E4;NUSHU CHARACTER-1B1E4;Lo;0;L;;;;;N;;;;;
+1B1E5;NUSHU CHARACTER-1B1E5;Lo;0;L;;;;;N;;;;;
+1B1E6;NUSHU CHARACTER-1B1E6;Lo;0;L;;;;;N;;;;;
+1B1E7;NUSHU CHARACTER-1B1E7;Lo;0;L;;;;;N;;;;;
+1B1E8;NUSHU CHARACTER-1B1E8;Lo;0;L;;;;;N;;;;;
+1B1E9;NUSHU CHARACTER-1B1E9;Lo;0;L;;;;;N;;;;;
+1B1EA;NUSHU CHARACTER-1B1EA;Lo;0;L;;;;;N;;;;;
+1B1EB;NUSHU CHARACTER-1B1EB;Lo;0;L;;;;;N;;;;;
+1B1EC;NUSHU CHARACTER-1B1EC;Lo;0;L;;;;;N;;;;;
+1B1ED;NUSHU CHARACTER-1B1ED;Lo;0;L;;;;;N;;;;;
+1B1EE;NUSHU CHARACTER-1B1EE;Lo;0;L;;;;;N;;;;;
+1B1EF;NUSHU CHARACTER-1B1EF;Lo;0;L;;;;;N;;;;;
+1B1F0;NUSHU CHARACTER-1B1F0;Lo;0;L;;;;;N;;;;;
+1B1F1;NUSHU CHARACTER-1B1F1;Lo;0;L;;;;;N;;;;;
+1B1F2;NUSHU CHARACTER-1B1F2;Lo;0;L;;;;;N;;;;;
+1B1F3;NUSHU CHARACTER-1B1F3;Lo;0;L;;;;;N;;;;;
+1B1F4;NUSHU CHARACTER-1B1F4;Lo;0;L;;;;;N;;;;;
+1B1F5;NUSHU CHARACTER-1B1F5;Lo;0;L;;;;;N;;;;;
+1B1F6;NUSHU CHARACTER-1B1F6;Lo;0;L;;;;;N;;;;;
+1B1F7;NUSHU CHARACTER-1B1F7;Lo;0;L;;;;;N;;;;;
+1B1F8;NUSHU CHARACTER-1B1F8;Lo;0;L;;;;;N;;;;;
+1B1F9;NUSHU CHARACTER-1B1F9;Lo;0;L;;;;;N;;;;;
+1B1FA;NUSHU CHARACTER-1B1FA;Lo;0;L;;;;;N;;;;;
+1B1FB;NUSHU CHARACTER-1B1FB;Lo;0;L;;;;;N;;;;;
+1B1FC;NUSHU CHARACTER-1B1FC;Lo;0;L;;;;;N;;;;;
+1B1FD;NUSHU CHARACTER-1B1FD;Lo;0;L;;;;;N;;;;;
+1B1FE;NUSHU CHARACTER-1B1FE;Lo;0;L;;;;;N;;;;;
+1B1FF;NUSHU CHARACTER-1B1FF;Lo;0;L;;;;;N;;;;;
+1B200;NUSHU CHARACTER-1B200;Lo;0;L;;;;;N;;;;;
+1B201;NUSHU CHARACTER-1B201;Lo;0;L;;;;;N;;;;;
+1B202;NUSHU CHARACTER-1B202;Lo;0;L;;;;;N;;;;;
+1B203;NUSHU CHARACTER-1B203;Lo;0;L;;;;;N;;;;;
+1B204;NUSHU CHARACTER-1B204;Lo;0;L;;;;;N;;;;;
+1B205;NUSHU CHARACTER-1B205;Lo;0;L;;;;;N;;;;;
+1B206;NUSHU CHARACTER-1B206;Lo;0;L;;;;;N;;;;;
+1B207;NUSHU CHARACTER-1B207;Lo;0;L;;;;;N;;;;;
+1B208;NUSHU CHARACTER-1B208;Lo;0;L;;;;;N;;;;;
+1B209;NUSHU CHARACTER-1B209;Lo;0;L;;;;;N;;;;;
+1B20A;NUSHU CHARACTER-1B20A;Lo;0;L;;;;;N;;;;;
+1B20B;NUSHU CHARACTER-1B20B;Lo;0;L;;;;;N;;;;;
+1B20C;NUSHU CHARACTER-1B20C;Lo;0;L;;;;;N;;;;;
+1B20D;NUSHU CHARACTER-1B20D;Lo;0;L;;;;;N;;;;;
+1B20E;NUSHU CHARACTER-1B20E;Lo;0;L;;;;;N;;;;;
+1B20F;NUSHU CHARACTER-1B20F;Lo;0;L;;;;;N;;;;;
+1B210;NUSHU CHARACTER-1B210;Lo;0;L;;;;;N;;;;;
+1B211;NUSHU CHARACTER-1B211;Lo;0;L;;;;;N;;;;;
+1B212;NUSHU CHARACTER-1B212;Lo;0;L;;;;;N;;;;;
+1B213;NUSHU CHARACTER-1B213;Lo;0;L;;;;;N;;;;;
+1B214;NUSHU CHARACTER-1B214;Lo;0;L;;;;;N;;;;;
+1B215;NUSHU CHARACTER-1B215;Lo;0;L;;;;;N;;;;;
+1B216;NUSHU CHARACTER-1B216;Lo;0;L;;;;;N;;;;;
+1B217;NUSHU CHARACTER-1B217;Lo;0;L;;;;;N;;;;;
+1B218;NUSHU CHARACTER-1B218;Lo;0;L;;;;;N;;;;;
+1B219;NUSHU CHARACTER-1B219;Lo;0;L;;;;;N;;;;;
+1B21A;NUSHU CHARACTER-1B21A;Lo;0;L;;;;;N;;;;;
+1B21B;NUSHU CHARACTER-1B21B;Lo;0;L;;;;;N;;;;;
+1B21C;NUSHU CHARACTER-1B21C;Lo;0;L;;;;;N;;;;;
+1B21D;NUSHU CHARACTER-1B21D;Lo;0;L;;;;;N;;;;;
+1B21E;NUSHU CHARACTER-1B21E;Lo;0;L;;;;;N;;;;;
+1B21F;NUSHU CHARACTER-1B21F;Lo;0;L;;;;;N;;;;;
+1B220;NUSHU CHARACTER-1B220;Lo;0;L;;;;;N;;;;;
+1B221;NUSHU CHARACTER-1B221;Lo;0;L;;;;;N;;;;;
+1B222;NUSHU CHARACTER-1B222;Lo;0;L;;;;;N;;;;;
+1B223;NUSHU CHARACTER-1B223;Lo;0;L;;;;;N;;;;;
+1B224;NUSHU CHARACTER-1B224;Lo;0;L;;;;;N;;;;;
+1B225;NUSHU CHARACTER-1B225;Lo;0;L;;;;;N;;;;;
+1B226;NUSHU CHARACTER-1B226;Lo;0;L;;;;;N;;;;;
+1B227;NUSHU CHARACTER-1B227;Lo;0;L;;;;;N;;;;;
+1B228;NUSHU CHARACTER-1B228;Lo;0;L;;;;;N;;;;;
+1B229;NUSHU CHARACTER-1B229;Lo;0;L;;;;;N;;;;;
+1B22A;NUSHU CHARACTER-1B22A;Lo;0;L;;;;;N;;;;;
+1B22B;NUSHU CHARACTER-1B22B;Lo;0;L;;;;;N;;;;;
+1B22C;NUSHU CHARACTER-1B22C;Lo;0;L;;;;;N;;;;;
+1B22D;NUSHU CHARACTER-1B22D;Lo;0;L;;;;;N;;;;;
+1B22E;NUSHU CHARACTER-1B22E;Lo;0;L;;;;;N;;;;;
+1B22F;NUSHU CHARACTER-1B22F;Lo;0;L;;;;;N;;;;;
+1B230;NUSHU CHARACTER-1B230;Lo;0;L;;;;;N;;;;;
+1B231;NUSHU CHARACTER-1B231;Lo;0;L;;;;;N;;;;;
+1B232;NUSHU CHARACTER-1B232;Lo;0;L;;;;;N;;;;;
+1B233;NUSHU CHARACTER-1B233;Lo;0;L;;;;;N;;;;;
+1B234;NUSHU CHARACTER-1B234;Lo;0;L;;;;;N;;;;;
+1B235;NUSHU CHARACTER-1B235;Lo;0;L;;;;;N;;;;;
+1B236;NUSHU CHARACTER-1B236;Lo;0;L;;;;;N;;;;;
+1B237;NUSHU CHARACTER-1B237;Lo;0;L;;;;;N;;;;;
+1B238;NUSHU CHARACTER-1B238;Lo;0;L;;;;;N;;;;;
+1B239;NUSHU CHARACTER-1B239;Lo;0;L;;;;;N;;;;;
+1B23A;NUSHU CHARACTER-1B23A;Lo;0;L;;;;;N;;;;;
+1B23B;NUSHU CHARACTER-1B23B;Lo;0;L;;;;;N;;;;;
+1B23C;NUSHU CHARACTER-1B23C;Lo;0;L;;;;;N;;;;;
+1B23D;NUSHU CHARACTER-1B23D;Lo;0;L;;;;;N;;;;;
+1B23E;NUSHU CHARACTER-1B23E;Lo;0;L;;;;;N;;;;;
+1B23F;NUSHU CHARACTER-1B23F;Lo;0;L;;;;;N;;;;;
+1B240;NUSHU CHARACTER-1B240;Lo;0;L;;;;;N;;;;;
+1B241;NUSHU CHARACTER-1B241;Lo;0;L;;;;;N;;;;;
+1B242;NUSHU CHARACTER-1B242;Lo;0;L;;;;;N;;;;;
+1B243;NUSHU CHARACTER-1B243;Lo;0;L;;;;;N;;;;;
+1B244;NUSHU CHARACTER-1B244;Lo;0;L;;;;;N;;;;;
+1B245;NUSHU CHARACTER-1B245;Lo;0;L;;;;;N;;;;;
+1B246;NUSHU CHARACTER-1B246;Lo;0;L;;;;;N;;;;;
+1B247;NUSHU CHARACTER-1B247;Lo;0;L;;;;;N;;;;;
+1B248;NUSHU CHARACTER-1B248;Lo;0;L;;;;;N;;;;;
+1B249;NUSHU CHARACTER-1B249;Lo;0;L;;;;;N;;;;;
+1B24A;NUSHU CHARACTER-1B24A;Lo;0;L;;;;;N;;;;;
+1B24B;NUSHU CHARACTER-1B24B;Lo;0;L;;;;;N;;;;;
+1B24C;NUSHU CHARACTER-1B24C;Lo;0;L;;;;;N;;;;;
+1B24D;NUSHU CHARACTER-1B24D;Lo;0;L;;;;;N;;;;;
+1B24E;NUSHU CHARACTER-1B24E;Lo;0;L;;;;;N;;;;;
+1B24F;NUSHU CHARACTER-1B24F;Lo;0;L;;;;;N;;;;;
+1B250;NUSHU CHARACTER-1B250;Lo;0;L;;;;;N;;;;;
+1B251;NUSHU CHARACTER-1B251;Lo;0;L;;;;;N;;;;;
+1B252;NUSHU CHARACTER-1B252;Lo;0;L;;;;;N;;;;;
+1B253;NUSHU CHARACTER-1B253;Lo;0;L;;;;;N;;;;;
+1B254;NUSHU CHARACTER-1B254;Lo;0;L;;;;;N;;;;;
+1B255;NUSHU CHARACTER-1B255;Lo;0;L;;;;;N;;;;;
+1B256;NUSHU CHARACTER-1B256;Lo;0;L;;;;;N;;;;;
+1B257;NUSHU CHARACTER-1B257;Lo;0;L;;;;;N;;;;;
+1B258;NUSHU CHARACTER-1B258;Lo;0;L;;;;;N;;;;;
+1B259;NUSHU CHARACTER-1B259;Lo;0;L;;;;;N;;;;;
+1B25A;NUSHU CHARACTER-1B25A;Lo;0;L;;;;;N;;;;;
+1B25B;NUSHU CHARACTER-1B25B;Lo;0;L;;;;;N;;;;;
+1B25C;NUSHU CHARACTER-1B25C;Lo;0;L;;;;;N;;;;;
+1B25D;NUSHU CHARACTER-1B25D;Lo;0;L;;;;;N;;;;;
+1B25E;NUSHU CHARACTER-1B25E;Lo;0;L;;;;;N;;;;;
+1B25F;NUSHU CHARACTER-1B25F;Lo;0;L;;;;;N;;;;;
+1B260;NUSHU CHARACTER-1B260;Lo;0;L;;;;;N;;;;;
+1B261;NUSHU CHARACTER-1B261;Lo;0;L;;;;;N;;;;;
+1B262;NUSHU CHARACTER-1B262;Lo;0;L;;;;;N;;;;;
+1B263;NUSHU CHARACTER-1B263;Lo;0;L;;;;;N;;;;;
+1B264;NUSHU CHARACTER-1B264;Lo;0;L;;;;;N;;;;;
+1B265;NUSHU CHARACTER-1B265;Lo;0;L;;;;;N;;;;;
+1B266;NUSHU CHARACTER-1B266;Lo;0;L;;;;;N;;;;;
+1B267;NUSHU CHARACTER-1B267;Lo;0;L;;;;;N;;;;;
+1B268;NUSHU CHARACTER-1B268;Lo;0;L;;;;;N;;;;;
+1B269;NUSHU CHARACTER-1B269;Lo;0;L;;;;;N;;;;;
+1B26A;NUSHU CHARACTER-1B26A;Lo;0;L;;;;;N;;;;;
+1B26B;NUSHU CHARACTER-1B26B;Lo;0;L;;;;;N;;;;;
+1B26C;NUSHU CHARACTER-1B26C;Lo;0;L;;;;;N;;;;;
+1B26D;NUSHU CHARACTER-1B26D;Lo;0;L;;;;;N;;;;;
+1B26E;NUSHU CHARACTER-1B26E;Lo;0;L;;;;;N;;;;;
+1B26F;NUSHU CHARACTER-1B26F;Lo;0;L;;;;;N;;;;;
+1B270;NUSHU CHARACTER-1B270;Lo;0;L;;;;;N;;;;;
+1B271;NUSHU CHARACTER-1B271;Lo;0;L;;;;;N;;;;;
+1B272;NUSHU CHARACTER-1B272;Lo;0;L;;;;;N;;;;;
+1B273;NUSHU CHARACTER-1B273;Lo;0;L;;;;;N;;;;;
+1B274;NUSHU CHARACTER-1B274;Lo;0;L;;;;;N;;;;;
+1B275;NUSHU CHARACTER-1B275;Lo;0;L;;;;;N;;;;;
+1B276;NUSHU CHARACTER-1B276;Lo;0;L;;;;;N;;;;;
+1B277;NUSHU CHARACTER-1B277;Lo;0;L;;;;;N;;;;;
+1B278;NUSHU CHARACTER-1B278;Lo;0;L;;;;;N;;;;;
+1B279;NUSHU CHARACTER-1B279;Lo;0;L;;;;;N;;;;;
+1B27A;NUSHU CHARACTER-1B27A;Lo;0;L;;;;;N;;;;;
+1B27B;NUSHU CHARACTER-1B27B;Lo;0;L;;;;;N;;;;;
+1B27C;NUSHU CHARACTER-1B27C;Lo;0;L;;;;;N;;;;;
+1B27D;NUSHU CHARACTER-1B27D;Lo;0;L;;;;;N;;;;;
+1B27E;NUSHU CHARACTER-1B27E;Lo;0;L;;;;;N;;;;;
+1B27F;NUSHU CHARACTER-1B27F;Lo;0;L;;;;;N;;;;;
+1B280;NUSHU CHARACTER-1B280;Lo;0;L;;;;;N;;;;;
+1B281;NUSHU CHARACTER-1B281;Lo;0;L;;;;;N;;;;;
+1B282;NUSHU CHARACTER-1B282;Lo;0;L;;;;;N;;;;;
+1B283;NUSHU CHARACTER-1B283;Lo;0;L;;;;;N;;;;;
+1B284;NUSHU CHARACTER-1B284;Lo;0;L;;;;;N;;;;;
+1B285;NUSHU CHARACTER-1B285;Lo;0;L;;;;;N;;;;;
+1B286;NUSHU CHARACTER-1B286;Lo;0;L;;;;;N;;;;;
+1B287;NUSHU CHARACTER-1B287;Lo;0;L;;;;;N;;;;;
+1B288;NUSHU CHARACTER-1B288;Lo;0;L;;;;;N;;;;;
+1B289;NUSHU CHARACTER-1B289;Lo;0;L;;;;;N;;;;;
+1B28A;NUSHU CHARACTER-1B28A;Lo;0;L;;;;;N;;;;;
+1B28B;NUSHU CHARACTER-1B28B;Lo;0;L;;;;;N;;;;;
+1B28C;NUSHU CHARACTER-1B28C;Lo;0;L;;;;;N;;;;;
+1B28D;NUSHU CHARACTER-1B28D;Lo;0;L;;;;;N;;;;;
+1B28E;NUSHU CHARACTER-1B28E;Lo;0;L;;;;;N;;;;;
+1B28F;NUSHU CHARACTER-1B28F;Lo;0;L;;;;;N;;;;;
+1B290;NUSHU CHARACTER-1B290;Lo;0;L;;;;;N;;;;;
+1B291;NUSHU CHARACTER-1B291;Lo;0;L;;;;;N;;;;;
+1B292;NUSHU CHARACTER-1B292;Lo;0;L;;;;;N;;;;;
+1B293;NUSHU CHARACTER-1B293;Lo;0;L;;;;;N;;;;;
+1B294;NUSHU CHARACTER-1B294;Lo;0;L;;;;;N;;;;;
+1B295;NUSHU CHARACTER-1B295;Lo;0;L;;;;;N;;;;;
+1B296;NUSHU CHARACTER-1B296;Lo;0;L;;;;;N;;;;;
+1B297;NUSHU CHARACTER-1B297;Lo;0;L;;;;;N;;;;;
+1B298;NUSHU CHARACTER-1B298;Lo;0;L;;;;;N;;;;;
+1B299;NUSHU CHARACTER-1B299;Lo;0;L;;;;;N;;;;;
+1B29A;NUSHU CHARACTER-1B29A;Lo;0;L;;;;;N;;;;;
+1B29B;NUSHU CHARACTER-1B29B;Lo;0;L;;;;;N;;;;;
+1B29C;NUSHU CHARACTER-1B29C;Lo;0;L;;;;;N;;;;;
+1B29D;NUSHU CHARACTER-1B29D;Lo;0;L;;;;;N;;;;;
+1B29E;NUSHU CHARACTER-1B29E;Lo;0;L;;;;;N;;;;;
+1B29F;NUSHU CHARACTER-1B29F;Lo;0;L;;;;;N;;;;;
+1B2A0;NUSHU CHARACTER-1B2A0;Lo;0;L;;;;;N;;;;;
+1B2A1;NUSHU CHARACTER-1B2A1;Lo;0;L;;;;;N;;;;;
+1B2A2;NUSHU CHARACTER-1B2A2;Lo;0;L;;;;;N;;;;;
+1B2A3;NUSHU CHARACTER-1B2A3;Lo;0;L;;;;;N;;;;;
+1B2A4;NUSHU CHARACTER-1B2A4;Lo;0;L;;;;;N;;;;;
+1B2A5;NUSHU CHARACTER-1B2A5;Lo;0;L;;;;;N;;;;;
+1B2A6;NUSHU CHARACTER-1B2A6;Lo;0;L;;;;;N;;;;;
+1B2A7;NUSHU CHARACTER-1B2A7;Lo;0;L;;;;;N;;;;;
+1B2A8;NUSHU CHARACTER-1B2A8;Lo;0;L;;;;;N;;;;;
+1B2A9;NUSHU CHARACTER-1B2A9;Lo;0;L;;;;;N;;;;;
+1B2AA;NUSHU CHARACTER-1B2AA;Lo;0;L;;;;;N;;;;;
+1B2AB;NUSHU CHARACTER-1B2AB;Lo;0;L;;;;;N;;;;;
+1B2AC;NUSHU CHARACTER-1B2AC;Lo;0;L;;;;;N;;;;;
+1B2AD;NUSHU CHARACTER-1B2AD;Lo;0;L;;;;;N;;;;;
+1B2AE;NUSHU CHARACTER-1B2AE;Lo;0;L;;;;;N;;;;;
+1B2AF;NUSHU CHARACTER-1B2AF;Lo;0;L;;;;;N;;;;;
+1B2B0;NUSHU CHARACTER-1B2B0;Lo;0;L;;;;;N;;;;;
+1B2B1;NUSHU CHARACTER-1B2B1;Lo;0;L;;;;;N;;;;;
+1B2B2;NUSHU CHARACTER-1B2B2;Lo;0;L;;;;;N;;;;;
+1B2B3;NUSHU CHARACTER-1B2B3;Lo;0;L;;;;;N;;;;;
+1B2B4;NUSHU CHARACTER-1B2B4;Lo;0;L;;;;;N;;;;;
+1B2B5;NUSHU CHARACTER-1B2B5;Lo;0;L;;;;;N;;;;;
+1B2B6;NUSHU CHARACTER-1B2B6;Lo;0;L;;;;;N;;;;;
+1B2B7;NUSHU CHARACTER-1B2B7;Lo;0;L;;;;;N;;;;;
+1B2B8;NUSHU CHARACTER-1B2B8;Lo;0;L;;;;;N;;;;;
+1B2B9;NUSHU CHARACTER-1B2B9;Lo;0;L;;;;;N;;;;;
+1B2BA;NUSHU CHARACTER-1B2BA;Lo;0;L;;;;;N;;;;;
+1B2BB;NUSHU CHARACTER-1B2BB;Lo;0;L;;;;;N;;;;;
+1B2BC;NUSHU CHARACTER-1B2BC;Lo;0;L;;;;;N;;;;;
+1B2BD;NUSHU CHARACTER-1B2BD;Lo;0;L;;;;;N;;;;;
+1B2BE;NUSHU CHARACTER-1B2BE;Lo;0;L;;;;;N;;;;;
+1B2BF;NUSHU CHARACTER-1B2BF;Lo;0;L;;;;;N;;;;;
+1B2C0;NUSHU CHARACTER-1B2C0;Lo;0;L;;;;;N;;;;;
+1B2C1;NUSHU CHARACTER-1B2C1;Lo;0;L;;;;;N;;;;;
+1B2C2;NUSHU CHARACTER-1B2C2;Lo;0;L;;;;;N;;;;;
+1B2C3;NUSHU CHARACTER-1B2C3;Lo;0;L;;;;;N;;;;;
+1B2C4;NUSHU CHARACTER-1B2C4;Lo;0;L;;;;;N;;;;;
+1B2C5;NUSHU CHARACTER-1B2C5;Lo;0;L;;;;;N;;;;;
+1B2C6;NUSHU CHARACTER-1B2C6;Lo;0;L;;;;;N;;;;;
+1B2C7;NUSHU CHARACTER-1B2C7;Lo;0;L;;;;;N;;;;;
+1B2C8;NUSHU CHARACTER-1B2C8;Lo;0;L;;;;;N;;;;;
+1B2C9;NUSHU CHARACTER-1B2C9;Lo;0;L;;;;;N;;;;;
+1B2CA;NUSHU CHARACTER-1B2CA;Lo;0;L;;;;;N;;;;;
+1B2CB;NUSHU CHARACTER-1B2CB;Lo;0;L;;;;;N;;;;;
+1B2CC;NUSHU CHARACTER-1B2CC;Lo;0;L;;;;;N;;;;;
+1B2CD;NUSHU CHARACTER-1B2CD;Lo;0;L;;;;;N;;;;;
+1B2CE;NUSHU CHARACTER-1B2CE;Lo;0;L;;;;;N;;;;;
+1B2CF;NUSHU CHARACTER-1B2CF;Lo;0;L;;;;;N;;;;;
+1B2D0;NUSHU CHARACTER-1B2D0;Lo;0;L;;;;;N;;;;;
+1B2D1;NUSHU CHARACTER-1B2D1;Lo;0;L;;;;;N;;;;;
+1B2D2;NUSHU CHARACTER-1B2D2;Lo;0;L;;;;;N;;;;;
+1B2D3;NUSHU CHARACTER-1B2D3;Lo;0;L;;;;;N;;;;;
+1B2D4;NUSHU CHARACTER-1B2D4;Lo;0;L;;;;;N;;;;;
+1B2D5;NUSHU CHARACTER-1B2D5;Lo;0;L;;;;;N;;;;;
+1B2D6;NUSHU CHARACTER-1B2D6;Lo;0;L;;;;;N;;;;;
+1B2D7;NUSHU CHARACTER-1B2D7;Lo;0;L;;;;;N;;;;;
+1B2D8;NUSHU CHARACTER-1B2D8;Lo;0;L;;;;;N;;;;;
+1B2D9;NUSHU CHARACTER-1B2D9;Lo;0;L;;;;;N;;;;;
+1B2DA;NUSHU CHARACTER-1B2DA;Lo;0;L;;;;;N;;;;;
+1B2DB;NUSHU CHARACTER-1B2DB;Lo;0;L;;;;;N;;;;;
+1B2DC;NUSHU CHARACTER-1B2DC;Lo;0;L;;;;;N;;;;;
+1B2DD;NUSHU CHARACTER-1B2DD;Lo;0;L;;;;;N;;;;;
+1B2DE;NUSHU CHARACTER-1B2DE;Lo;0;L;;;;;N;;;;;
+1B2DF;NUSHU CHARACTER-1B2DF;Lo;0;L;;;;;N;;;;;
+1B2E0;NUSHU CHARACTER-1B2E0;Lo;0;L;;;;;N;;;;;
+1B2E1;NUSHU CHARACTER-1B2E1;Lo;0;L;;;;;N;;;;;
+1B2E2;NUSHU CHARACTER-1B2E2;Lo;0;L;;;;;N;;;;;
+1B2E3;NUSHU CHARACTER-1B2E3;Lo;0;L;;;;;N;;;;;
+1B2E4;NUSHU CHARACTER-1B2E4;Lo;0;L;;;;;N;;;;;
+1B2E5;NUSHU CHARACTER-1B2E5;Lo;0;L;;;;;N;;;;;
+1B2E6;NUSHU CHARACTER-1B2E6;Lo;0;L;;;;;N;;;;;
+1B2E7;NUSHU CHARACTER-1B2E7;Lo;0;L;;;;;N;;;;;
+1B2E8;NUSHU CHARACTER-1B2E8;Lo;0;L;;;;;N;;;;;
+1B2E9;NUSHU CHARACTER-1B2E9;Lo;0;L;;;;;N;;;;;
+1B2EA;NUSHU CHARACTER-1B2EA;Lo;0;L;;;;;N;;;;;
+1B2EB;NUSHU CHARACTER-1B2EB;Lo;0;L;;;;;N;;;;;
+1B2EC;NUSHU CHARACTER-1B2EC;Lo;0;L;;;;;N;;;;;
+1B2ED;NUSHU CHARACTER-1B2ED;Lo;0;L;;;;;N;;;;;
+1B2EE;NUSHU CHARACTER-1B2EE;Lo;0;L;;;;;N;;;;;
+1B2EF;NUSHU CHARACTER-1B2EF;Lo;0;L;;;;;N;;;;;
+1B2F0;NUSHU CHARACTER-1B2F0;Lo;0;L;;;;;N;;;;;
+1B2F1;NUSHU CHARACTER-1B2F1;Lo;0;L;;;;;N;;;;;
+1B2F2;NUSHU CHARACTER-1B2F2;Lo;0;L;;;;;N;;;;;
+1B2F3;NUSHU CHARACTER-1B2F3;Lo;0;L;;;;;N;;;;;
+1B2F4;NUSHU CHARACTER-1B2F4;Lo;0;L;;;;;N;;;;;
+1B2F5;NUSHU CHARACTER-1B2F5;Lo;0;L;;;;;N;;;;;
+1B2F6;NUSHU CHARACTER-1B2F6;Lo;0;L;;;;;N;;;;;
+1B2F7;NUSHU CHARACTER-1B2F7;Lo;0;L;;;;;N;;;;;
+1B2F8;NUSHU CHARACTER-1B2F8;Lo;0;L;;;;;N;;;;;
+1B2F9;NUSHU CHARACTER-1B2F9;Lo;0;L;;;;;N;;;;;
+1B2FA;NUSHU CHARACTER-1B2FA;Lo;0;L;;;;;N;;;;;
+1B2FB;NUSHU CHARACTER-1B2FB;Lo;0;L;;;;;N;;;;;
1BC00;DUPLOYAN LETTER H;Lo;0;L;;;;;N;;;;;
1BC01;DUPLOYAN LETTER X;Lo;0;L;;;;;N;;;;;
1BC02;DUPLOYAN LETTER P;Lo;0;L;;;;;N;;;;;
@@ -28269,6 +29217,12 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F248;TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557;So;0;L;<compat> 3014 6557 3015;;;;N;;;;;
1F250;CIRCLED IDEOGRAPH ADVANTAGE;So;0;L;<circle> 5F97;;;;N;;;;;
1F251;CIRCLED IDEOGRAPH ACCEPT;So;0;L;<circle> 53EF;;;;N;;;;;
+1F260;ROUNDED SYMBOL FOR FU;So;0;ON;;;;;N;;;;;
+1F261;ROUNDED SYMBOL FOR LU;So;0;ON;;;;;N;;;;;
+1F262;ROUNDED SYMBOL FOR SHOU;So;0;ON;;;;;N;;;;;
+1F263;ROUNDED SYMBOL FOR XI;So;0;ON;;;;;N;;;;;
+1F264;ROUNDED SYMBOL FOR SHUANGXI;So;0;ON;;;;;N;;;;;
+1F265;ROUNDED SYMBOL FOR CAI;So;0;ON;;;;;N;;;;;
1F300;CYCLONE;So;0;ON;;;;;N;;;;;
1F301;FOGGY;So;0;ON;;;;;N;;;;;
1F302;CLOSED UMBRELLA;So;0;ON;;;;;N;;;;;
@@ -29248,6 +30202,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F6D0;PLACE OF WORSHIP;So;0;ON;;;;;N;;;;;
1F6D1;OCTAGONAL SIGN;So;0;ON;;;;;N;;;;;
1F6D2;SHOPPING TROLLEY;So;0;ON;;;;;N;;;;;
+1F6D3;STUPA;So;0;ON;;;;;N;;;;;
+1F6D4;PAGODA;So;0;ON;;;;;N;;;;;
1F6E0;HAMMER AND WRENCH;So;0;ON;;;;;N;;;;;
1F6E1;SHIELD;So;0;ON;;;;;N;;;;;
1F6E2;OIL DRUM;So;0;ON;;;;;N;;;;;
@@ -29268,6 +30224,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F6F4;SCOOTER;So;0;ON;;;;;N;;;;;
1F6F5;MOTOR SCOOTER;So;0;ON;;;;;N;;;;;
1F6F6;CANOE;So;0;ON;;;;;N;;;;;
+1F6F7;SLED;So;0;ON;;;;;N;;;;;
+1F6F8;FLYING SAUCER;So;0;ON;;;;;N;;;;;
1F700;ALCHEMICAL SYMBOL FOR QUINTESSENCE;So;0;ON;;;;;N;;;;;
1F701;ALCHEMICAL SYMBOL FOR AIR;So;0;ON;;;;;N;;;;;
1F702;ALCHEMICAL SYMBOL FOR FIRE;So;0;ON;;;;;N;;;;;
@@ -29617,6 +30575,18 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F8AB;RIGHTWARDS FRONT-TILTED SHADOWED WHITE ARROW;So;0;ON;;;;;N;;;;;
1F8AC;WHITE ARROW SHAFT WIDTH ONE;So;0;ON;;;;;N;;;;;
1F8AD;WHITE ARROW SHAFT WIDTH TWO THIRDS;So;0;ON;;;;;N;;;;;
+1F900;CIRCLED CROSS FORMEE WITH FOUR DOTS;So;0;ON;;;;;N;;;;;
+1F901;CIRCLED CROSS FORMEE WITH TWO DOTS;So;0;ON;;;;;N;;;;;
+1F902;CIRCLED CROSS FORMEE;So;0;ON;;;;;N;;;;;
+1F903;LEFT HALF CIRCLE WITH FOUR DOTS;So;0;ON;;;;;N;;;;;
+1F904;LEFT HALF CIRCLE WITH THREE DOTS;So;0;ON;;;;;N;;;;;
+1F905;LEFT HALF CIRCLE WITH TWO DOTS;So;0;ON;;;;;N;;;;;
+1F906;LEFT HALF CIRCLE WITH DOT;So;0;ON;;;;;N;;;;;
+1F907;LEFT HALF CIRCLE;So;0;ON;;;;;N;;;;;
+1F908;DOWNWARD FACING HOOK;So;0;ON;;;;;N;;;;;
+1F909;DOWNWARD FACING NOTCHED HOOK;So;0;ON;;;;;N;;;;;
+1F90A;DOWNWARD FACING HOOK WITH DOT;So;0;ON;;;;;N;;;;;
+1F90B;DOWNWARD FACING NOTCHED HOOK WITH DOT;So;0;ON;;;;;N;;;;;
1F910;ZIPPER-MOUTH FACE;So;0;ON;;;;;N;;;;;
1F911;MONEY-MOUTH FACE;So;0;ON;;;;;N;;;;;
1F912;FACE WITH THERMOMETER;So;0;ON;;;;;N;;;;;
@@ -29632,6 +30602,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F91C;RIGHT-FACING FIST;So;0;ON;;;;;N;;;;;
1F91D;HANDSHAKE;So;0;ON;;;;;N;;;;;
1F91E;HAND WITH INDEX AND MIDDLE FINGERS CROSSED;So;0;ON;;;;;N;;;;;
+1F91F;I LOVE YOU HAND SIGN;So;0;ON;;;;;N;;;;;
1F920;FACE WITH COWBOY HAT;So;0;ON;;;;;N;;;;;
1F921;CLOWN FACE;So;0;ON;;;;;N;;;;;
1F922;NAUSEATED FACE;So;0;ON;;;;;N;;;;;
@@ -29640,7 +30611,17 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F925;LYING FACE;So;0;ON;;;;;N;;;;;
1F926;FACE PALM;So;0;ON;;;;;N;;;;;
1F927;SNEEZING FACE;So;0;ON;;;;;N;;;;;
+1F928;FACE WITH ONE EYEBROW RAISED;So;0;ON;;;;;N;;;;;
+1F929;GRINNING FACE WITH STAR EYES;So;0;ON;;;;;N;;;;;
+1F92A;GRINNING FACE WITH ONE LARGE AND ONE SMALL EYE;So;0;ON;;;;;N;;;;;
+1F92B;FACE WITH FINGER COVERING CLOSED LIPS;So;0;ON;;;;;N;;;;;
+1F92C;SERIOUS FACE WITH SYMBOLS COVERING MOUTH;So;0;ON;;;;;N;;;;;
+1F92D;SMILING FACE WITH SMILING EYES AND HAND COVERING MOUTH;So;0;ON;;;;;N;;;;;
+1F92E;FACE WITH OPEN MOUTH VOMITING;So;0;ON;;;;;N;;;;;
+1F92F;SHOCKED FACE WITH EXPLODING HEAD;So;0;ON;;;;;N;;;;;
1F930;PREGNANT WOMAN;So;0;ON;;;;;N;;;;;
+1F931;BREAST-FEEDING;So;0;ON;;;;;N;;;;;
+1F932;PALMS UP TOGETHER;So;0;ON;;;;;N;;;;;
1F933;SELFIE;So;0;ON;;;;;N;;;;;
1F934;PRINCE;So;0;ON;;;;;N;;;;;
1F935;MAN IN TUXEDO;So;0;ON;;;;;N;;;;;
@@ -29665,6 +30646,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F949;THIRD PLACE MEDAL;So;0;ON;;;;;N;;;;;
1F94A;BOXING GLOVE;So;0;ON;;;;;N;;;;;
1F94B;MARTIAL ARTS UNIFORM;So;0;ON;;;;;N;;;;;
+1F94C;CURLING STONE;So;0;ON;;;;;N;;;;;
1F950;CROISSANT;So;0;ON;;;;;N;;;;;
1F951;AVOCADO;So;0;ON;;;;;N;;;;;
1F952;CUCUMBER;So;0;ON;;;;;N;;;;;
@@ -29680,6 +30662,19 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F95C;PEANUTS;So;0;ON;;;;;N;;;;;
1F95D;KIWIFRUIT;So;0;ON;;;;;N;;;;;
1F95E;PANCAKES;So;0;ON;;;;;N;;;;;
+1F95F;DUMPLING;So;0;ON;;;;;N;;;;;
+1F960;FORTUNE COOKIE;So;0;ON;;;;;N;;;;;
+1F961;TAKEOUT BOX;So;0;ON;;;;;N;;;;;
+1F962;CHOPSTICKS;So;0;ON;;;;;N;;;;;
+1F963;BOWL WITH SPOON;So;0;ON;;;;;N;;;;;
+1F964;CUP WITH STRAW;So;0;ON;;;;;N;;;;;
+1F965;COCONUT;So;0;ON;;;;;N;;;;;
+1F966;BROCCOLI;So;0;ON;;;;;N;;;;;
+1F967;PIE;So;0;ON;;;;;N;;;;;
+1F968;PRETZEL;So;0;ON;;;;;N;;;;;
+1F969;CUT OF MEAT;So;0;ON;;;;;N;;;;;
+1F96A;SANDWICH;So;0;ON;;;;;N;;;;;
+1F96B;CANNED FOOD;So;0;ON;;;;;N;;;;;
1F980;CRAB;So;0;ON;;;;;N;;;;;
1F981;LION FACE;So;0;ON;;;;;N;;;;;
1F982;SCORPION;So;0;ON;;;;;N;;;;;
@@ -29698,7 +30693,36 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F98F;RHINOCEROS;So;0;ON;;;;;N;;;;;
1F990;SHRIMP;So;0;ON;;;;;N;;;;;
1F991;SQUID;So;0;ON;;;;;N;;;;;
+1F992;GIRAFFE FACE;So;0;ON;;;;;N;;;;;
+1F993;ZEBRA FACE;So;0;ON;;;;;N;;;;;
+1F994;HEDGEHOG;So;0;ON;;;;;N;;;;;
+1F995;SAUROPOD;So;0;ON;;;;;N;;;;;
+1F996;T-REX;So;0;ON;;;;;N;;;;;
+1F997;CRICKET;So;0;ON;;;;;N;;;;;
1F9C0;CHEESE WEDGE;So;0;ON;;;;;N;;;;;
+1F9D0;FACE WITH MONOCLE;So;0;ON;;;;;N;;;;;
+1F9D1;ADULT;So;0;ON;;;;;N;;;;;
+1F9D2;CHILD;So;0;ON;;;;;N;;;;;
+1F9D3;OLDER ADULT;So;0;ON;;;;;N;;;;;
+1F9D4;BEARDED PERSON;So;0;ON;;;;;N;;;;;
+1F9D5;PERSON WITH HEADSCARF;So;0;ON;;;;;N;;;;;
+1F9D6;PERSON IN STEAMY ROOM;So;0;ON;;;;;N;;;;;
+1F9D7;PERSON CLIMBING;So;0;ON;;;;;N;;;;;
+1F9D8;PERSON IN LOTUS POSITION;So;0;ON;;;;;N;;;;;
+1F9D9;MAGE;So;0;ON;;;;;N;;;;;
+1F9DA;FAIRY;So;0;ON;;;;;N;;;;;
+1F9DB;VAMPIRE;So;0;ON;;;;;N;;;;;
+1F9DC;MERPERSON;So;0;ON;;;;;N;;;;;
+1F9DD;ELF;So;0;ON;;;;;N;;;;;
+1F9DE;GENIE;So;0;ON;;;;;N;;;;;
+1F9DF;ZOMBIE;So;0;ON;;;;;N;;;;;
+1F9E0;BRAIN;So;0;ON;;;;;N;;;;;
+1F9E1;ORANGE HEART;So;0;ON;;;;;N;;;;;
+1F9E2;BILLED CAP;So;0;ON;;;;;N;;;;;
+1F9E3;SCARF;So;0;ON;;;;;N;;;;;
+1F9E4;GLOVES;So;0;ON;;;;;N;;;;;
+1F9E5;COAT;So;0;ON;;;;;N;;;;;
+1F9E6;SOCKS;So;0;ON;;;;;N;;;;;
20000;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;;
2A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;;
2A700;<CJK Ideograph Extension C, First>;Lo;0;L;;;;;N;;;;;
@@ -29707,6 +30731,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
2B81D;<CJK Ideograph Extension D, Last>;Lo;0;L;;;;;N;;;;;
2B820;<CJK Ideograph Extension E, First>;Lo;0;L;;;;;N;;;;;
2CEA1;<CJK Ideograph Extension E, Last>;Lo;0;L;;;;;N;;;;;
+2CEB0;<CJK Ideograph Extension F, First>;Lo;0;L;;;;;N;;;;;
+2EBE0;<CJK Ideograph Extension F, Last>;Lo;0;L;;;;;N;;;;;
2F800;CJK COMPATIBILITY IDEOGRAPH-2F800;Lo;0;L;4E3D;;;;N;;;;;
2F801;CJK COMPATIBILITY IDEOGRAPH-2F801;Lo;0;L;4E38;;;;N;;;;;
2F802;CJK COMPATIBILITY IDEOGRAPH-2F802;Lo;0;L;4E41;;;;N;;;;;
diff --git a/lib/stdlib/uc_spec/gen_unicode_mod.escript b/lib/stdlib/uc_spec/gen_unicode_mod.escript
index fefd7d3b70..674e5a0628 100755
--- a/lib/stdlib/uc_spec/gen_unicode_mod.escript
+++ b/lib/stdlib/uc_spec/gen_unicode_mod.escript
@@ -65,7 +65,7 @@ main(_) ->
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
parse_unicode_data(Line0, Acc) ->
- Line = string:strip(Line0, right, $\n),
+ Line = string:chomp(Line0),
[CodePoint,Name,_Cat,Class,_BiDi,Decomp,
_N1,_N2,_N3,_BDMirror,_Uni1,_Iso|Case] = tokens(Line, ";"),
{Dec,Comp} = case to_decomp(Decomp) of
@@ -78,14 +78,14 @@ parse_unicode_data(Line0, Acc) ->
|Acc].
to_class(String) ->
- list_to_integer(string:strip(String, both)).
+ list_to_integer(string:trim(String, both)).
to_decomp("") -> [];
to_decomp("<" ++ Str) ->
- [Tag,Rest] = string:tokens(Str, ">"),
+ [Tag,Rest] = string:lexemes(Str, ">"),
{list_to_atom(Tag), to_decomp(Rest)};
to_decomp(CodePoints) ->
- CPL = string:tokens(CodePoints, " "),
+ CPL = string:lexemes(CodePoints, " "),
[hex_to_int(CP) || CP <- CPL].
to_case(["","",""]) -> [];
@@ -105,20 +105,20 @@ parse_special_casing(Line, Table) ->
array:set(CP, Entry#cp{cs=Case}, Table).
to_scase([Lower,Title,Upper|_]) ->
- {unlist([hex_to_int(CP) || CP <- string:strip(string:tokens(Upper, " "), both)]),
- unlist([hex_to_int(CP) || CP <- string:strip(string:tokens(Lower, " "), both)]),
- unlist([hex_to_int(CP) || CP <- string:strip(string:tokens(Title, " "), both)]),
+ {unlist([hex_to_int(CP) || CP <- string:lexemes(Upper, " ")]),
+ unlist([hex_to_int(CP) || CP <- string:lexemes(Lower, " ")]),
+ unlist([hex_to_int(CP) || CP <- string:lexemes(Title, " ")]),
[]}.
parse_case_folding(Line, Table) ->
[CodePoint, Class0, CaseStr |_Comments] = tokens(Line, ";"),
- Class = string:strip(Class0, both),
+ Class = string:trim(Class0, both),
if Class =:= "T" -> Table; %% Do not support localization yet
Class =:= "S" -> Table; %% Ignore simple
true ->
CP = hex_to_int(CodePoint),
Case = unlist([hex_to_int(CPC) ||
- CPC <- string:strip(string:tokens(CaseStr, " "), both)]),
+ CPC <- string:lexemes(CaseStr, " ")]),
#cp{cs={U,L,T,_}} = Entry = array:get(CP, Table),
array:set(CP, Entry#cp{cs={U,L,T,Case}}, Table)
end.
@@ -186,7 +186,7 @@ gen_static(Fd) ->
" {U,L} -> #{upper=>U,lower=>L,title=>U,fold=>L};\n"
" {U,L,T,F} -> #{upper=>U,lower=>L,title=>T,fold=>F}\n"
" end.\n\n"),
- io:put_chars(Fd, "spec_version() -> {9,0}.\n\n\n"),
+ io:put_chars(Fd, "spec_version() -> {10,0}.\n\n\n"),
io:put_chars(Fd, "class(Codepoint) -> {CCC,_,_} = unicode_table(Codepoint),\n CCC.\n\n"),
io:put_chars(Fd, "-spec uppercase(unicode:chardata()) -> "
"maybe_improper_list(gc(),unicode:chardata()).\n"),
@@ -869,10 +869,10 @@ optimize_ranges_1(Rs) ->
hex_to_int([]) -> [];
hex_to_int(HexStr) ->
- list_to_integer(string:strip(HexStr, both), 16).
+ list_to_integer(string:trim(HexStr, both), 16).
to_atom(Str) ->
- list_to_atom(string:to_lower(string:strip(Str, both))).
+ list_to_atom(string:lowercase(string:trim(Str, both))).
foldl(Fun, Acc, Fd) ->
Get = fun() -> file:read_line(Fd) end,
@@ -892,7 +892,7 @@ foldl_1(Fun, Acc, Get) ->
-%% Differs from string:tokens, it returns empty string as token between two delimiters
+%% Differs from string:lexemes, it returns empty string as token between two delimiters
tokens(S, [C]) ->
tokens(lists:reverse(S), C, []).