diff options
Diffstat (limited to 'lib/stdlib')
61 files changed, 6157 insertions, 934 deletions
diff --git a/lib/stdlib/doc/src/Makefile b/lib/stdlib/doc/src/Makefile index 93eac8220d..aeed79408b 100644 --- a/lib/stdlib/doc/src/Makefile +++ b/lib/stdlib/doc/src/Makefile @@ -98,6 +98,7 @@ XML_REF3_FILES = \ sys.xml \ timer.xml \ unicode.xml \ + uri_string.xml \ win32reg.xml \ zip.xml diff --git a/lib/stdlib/doc/src/c.xml b/lib/stdlib/doc/src/c.xml index 7666699183..697e1715e7 100644 --- a/lib/stdlib/doc/src/c.xml +++ b/lib/stdlib/doc/src/c.xml @@ -94,6 +94,15 @@ </func> <func> + <name name="erlangrc" arity="1"/> + <fsummary>Load an erlang resource file.</fsummary> + <desc> + <p>Search <c>PathList</c> and load <c>.erlang</c> resource file if + found.</p> + </desc> + </func> + + <func> <name name="flush" arity="0"/> <fsummary>Flush any messages sent to the shell.</fsummary> <desc> diff --git a/lib/stdlib/doc/src/filelib.xml b/lib/stdlib/doc/src/filelib.xml index 80c4acffdb..5e631aac21 100644 --- a/lib/stdlib/doc/src/filelib.xml +++ b/lib/stdlib/doc/src/filelib.xml @@ -45,6 +45,30 @@ <p>For more information about raw filenames, see the <seealso marker="kernel:file"><c>file</c></seealso> module.</p> + + <note> + <p> + Functionality in this module generally assumes valid input and + does not necessarily fail on input that does not use a valid + encoding, but may instead very likely produce invalid output. + </p> + <p> + File operations used to accept filenames containing + null characters (integer value zero). This caused + the name to be truncated and in some cases arguments + to primitive operations to be mixed up. Filenames + containing null characters inside the filename + are now <em>rejected</em> and will cause primitive + file operations to fail. + </p> + </note> + <warning><p> + Currently null characters at the end of the filename + will be accepted by primitive file operations. Such + filenames are however still documented as invalid. The + implementation will also change in the future and + reject such filenames. + </p></warning> </description> <datatypes> @@ -193,6 +217,11 @@ <p>Other characters represent themselves. Only filenames that have exactly the same character in the same position match. Matching is case-sensitive, for example, "a" does not match "A".</p> + <p>Directory separators must always be written as <c>/</c>, even on + Windows.</p> + <p>A character preceded by <c>\</c> loses its special meaning. Note + that <c>\</c> must be written as <c>\\</c> in a string literal. + For example, "\\?*" will match any filename starting with <c>?</c>.</p> <p>Notice that multiple "*" characters are allowed (as in Unix wildcards, but opposed to Windows/DOS wildcards).</p> <p><em>Examples:</em></p> diff --git a/lib/stdlib/doc/src/filename.xml b/lib/stdlib/doc/src/filename.xml index 14fd5ef787..d2608ad542 100644 --- a/lib/stdlib/doc/src/filename.xml +++ b/lib/stdlib/doc/src/filename.xml @@ -46,7 +46,10 @@ filename by removing redundant directory separators, use <seealso marker="#join/1"><c>join/1</c></seealso>.</p> - <p>The module supports raw filenames in the way that if a binary is + <p> + The module supports + <seealso marker="unicode_usage#notes-about-raw-filenames">raw + filenames</seealso> in the way that if a binary is present, or the filename cannot be interpreted according to the return value of <seealso marker="kernel:file#native_name_encoding/0"> <c>file:native_name_encoding/0</c></seealso>, a raw filename is also @@ -56,6 +59,30 @@ (the join operation is performed of course). For more information about raw filenames, see the <seealso marker="kernel:file"><c>file</c></seealso> module.</p> + + <note> + <p> + Functionality in this module generally assumes valid input and + does not necessarily fail on input that does not use a valid + encoding, but may instead very likely produce invalid output. + </p> + <p> + File operations used to accept filenames containing + null characters (integer value zero). This caused + the name to be truncated and in some cases arguments + to primitive operations to be mixed up. Filenames + containing null characters inside the filename + are now <em>rejected</em> and will cause primitive + file operations to fail. + </p> + </note> + <warning><p> + Currently null characters at the end of the filename + will be accepted by primitive file operations. Such + filenames are however still documented as invalid. The + implementation will also change in the future and + reject such filenames. + </p></warning> </description> <datatypes> <datatype> @@ -555,6 +582,7 @@ unsafe</pre> ["a:/","msdev","include"]</pre> </desc> </func> + </funcs> </erlref> diff --git a/lib/stdlib/doc/src/gen_server.xml b/lib/stdlib/doc/src/gen_server.xml index 7d137fc772..da74e793e6 100644 --- a/lib/stdlib/doc/src/gen_server.xml +++ b/lib/stdlib/doc/src/gen_server.xml @@ -60,6 +60,8 @@ gen_server:abcast -----> Module:handle_cast/2 - -----> Module:handle_info/2 +- -----> Module:handle_continue/2 + - -----> Module:terminate/2 - -----> Module:code_change/3</pre> @@ -88,6 +90,13 @@ gen_server:abcast -----> Module:handle_cast/2 implies at least two garbage collections (when hibernating and shortly after waking up) and is not something you want to do between each call to a busy server.</p> + + <p>If the <c>gen_server</c> process needs to perform an action + immediately after initialization or to break the execution of a + callback into multiple steps, it can return <c>{continue,Continue}</c> + in place of the time-out or hibernation value, which will immediately + invoke the <c>handle_continue/2</c> callback.</p> + </description> <funcs> @@ -610,12 +619,15 @@ gen_server:abcast -----> Module:handle_cast/2 <v>State = term()</v> <v>Result = {reply,Reply,NewState} | {reply,Reply,NewState,Timeout}</v> <v> | {reply,Reply,NewState,hibernate}</v> + <v> | {reply,Reply,NewState,{continue,Continue}}</v> <v> | {noreply,NewState} | {noreply,NewState,Timeout}</v> <v> | {noreply,NewState,hibernate}</v> + <v> | {noreply,NewState,{continue,Continue}}</v> <v> | {stop,Reason,Reply,NewState} | {stop,Reason,NewState}</v> <v> Reply = term()</v> <v> NewState = term()</v> <v> Timeout = int()>=0 | infinity</v> + <v> Continue = term()</v> <v> Reason = term()</v> </type> <desc> @@ -673,9 +685,11 @@ gen_server:abcast -----> Module:handle_cast/2 <v>State = term()</v> <v>Result = {noreply,NewState} | {noreply,NewState,Timeout}</v> <v> | {noreply,NewState,hibernate}</v> + <v> | {noreply,NewState,{continue,Continue}}</v> <v> | {stop,Reason,NewState}</v> <v> NewState = term()</v> <v> Timeout = int()>=0 | infinity</v> + <v> Continue = term()</v> <v> Reason = term()</v> </type> <desc> @@ -690,6 +704,41 @@ gen_server:abcast -----> Module:handle_cast/2 </func> <func> + <name>Module:handle_continue(Continue, State) -> Result</name> + <fsummary>Handle a continue instruction.</fsummary> + <type> + <v>Continue = term()</v> + <v>State = term()</v> + <v>Result = {noreply,NewState} | {noreply,NewState,Timeout}</v> + <v> | {noreply,NewState,hibernate}</v> + <v> | {noreply,NewState,{continue,Continue}}</v> + <v> | {stop,Reason,NewState}</v> + <v> NewState = term()</v> + <v> Timeout = int()>=0 | infinity</v> + <v> Continue = term()</v> + <v> Reason = normal | term()</v> + </type> + <desc> + <note> + <p>This callback is optional, so callback modules need to + export it only if they return <c>{continue,Continue}</c> + from another callback. If continue is used and the callback + is not implemented, the process will exit with <c>undef</c> + error.</p> + </note> + <p>This function is called by a <c>gen_server</c> process whenever + a previous callback returns <c>{continue, Continue}</c>. + <c>handle_continue/2</c> is invoked immediately after the previous + callback, which makes it useful for performing work after + initialization or for splitting the work in a callback in + multiple steps, updating the process state along the way.</p> + <p>For a description of the other arguments and possible return values, + see <seealso marker="#Module:handle_call/3"> + <c>Module:handle_call/3</c></seealso>.</p> + </desc> + </func> + + <func> <name>Module:handle_info(Info, State) -> Result</name> <fsummary>Handle an incoming message.</fsummary> <type> @@ -697,6 +746,7 @@ gen_server:abcast -----> Module:handle_cast/2 <v>State = term()</v> <v>Result = {noreply,NewState} | {noreply,NewState,Timeout}</v> <v> | {noreply,NewState,hibernate}</v> + <v> | {noreply,NewState,{continue,Continue}}</v> <v> | {stop,Reason,NewState}</v> <v> NewState = term()</v> <v> Timeout = int()>=0 | infinity</v> @@ -726,7 +776,7 @@ gen_server:abcast -----> Module:handle_cast/2 <type> <v>Args = term()</v> <v>Result = {ok,State} | {ok,State,Timeout} | {ok,State,hibernate}</v> - <v> | {stop,Reason} | ignore</v> + <v> | {ok,State,{continue,Continue}} | {stop,Reason} | ignore</v> <v> State = term()</v> <v> Timeout = int()>=0 | infinity</v> <v> Reason = term()</v> diff --git a/lib/stdlib/doc/src/gen_statem.xml b/lib/stdlib/doc/src/gen_statem.xml index a7caa71dcb..4a824f073e 100644 --- a/lib/stdlib/doc/src/gen_statem.xml +++ b/lib/stdlib/doc/src/gen_statem.xml @@ -1329,7 +1329,7 @@ handle_event(_, _, State, Data) -> <c><anno>T</anno></c> is the time-out time. <c>{clean_timeout,<anno>T</anno>}</c> works like just <c>T</c> described in the note above - and uses a proxy process for <c>T < infinity</c>, + and uses a proxy process while <c>{dirty_timeout,<anno>T</anno>}</c> bypasses the proxy process which is more lightweight. </p> @@ -1339,8 +1339,12 @@ handle_event(_, _, State, Data) -> with <c>{dirty_timeout,<anno>T</anno>}</c> to avoid that the calling process dies when the call times out, you will have to be prepared to handle - a late reply. - So why not just let the calling process die? + a late reply. Note that there is an odd chance + to get a late reply even with + <c>{dirty_timeout,infinity}</c> or <c>infinity</c> + for example in the event of network problems. + So why not just let the calling process die + by not catching the exception? </p> </note> <p> @@ -1851,7 +1855,7 @@ handle_event(_, _, State, Data) -> </p> <note> <p> - Note that if the <c>gen_statem</c> is started trough + Note that if the <c>gen_statem</c> is started through <seealso marker="proc_lib"><c>proc_lib</c></seealso> and <seealso marker="#enter_loop/4"><c>enter_loop/4-6</c></seealso>, diff --git a/lib/stdlib/doc/src/rand.xml b/lib/stdlib/doc/src/rand.xml index 89fb858823..21f680a0ee 100644 --- a/lib/stdlib/doc/src/rand.xml +++ b/lib/stdlib/doc/src/rand.xml @@ -133,8 +133,9 @@ variable <c>rand_seed</c> to remember the current state.</p> <p>If a process calls - <seealso marker="#uniform-0"><c>uniform/0</c></seealso> or - <seealso marker="#uniform-1"><c>uniform/1</c></seealso> without + <seealso marker="#uniform-0"><c>uniform/0</c></seealso>, + <seealso marker="#uniform-1"><c>uniform/1</c></seealso> or + <seealso marker="#uniform_real-0"><c>uniform_real/0</c></seealso> without setting a seed first, <seealso marker="#seed-1"><c>seed/1</c></seealso> is called automatically with the default algorithm and creates a non-constant seed.</p> @@ -168,10 +169,17 @@ R3 = rand:uniform(),</pre> S0 = rand:seed_s(exrop), {R4, S1} = rand:uniform_s(S0),</pre> + <p>Textbook basic form Box-Muller standard normal deviate</p> + + <pre> +R5 = rand:uniform_real(), +R6 = rand:uniform(), +SND0 = math:sqrt(-2 * math:log(R5)) * math:cos(math:pi() * R6)</pre> + <p>Create a standard normal deviate:</p> <pre> -{SND0, S2} = rand:normal_s(S1),</pre> +{SND1, S2} = rand:normal_s(S1),</pre> <p>Create a normal deviate with mean -3 and variance 0.5:</p> @@ -414,7 +422,8 @@ tests. We suggest to use a sign test to extract a random Boolean value.</pre> This function may return exactly <c>0.0</c> which can be fatal for certain applications. If that is undesired you can use <c>(1.0 - rand:uniform())</c> to get the - interval <c>0.0 < <anno>X</anno> =< 1.0</c>. + interval <c>0.0 < <anno>X</anno> =< 1.0</c>, or instead use + <seealso marker="#uniform_real-0"><c>uniform_real/0</c></seealso>. </p> <p> If neither endpoint is desired you can test and re-try @@ -432,6 +441,42 @@ end.</pre> </func> <func> + <name name="uniform_real" arity="0"/> + <fsummary>Return a random float.</fsummary> + <desc><marker id="uniform_real-0"/> + <p> + Returns a random float + uniformly distributed in the value range + <c>DBL_MIN =< <anno>X</anno> < 1.0</c> + and updates the state in the process dictionary. + </p> + <p> + Conceptually, a random real number <c>R</c> is generated + from the interval <c>0 =< R < 1</c> and then the + closest rounded down normalized number + in the IEEE 754 Double precision format + is returned. + </p> + <note> + <p> + The generated numbers from this function has got better + granularity for small numbers than the regular + <seealso marker="#uniform-0"><c>uniform/0</c></seealso> + because all bits in the mantissa are random. + This property, in combination with the fact that exactly zero + is never returned is useful for algoritms doing for example + <c>1.0 / <anno>X</anno></c> or <c>math:log(<anno>X</anno>)</c>. + </p> + </note> + <p> + See + <seealso marker="#uniform_real_s-1"><c>uniform_real_s/1</c></seealso> + for more explanation. + </p> + </desc> + </func> + + <func> <name name="uniform" arity="1"/> <fsummary>Return a random integer.</fsummary> <desc><marker id="uniform-1"/> @@ -460,7 +505,8 @@ end.</pre> This function may return exactly <c>0.0</c> which can be fatal for certain applications. If that is undesired you can use <c>(1.0 - rand:uniform(State))</c> to get the - interval <c>0.0 < <anno>X</anno> =< 1.0</c>. + interval <c>0.0 < <anno>X</anno> =< 1.0</c>, or instead use + <seealso marker="#uniform_real_s-1"><c>uniform_real_s/1</c></seealso>. </p> <p> If neither endpoint is desired you can test and re-try @@ -478,6 +524,68 @@ end.</pre> </func> <func> + <name name="uniform_real_s" arity="1"/> + <fsummary>Return a random float.</fsummary> + <desc> + <p> + Returns, for a specified state, a random float + uniformly distributed in the value range + <c>DBL_MIN =< <anno>X</anno> < 1.0</c> + and updates the state in the process dictionary. + </p> + <p> + Conceptually, a random real number <c>R</c> is generated + from the interval <c>0 =< R < 1</c> and then the + closest rounded down normalized number + in the IEEE 754 Double precision format + is returned. + </p> + <note> + <p> + The generated numbers from this function has got better + granularity for small numbers than the regular + <seealso marker="#uniform_s-1"><c>uniform_s/1</c></seealso> + because all bits in the mantissa are random. + This property, in combination with the fact that exactly zero + is never returned is useful for algoritms doing for example + <c>1.0 / <anno>X</anno></c> or <c>math:log(<anno>X</anno>)</c>. + </p> + </note> + <p> + The concept implicates that the probability to get + exactly zero is extremely low; so low that this function + is in fact guaranteed to never return zero. The smallest + number that it might return is <c>DBL_MIN</c>, which is + 2.0^(-1022). + </p> + <p> + The value range stated at the top of this function + description is technically correct, but + <c>0.0 =< <anno>X</anno> < 1.0</c> + is a better description of the generated numbers' + statistical distribution. Except that exactly 0.0 + is never returned, which is not possible to observe + statistically. + </p> + <p> + For example; for all sub ranges + <c>N*2.0^(-53) =< X < (N+1)*2.0^(-53)</c> + where + <c>0 =< integer(N) < 2.0^53</c> + the probability is the same. + Compare that with the form of the numbers generated by + <seealso marker="#uniform_s-1"><c>uniform_s/1</c></seealso>. + </p> + <p> + Having to generate extra random bits for + small numbers costs a little performance. + This function is about 20% slower than the regular + <seealso marker="#uniform_s-1"><c>uniform_s/1</c></seealso> + </p> + </desc> + </func> + + <func> <name name="uniform_s" arity="2"/> <fsummary>Return a random integer.</fsummary> <desc> diff --git a/lib/stdlib/doc/src/ref_man.xml b/lib/stdlib/doc/src/ref_man.xml index 878a3babc5..68bfddbc71 100644 --- a/lib/stdlib/doc/src/ref_man.xml +++ b/lib/stdlib/doc/src/ref_man.xml @@ -93,6 +93,7 @@ <xi:include href="sys.xml"/> <xi:include href="timer.xml"/> <xi:include href="unicode.xml"/> + <xi:include href="uri_string.xml"/> <xi:include href="win32reg.xml"/> <xi:include href="zip.xml"/> </application> diff --git a/lib/stdlib/doc/src/specs.xml b/lib/stdlib/doc/src/specs.xml index 45b207b13d..d559adf9b6 100644 --- a/lib/stdlib/doc/src/specs.xml +++ b/lib/stdlib/doc/src/specs.xml @@ -60,6 +60,7 @@ <xi:include href="../specs/specs_sys.xml"/> <xi:include href="../specs/specs_timer.xml"/> <xi:include href="../specs/specs_unicode.xml"/> + <xi:include href="../specs/specs_uri_string.xml"/> <xi:include href="../specs/specs_win32reg.xml"/> <xi:include href="../specs/specs_zip.xml"/> </specs> diff --git a/lib/stdlib/doc/src/string.xml b/lib/stdlib/doc/src/string.xml index 9d5edd9ecf..130fc74a28 100644 --- a/lib/stdlib/doc/src/string.xml +++ b/lib/stdlib/doc/src/string.xml @@ -109,10 +109,8 @@ <p>This module has been reworked in Erlang/OTP 20 to handle <seealso marker="unicode#type-chardata"> <c>unicode:chardata()</c></seealso> and operate on grapheme - clusters. The <seealso marker="#oldapi"> <c>old - functions</c></seealso> that only work on Latin-1 lists as input - are still available but should not be - used. They will be deprecated in Erlang/OTP 21. + clusters. The <c>old functions</c> that only work on Latin-1 lists as input + are kept for backwards compatibility reasons but should not be used. </p> </description> @@ -594,7 +592,7 @@ ÖÄÅ</pre> or <c>both</c>, indicates from which direction characters are to be removed. </p> - <p> Default <c><anno>Characters</anno></c> are the set of + <p> Default <c><anno>Characters</anno></c> is the set of nonbreakable whitespace codepoints, defined as Pattern_White_Space in <url href="http://unicode.org/reports/tr31/">Unicode Standard Annex #31</url>. @@ -631,393 +629,5 @@ ÖÄÅ</pre> </func> </funcs> - - <section> - <marker id="oldapi"/> - <title>Obsolete API functions</title> - <p>Here follows the function of the old API. - These functions only work on a list of Latin-1 characters. - </p> - <note><p> - The functions are kept for backward compatibility, but are - not recommended. - They will be deprecated in Erlang/OTP 21. - </p> - <p>Any undocumented functions in <c>string</c> are not to be used.</p> - </note> - </section> - - <funcs> - <func> - <name name="centre" arity="2"/> - <name name="centre" arity="3"/> - <fsummary>Center a string.</fsummary> - <desc> - <p>Returns a string, where <c><anno>String</anno></c> is centered in the - string and surrounded by blanks or <c><anno>Character</anno></c>. - The resulting string has length <c><anno>Number</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#pad/3"><c>pad/3</c></seealso>. - </p> - </desc> - </func> - - <func> - <name name="chars" arity="2"/> - <name name="chars" arity="3"/> - <fsummary>Return a string consisting of numbers of characters.</fsummary> - <desc> - <p>Returns a string consisting of <c><anno>Number</anno></c> characters - <c><anno>Character</anno></c>. Optionally, the string can end with - string <c><anno>Tail</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="lists#duplicate/2"><c>lists:duplicate/2</c></seealso>.</p> - </desc> - </func> - - <func> - <name name="chr" arity="2"/> - <fsummary>Return the index of the first occurrence of - a character in a string.</fsummary> - <desc> - <p>Returns the index of the first occurrence of - <c><anno>Character</anno></c> in <c><anno>String</anno></c>. Returns - <c>0</c> if <c><anno>Character</anno></c> does not occur.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#find/2"><c>find/2</c></seealso>.</p> - </desc> - </func> - - <func> - <name name="concat" arity="2"/> - <fsummary>Concatenate two strings.</fsummary> - <desc> - <p>Concatenates <c><anno>String1</anno></c> and - <c><anno>String2</anno></c> to form a new string - <c><anno>String3</anno></c>, which is returned.</p> - <p> - This function is <seealso marker="#oldapi">obsolete</seealso>. - Use <c>[<anno>String1</anno>, <anno>String2</anno>]</c> as - <c>Data</c> argument, and call - <seealso marker="unicode#characters_to_list/2"> - <c>unicode:characters_to_list/2</c></seealso> or - <seealso marker="unicode#characters_to_binary/2"> - <c>unicode:characters_to_binary/2</c></seealso> - to flatten the output. - </p> - </desc> - </func> - - <func> - <name name="copies" arity="2"/> - <fsummary>Copy a string.</fsummary> - <desc> - <p>Returns a string containing <c><anno>String</anno></c> repeated - <c><anno>Number</anno></c> times.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="lists#duplicate/2"><c>lists:duplicate/2</c></seealso>.</p> - </desc> - </func> - - <func> - <name name="cspan" arity="2"/> - <fsummary>Span characters at start of a string.</fsummary> - <desc> - <p>Returns the length of the maximum initial segment of - <c><anno>String</anno></c>, which consists entirely of characters - not from <c><anno>Chars</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#take/3"><c>take/3</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> string:cspan("\t abcdef", " \t"). -0</code> - </desc> - </func> - - <func> - <name name="join" arity="2"/> - <fsummary>Join a list of strings with separator.</fsummary> - <desc> - <p>Returns a string with the elements of <c><anno>StringList</anno></c> - separated by the string in <c><anno>Separator</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="lists#join/2"><c>lists:join/2</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> join(["one", "two", "three"], ", "). -"one, two, three"</code> - </desc> - </func> - - <func> - <name name="left" arity="2"/> - <name name="left" arity="3"/> - <fsummary>Adjust left end of a string.</fsummary> - <desc> - <p>Returns <c><anno>String</anno></c> with the length adjusted in - accordance with <c><anno>Number</anno></c>. The left margin is - fixed. If <c>length(<anno>String</anno>)</c> < - <c><anno>Number</anno></c>, then <c><anno>String</anno></c> is padded - with blanks or <c><anno>Character</anno></c>s.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#pad/2"><c>pad/2</c></seealso> or - <seealso marker="#pad/3"><c>pad/3</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> string:left("Hello",10,$.). -"Hello....."</code> - </desc> - </func> - - <func> - <name name="len" arity="1"/> - <fsummary>Return the length of a string.</fsummary> - <desc> - <p>Returns the number of characters in <c><anno>String</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#length/1"><c>length/1</c></seealso>.</p> - </desc> - </func> - - <func> - <name name="rchr" arity="2"/> - <fsummary>Return the index of the last occurrence of - a character in a string.</fsummary> - <desc> - <p>Returns the index of the last occurrence of - <c><anno>Character</anno></c> in <c><anno>String</anno></c>. Returns - <c>0</c> if <c><anno>Character</anno></c> does not occur.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#find/3"><c>find/3</c></seealso>.</p> - </desc> - </func> - - <func> - <name name="right" arity="2"/> - <name name="right" arity="3"/> - <fsummary>Adjust right end of a string.</fsummary> - <desc> - <p>Returns <c><anno>String</anno></c> with the length adjusted in - accordance with <c><anno>Number</anno></c>. The right margin is - fixed. If the length of <c>(<anno>String</anno>)</c> < - <c><anno>Number</anno></c>, then <c><anno>String</anno></c> is padded - with blanks or <c><anno>Character</anno></c>s.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#pad/3"><c>pad/3</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> string:right("Hello", 10, $.). -".....Hello"</code> - </desc> - </func> - - <func> - <name name="rstr" arity="2"/> - <fsummary>Find the index of a substring.</fsummary> - <desc> - <p>Returns the position where the last occurrence of - <c><anno>SubString</anno></c> begins in <c><anno>String</anno></c>. - Returns <c>0</c> if <c><anno>SubString</anno></c> - does not exist in <c><anno>String</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#find/3"><c>find/3</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> string:rstr(" Hello Hello World World ", "Hello World"). -8</code> - </desc> - </func> - - <func> - <name name="span" arity="2"/> - <fsummary>Span characters at start of a string.</fsummary> - <desc> - <p>Returns the length of the maximum initial segment of - <c><anno>String</anno></c>, which consists entirely of characters - from <c><anno>Chars</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#take/2"><c>take/2</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> string:span("\t abcdef", " \t"). -5</code> - </desc> - </func> - - <func> - <name name="str" arity="2"/> - <fsummary>Find the index of a substring.</fsummary> - <desc> - <p>Returns the position where the first occurrence of - <c><anno>SubString</anno></c> begins in <c><anno>String</anno></c>. - Returns <c>0</c> if <c><anno>SubString</anno></c> - does not exist in <c><anno>String</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#find/2"><c>find/2</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> string:str(" Hello Hello World World ", "Hello World"). -8</code> - </desc> - </func> - - <func> - <name name="strip" arity="1"/> - <name name="strip" arity="2"/> - <name name="strip" arity="3"/> - <fsummary>Strip leading or trailing characters.</fsummary> - <desc> - <p>Returns a string, where leading or trailing, or both, blanks or a - number of <c><anno>Character</anno></c> have been removed. - <c><anno>Direction</anno></c>, which can be <c>left</c>, <c>right</c>, - or <c>both</c>, indicates from which direction blanks are to be - removed. <c>strip/1</c> is equivalent to - <c>strip(String, both)</c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#trim/3"><c>trim/3</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> string:strip("...Hello.....", both, $.). -"Hello"</code> - </desc> - </func> - - <func> - <name name="sub_string" arity="2"/> - <name name="sub_string" arity="3"/> - <fsummary>Extract a substring.</fsummary> - <desc> - <p>Returns a substring of <c><anno>String</anno></c>, starting at - position <c><anno>Start</anno></c> to the end of the string, or to - and including position <c><anno>Stop</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#slice/3"><c>slice/3</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -sub_string("Hello World", 4, 8). -"lo Wo"</code> - </desc> - </func> - - <func> - <name name="substr" arity="2"/> - <name name="substr" arity="3"/> - <fsummary>Return a substring of a string.</fsummary> - <desc> - <p>Returns a substring of <c><anno>String</anno></c>, starting at - position <c><anno>Start</anno></c>, and ending at the end of the - string or at length <c><anno>Length</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#slice/3"><c>slice/3</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> substr("Hello World", 4, 5). -"lo Wo"</code> - </desc> - </func> - - <func> - <name name="sub_word" arity="2"/> - <name name="sub_word" arity="3"/> - <fsummary>Extract subword.</fsummary> - <desc> - <p>Returns the word in position <c><anno>Number</anno></c> of - <c><anno>String</anno></c>. Words are separated by blanks or - <c><anno>Character</anno></c>s.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#nth_lexeme/3"><c>nth_lexeme/3</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> string:sub_word(" Hello old boy !",3,$o). -"ld b"</code> - </desc> - </func> - - <func> - <name name="to_lower" arity="1" clause_i="1"/> - <name name="to_lower" arity="1" clause_i="2"/> - <name name="to_upper" arity="1" clause_i="1"/> - <name name="to_upper" arity="1" clause_i="2"/> - <fsummary>Convert case of string (ISO/IEC 8859-1).</fsummary> - <type variable="String" name_i="1"/> - <type variable="Result" name_i="1"/> - <type variable="Char"/> - <type variable="CharResult"/> - <desc> - <p>The specified string or character is case-converted. Notice that - the supported character set is ISO/IEC 8859-1 (also called Latin 1); - all values outside this set are unchanged</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso> use - <seealso marker="#lowercase/1"><c>lowercase/1</c></seealso>, - <seealso marker="#uppercase/1"><c>uppercase/1</c></seealso>, - <seealso marker="#titlecase/1"><c>titlecase/1</c></seealso> or - <seealso marker="#casefold/1"><c>casefold/1</c></seealso>.</p> - </desc> - </func> - - <func> - <name name="tokens" arity="2"/> - <fsummary>Split string into tokens.</fsummary> - <desc> - <p>Returns a list of tokens in <c><anno>String</anno></c>, separated - by the characters in <c><anno>SeparatorList</anno></c>.</p> - <p><em>Example:</em></p> - <code type="none"> -> tokens("abc defxxghix jkl", "x "). -["abc", "def", "ghi", "jkl"]</code> - <p>Notice that, as shown in this example, two or more - adjacent separator characters in <c><anno>String</anno></c> - are treated as one. That is, there are no empty - strings in the resulting list of tokens.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#lexemes/2"><c>lexemes/2</c></seealso>.</p> - </desc> - </func> - - <func> - <name name="words" arity="1"/> - <name name="words" arity="2"/> - <fsummary>Count blank separated words.</fsummary> - <desc> - <p>Returns the number of words in <c><anno>String</anno></c>, separated - by blanks or <c><anno>Character</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#lexemes/2"><c>lexemes/2</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> words(" Hello old boy!", $o). -4</code> - </desc> - </func> - </funcs> - - <section> - <title>Notes</title> - <p>Some of the general string functions can seem to overlap each - other. The reason is that this string package is the - combination of two earlier packages and all functions of - both packages have been retained.</p> - </section> - </erlref> diff --git a/lib/stdlib/doc/src/unicode_usage.xml b/lib/stdlib/doc/src/unicode_usage.xml index 26dc46719e..789e063c12 100644 --- a/lib/stdlib/doc/src/unicode_usage.xml +++ b/lib/stdlib/doc/src/unicode_usage.xml @@ -719,8 +719,8 @@ Eshell V5.10.1 (abort with ^G) </section> <section> - <title>Unicode Filenames</title> <marker id="unicode_file_names"/> + <title>Unicode Filenames</title> <p>Most modern operating systems support Unicode filenames in some way. There are many different ways to do this and Erlang by default treats the different approaches differently:</p> @@ -855,8 +855,12 @@ Eshell V5.10.1 (abort with ^G) </note> <section> - <title>Notes About Raw Filenames</title> <marker id="notes-about-raw-filenames"/> + <title>Notes About Raw Filenames</title> + <note><p> + Note that raw filenames <em>not</em> necessarily are encoded the + same way as on the OS level. + </p></note> <p>Raw filenames were introduced together with Unicode filename support in ERTS 5.8.2 (Erlang/OTP R14B01). The reason "raw filenames" were introduced in the system was diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml new file mode 100644 index 0000000000..9ace2b0a05 --- /dev/null +++ b/lib/stdlib/doc/src/uri_string.xml @@ -0,0 +1,230 @@ +<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE erlref SYSTEM "erlref.dtd"> + +<erlref> + <header> + <copyright> + <year>2017</year><year>2017</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + </legalnotice> + + <title>uri_string</title> + <prepared>Péter Dimitrov</prepared> + <docno>1</docno> + <date>2017-10-24</date> + <rev>A</rev> + </header> + <module>uri_string</module> + <modulesummary>URI processing functions.</modulesummary> + <description> + <p>This module contains functions for parsing and handling URIs + (<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>). + </p> + <p>A URI is an identifier consisting of a sequence of characters matching the syntax + rule named <em>URI</em> in <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>. + </p> + <p> The generic URI syntax consists of a hierarchical sequence of components referred + to as the scheme, authority, path, query, and fragment:</p> + <pre> + URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + hier-part = "//" authority path-abempty + / path-absolute + / path-rootless + / path-empty + scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + authority = [ userinfo "@" ] host [ ":" port ] + userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + + reserved = gen-delims / sub-delims + gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + </pre><br></br> + <p>The interpretation of a URI depends only on the characters used and not on how those + characters are represented in a network protocol.</p> + <p>The functions implemented by this module cover the following use cases:</p> + <list type="bulleted"> + <item>Parsing URIs into its components and returing a map<br></br> + <seealso marker="#parse/1"><c>parse/1</c></seealso> + </item> + <item>Recomposing a map of URI components into a URI string<br></br> + <seealso marker="#recompose/1"><c>recompose/1</c></seealso> + </item> + <item>Changing inbound binary and percent-encoding of URIs<br></br> + <seealso marker="#transcode/2"><c>transcode/2</c></seealso> + </item> + <item>Transforming URIs into a normalized form<br></br> + <seealso marker="#normalize/1"><c>normalize/1</c></seealso> + </item> + </list> + <p>There are four different encodings present during the handling of URIs:</p> + <list type="bulleted"> + <item>Inbound binary encoding in binaries</item> + <item>Inbound percent-encoding in lists and binaries</item> + <item>Outbound binary encoding in binaries</item> + <item>Outbound percent-encoding in lists and binaries</item> + </list> + <p>Functions with <c>uri_string()</c> argument accept lists, binaries and + mixed lists (lists with binary elements) as input type. All of the functions but + <c>transcode/2</c> expects input as lists of unicode codepoints, UTF-8 encoded binaries + and UTF-8 percent-encoded URI parts ("%C3%B6" corresponds to the unicode character "ö").</p> + <p>Unless otherwise specified the return value type and encoding are the same as the input + type and encoding. That is, binary input returns binary output, list input returns a list + output but mixed input returns list output.</p> + <p>In case of lists there is only percent-encoding. In binaries, however, both binary encoding + and percent-encoding shall be considered. <c>transcode/2</c> provides the means to convert + between the supported encodings, it takes a <c>uri_string()</c> and a list of options + specifying inbound and outbound encodings.</p> + <p><url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> does not mandate any specific + character encoding and it is usually defined by the protocol or surrounding text. This library + takes the same assumption, binary and percent-encoding are handled as one configuration unit, + they cannot be set to different values.</p> + </description> + + <datatypes> + <datatype> + <name name="error"/> + <desc> + <p>Error tuple indicating the type of error. Possible values of the second component:</p> + <list type="bulleted"> + <item><c>invalid_input</c></item> + <item><c>invalid_map</c></item> + <item><c>invalid_percent_encoding</c></item> + <item><c>invalid_scheme</c></item> + <item><c>invalid_uri</c></item> + <item><c>invalid_utf8</c></item> + </list> + <p>The third component is a term providing additional information about the + cause of the error.</p> + </desc> + </datatype> + <datatype> + <name name="uri_map"/> + <desc> + <p>Map holding the main components of a URI.</p> + </desc> + </datatype> + <datatype> + <name name="uri_string"/> + <desc> + <p>List of unicode codepoints, a UTF-8 encoded binary, or a mix of the two, + representing an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> + compliant URI (<em>percent-encoded form</em>). + A URI is a sequence of characters from a very limited set: the letters of + the basic Latin alphabet, digits, and a few special characters.</p> + </desc> + </datatype> + </datatypes> + + <funcs> + + <func> + <name name="normalize" arity="1"/> + <fsummary>Syntax-based normalization.</fsummary> + <desc> + <p>Transforms <c><anno>URIString</anno></c> into a normalized form + using Syntax-Based Normalization as defined by + <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>.</p> + <p>This function implements case normalization, percent-encoding + normalization, path segment normalization and scheme based normalization + for HTTP(S) with basic support for FTP, SSH, SFTP and TFTP.</p> + <p><em>Example:</em></p> + <pre> +1> <input>uri_string:normalize("/a/b/c/./../../g").</input> +"/a/g" +2> <![CDATA[uri_string:normalize(<<"mid/content=5/../6">>).]]> +<![CDATA[<<"mid/6">>]]> +3> uri_string:normalize("http://localhost:80"). +"https://localhost/" + </pre> + </desc> + </func> + + <func> + <name name="parse" arity="1"/> + <fsummary>Parse URI into a map.</fsummary> + <desc> + <p>Parses an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> + compliant <c>uri_string()</c> into a <c>uri_map()</c>, that holds the parsed + components of the <c>URI</c>. + If parsing fails, an error tuple is returned.</p> + <p>See also the opposite operation <seealso marker="#recompose/1"> + <c>recompose/1</c></seealso>.</p> + <p><em>Example:</em></p> + <pre> +1> <input>uri_string:parse("foo://[email protected]:8042/over/there?name=ferret#nose").</input> +#{fragment => "nose",host => "example.com", + path => "/over/there",port => 8042,query => "name=ferret", + scheme => foo,userinfo => "user"} +2> <![CDATA[uri_string:parse(<<"foo://[email protected]:8042/over/there?name=ferret">>).]]> +<![CDATA[#{host => <<"example.com">>,path => <<"/over/there">>, + port => 8042,query => <<"name=ferret">>,scheme => <<"foo">>, + userinfo => <<"user">>}]]> + </pre> + </desc> + </func> + + <func> + <name name="recompose" arity="1"/> + <fsummary>Recompose URI.</fsummary> + <desc> + <p>Creates an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> compliant + <c><anno>URIString</anno></c> (percent-encoded), based on the components of + <c><anno>URIMap</anno></c>. + If the <c><anno>URIMap</anno></c> is invalid, an error tuple is returned.</p> + <p>See also the opposite operation <seealso marker="#parse/1"> + <c>parse/1</c></seealso>.</p> + <p><em>Example:</em></p> + <pre> +1> <input>URIMap = #{fragment => "nose", host => "example.com", path => "/over/there",</input> +1> port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}. +#{fragment => "top",host => "example.com", + path => "/over/there",port => 8042,query => "?name=ferret", + scheme => foo,userinfo => "user"} + +2> <input>uri_string:recompose(URIMap).</input> +"foo://example.com:8042/over/there?name=ferret#nose"</pre> + </desc> + </func> + + <func> + <name name="transcode" arity="2"/> + <fsummary>Transcode URI.</fsummary> + <desc> + <p>Transcodes an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> + compliant <c><anno>URIString</anno></c>, + where <c><anno>Options</anno></c> is a list of tagged tuples, specifying the inbound + (<c>in_encoding</c>) and outbound (<c>out_encoding</c>) encodings. <c>in_encoding</c> + and <c>out_encoding</c> specifies both binary encoding and percent-encoding for the + input and output data. Mixed encoding, where binary encoding is not the same as + percent-encoding, is not supported. + If an argument is invalid, an error tuple is returned.</p> + <p><em>Example:</em></p> + <pre> +1> <input><![CDATA[uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>,]]></input> +1> [{in_encoding, utf32},{out_encoding, utf8}]). +<![CDATA[<<"foo%C3%B6bar"/utf8>>]]> +2> uri_string:transcode("foo%F6bar", [{in_encoding, latin1}, +2> {out_encoding, utf8}]). +"foo%C3%B6bar" + </pre> + </desc> + </func> + + </funcs> +</erlref> diff --git a/lib/stdlib/src/Makefile b/lib/stdlib/src/Makefile index bf836203ec..8b156929d7 100644 --- a/lib/stdlib/src/Makefile +++ b/lib/stdlib/src/Makefile @@ -121,6 +121,7 @@ MODULES= \ timer \ unicode \ unicode_util \ + uri_string \ win32reg \ zip diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl index 5885745fb1..c8cf6fdffe 100644 --- a/lib/stdlib/src/base64.erl +++ b/lib/stdlib/src/base64.erl @@ -113,9 +113,9 @@ encode_binary(Bin) -> Data :: ascii_binary(). decode(Bin) when is_binary(Bin) -> - decode_binary(<<>>, Bin); + decode_binary(Bin, <<>>); decode(List) when is_list(List) -> - list_to_binary(decode_l(List)). + decode_list(List, <<>>). -spec mime_decode(Base64) -> Data when Base64 :: ascii_string() | ascii_binary(), @@ -186,31 +186,41 @@ mime_decode_to_string(List) when is_list(List) -> bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}). -decode_binary(Result0, <<C:8,T0/bits>>) -> - case element(C, ?DECODE_MAP) of - bad -> - erlang:error({badarg,C}); - ws -> - decode_binary(Result0, T0); - eq -> - case strip_ws(T0) of - <<$=:8,T/binary>> -> - <<>> = strip_ws(T), - Split = byte_size(Result0) - 1, - <<Result:Split/bytes,_:4>> = Result0, - Result; - T -> - <<>> = strip_ws(T), - Split = byte_size(Result0) - 1, - <<Result:Split/bytes,_:2>> = Result0, - Result - end; - Bits -> - decode_binary(<<Result0/bits,Bits:6>>, T0) +decode_binary(<<C1:8, Cs/bits>>, A) -> + case element(C1, ?DECODE_MAP) of + ws -> decode_binary(Cs, A); + B1 -> decode_binary(Cs, A, B1) end; -decode_binary(Result, <<>>) -> - true = is_binary(Result), - Result. +decode_binary(<<>>, A) -> + A. + +decode_binary(<<C2:8, Cs/bits>>, A, B1) -> + case element(C2, ?DECODE_MAP) of + ws -> decode_binary(Cs, A, B1); + B2 -> decode_binary(Cs, A, B1, B2) + end. + +decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) -> + case element(C3, ?DECODE_MAP) of + ws -> decode_binary(Cs, A, B1, B2); + B3 -> decode_binary(Cs, A, B1, B2, B3) + end. + +decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) -> + case element(C4, ?DECODE_MAP) of + ws -> decode_binary(Cs, A, B1, B2, B3); + eq when B3 =:= eq -> only_ws_binary(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>); + eq -> only_ws_binary(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>); + B4 -> decode_binary(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>) + end. + +only_ws_binary(<<>>, A) -> + A; +only_ws_binary(<<C:8, Cs/bits>>, A) -> + case element(C, ?DECODE_MAP) of + ws -> only_ws_binary(Cs, A); + _ -> erlang:error(function_clause) + end. %% Skipping pad character if not at end of string. Also liberal about %% excess padding and skipping of other illegal (non-base64 alphabet) @@ -262,6 +272,42 @@ mime_decode_binary_after_eq(Result0, <<>>, Eq) -> Result end. +decode_list([C1 | Cs], A) -> + case element(C1, ?DECODE_MAP) of + ws -> decode_list(Cs, A); + B1 -> decode_list(Cs, A, B1) + end; +decode_list([], A) -> + A. + +decode_list([C2 | Cs], A, B1) -> + case element(C2, ?DECODE_MAP) of + ws -> decode_list(Cs, A, B1); + B2 -> decode_list(Cs, A, B1, B2) + end. + +decode_list([C3 | Cs], A, B1, B2) -> + case element(C3, ?DECODE_MAP) of + ws -> decode_list(Cs, A, B1, B2); + B3 -> decode_list(Cs, A, B1, B2, B3) + end. + +decode_list([C4 | Cs], A, B1, B2, B3) -> + case element(C4, ?DECODE_MAP) of + ws -> decode_list(Cs, A, B1, B2, B3); + eq when B3 =:= eq -> only_ws(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>); + eq -> only_ws(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>); + B4 -> decode_list(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>) + end. + +only_ws([], A) -> + A; +only_ws([C | Cs], A) -> + case element(C, ?DECODE_MAP) of + ws -> only_ws(Cs, A); + _ -> erlang:error(function_clause) + end. + decode([], A) -> A; decode([$=,$=,C2,C1|Cs], A) -> Bits2x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12), @@ -292,16 +338,6 @@ strip_spaces([$\r|Cs], A) -> strip_spaces(Cs, A); strip_spaces([$\n|Cs], A) -> strip_spaces(Cs, A); strip_spaces([C|Cs], A) -> strip_spaces(Cs, [C | A]). -strip_ws(<<$\t,T/binary>>) -> - strip_ws(T); -strip_ws(<<$\n,T/binary>>) -> - strip_ws(T); -strip_ws(<<$\r,T/binary>>) -> - strip_ws(T); -strip_ws(<<$\s,T/binary>>) -> - strip_ws(T); -strip_ws(T) -> T. - %% Skipping pad character if not at end of string. Also liberal about %% excess padding and skipping of other illegal (non-base64 alphabet) %% characters. See section 3.3 of RFC4648 diff --git a/lib/stdlib/src/c.erl b/lib/stdlib/src/c.erl index c04a201ce1..9a447af5b7 100644 --- a/lib/stdlib/src/c.erl +++ b/lib/stdlib/src/c.erl @@ -668,19 +668,23 @@ lm() -> [l(M) || M <- mm()]. %% erlangrc(Home) -%% Try to run a ".erlang" file, first in the current directory -%% else in home directory. +%% Try to run a ".erlang" file in home directory. + +-spec erlangrc() -> {ok, file:filename()} | {error, term()}. erlangrc() -> case init:get_argument(home) of {ok,[[Home]]} -> erlangrc([Home]); _ -> - f_p_e(["."], ".erlang") + {error, enoent} end. -erlangrc([Home]) -> - f_p_e([".",Home], ".erlang"). +-spec erlangrc(PathList) -> {ok, file:filename()} | {error, term()} + when PathList :: [Dir :: file:name()]. + +erlangrc([Home|_]=Paths) when is_list(Home) -> + f_p_e(Paths, ".erlang"). error(Fmt, Args) -> error_logger:error_msg(Fmt, Args). @@ -692,11 +696,11 @@ f_p_e(P, F) -> {error, E={Line, _Mod, _Term}} -> error("file:path_eval(~tp,~tp): error on line ~p: ~ts~n", [P, F, Line, file:format_error(E)]), - ok; + {error, E}; {error, E} -> error("file:path_eval(~tp,~tp): ~ts~n", [P, F, file:format_error(E)]), - ok; + {error, E}; Other -> Other end. diff --git a/lib/stdlib/src/epp.erl b/lib/stdlib/src/epp.erl index 31d0d499e3..00e6a10d8a 100644 --- a/lib/stdlib/src/epp.erl +++ b/lib/stdlib/src/epp.erl @@ -479,7 +479,7 @@ com_enc(_B, _Fun, _N, L, Ps) -> com_enc_end([L | Ps]). com_enc_end(Ps0) -> - Ps = lists:reverse([lists:reverse(string:to_lower(P)) || P <- Ps0]), + Ps = lists:reverse([lists:reverse(lowercase(P)) || P <- Ps0]), com_encoding(Ps). com_encoding(["latin","1"|_]) -> @@ -489,6 +489,9 @@ com_encoding(["utf","8"|_]) -> com_encoding(_) -> throw(no). % Don't try any further +lowercase(S) -> + unicode:characters_to_list(string:lowercase(S)). + normalize_typed_record_fields([]) -> {typed, []}; normalize_typed_record_fields(Fields) -> diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl index 9cd4727dc3..f58cb35cea 100644 --- a/lib/stdlib/src/erl_lint.erl +++ b/lib/stdlib/src/erl_lint.erl @@ -3910,10 +3910,9 @@ check_format_string(Fmt) -> extract_sequences(Fmt, []). extract_sequences(Fmt, Need0) -> - case string:chr(Fmt, $~) of - 0 -> {ok,lists:reverse(Need0)}; %That's it - Pos -> - Fmt1 = string:substr(Fmt, Pos+1), %Skip ~ + case string:find(Fmt, [$~]) of + nomatch -> {ok,lists:reverse(Need0)}; %That's it + [$~|Fmt1] -> case extract_sequence(1, Fmt1, Need0) of {ok,Need1,Rest} -> extract_sequences(Rest, Need1); Error -> Error diff --git a/lib/stdlib/src/erl_pp.erl b/lib/stdlib/src/erl_pp.erl index ee5e7a11bf..367dbefb82 100644 --- a/lib/stdlib/src/erl_pp.erl +++ b/lib/stdlib/src/erl_pp.erl @@ -237,13 +237,20 @@ lform({attribute,Line,Name,Arg}, Opts) -> lform({function,Line,Name,Arity,Clauses}, Opts) -> lfunction({function,Line,Name,Arity,Clauses}, Opts); %% These are specials to make it easier for the compiler. -lform({error,E}, _Opts) -> - leaf(format("~p\n", [{error,E}])); -lform({warning,W}, _Opts) -> - leaf(format("~p\n", [{warning,W}])); +lform({error,_}=E, Opts) -> + message(E, Opts); +lform({warning,_}=W, Opts) -> + message(W, Opts); lform({eof,_Line}, _Opts) -> $\n. +message(M, #options{encoding = Encoding}) -> + F = case Encoding of + latin1 -> "~p\n"; + unicode -> "~tp\n" + end, + leaf(format(F, [M])). + lattribute({attribute,_Line,type,Type}, Opts) -> [typeattr(type, Type, Opts),leaf(".\n")]; lattribute({attribute,_Line,opaque,Type}, Opts) -> @@ -598,8 +605,6 @@ lexpr({'fun',_,{clauses,Cs},Extra}, _Prec, Opts) -> lexpr({named_fun,_,Name,Cs,Extra}, _Prec, Opts) -> {force_nl,fun_info(Extra), {list,[{first,['fun', " "],fun_clauses(Cs, Opts, {named, Name})},'end']}}; -lexpr({'query',_,Lc}, _Prec, Opts) -> - {list,[{step,leaf("query"),lexpr(Lc, 0, Opts)},'end']}; lexpr({call,_,{remote,_,{atom,_,M},{atom,_,F}=N}=Name,Args}, Prec, Opts) -> case erl_internal:bif(M, F, length(Args)) of true -> @@ -904,7 +909,7 @@ maybe_paren(_P, _Prec, Expr) -> Expr. leaf(S) -> - {leaf,chars_size(S),S}. + {leaf,string:length(S),S}. %%% Do the formatting. Currently nothing fancy. Could probably have %%% done it in one single pass. @@ -964,7 +969,7 @@ f({seq,Before,After,Sep,LItems}, I0, ST, WT, PP) -> Sizes = BSizeL ++ SizeL, NSepChars = if is_list(Sep), Sep =/= [] -> - erlang:max(0, length(CharsL)-1); + erlang:max(0, length(CharsL)-1); % not string:length true -> 0 end, @@ -1120,7 +1125,7 @@ incr(I, Incr) -> I+Incr. indentation(E, I) when I < 0 -> - chars_size(E); + string:length(E); indentation(E, I0) -> I = io_lib_format:indentation(E, I0), case has_nl(E) of @@ -1157,19 +1162,19 @@ write_a_string(S, I, PP) -> write_a_string([], _N, _Len, _PP) -> []; write_a_string(S, N, Len, PP) -> - SS = string:sub_string(S, 1, N), + SS = string:slice(S, 0, N), Sl = write_string(SS, PP), - case (chars_size(Sl) > Len) and (N > ?MIN_SUBSTRING) of + case (string:length(Sl) > Len) and (N > ?MIN_SUBSTRING) of true -> write_a_string(S, N-1, Len, PP); false -> [flat_leaf(Sl) | - write_a_string(lists:nthtail(length(SS), S), Len, Len, PP)] + write_a_string(string:slice(S, string:length(SS)), Len, Len, PP)] end. flat_leaf(S) -> L = lists:flatten(S), - {leaf,length(L),L}. + {leaf,string:length(L),L}. write_value(V, PP) -> (PP#pp.value_fun)(V). @@ -1190,15 +1195,6 @@ write_char(C, PP) -> a0() -> erl_anno:new(0). -chars_size([C | Es]) when is_integer(C) -> - 1 + chars_size(Es); -chars_size([E | Es]) -> - chars_size(E) + chars_size(Es); -chars_size([]) -> - 0; -chars_size(B) when is_binary(B) -> - byte_size(B). - -define(N_SPACES, 30). spacetab() -> diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl index 47223b129c..4774c4bf19 100644 --- a/lib/stdlib/src/erl_scan.erl +++ b/lib/stdlib/src/erl_scan.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2015. All Rights Reserved. +%% Copyright Ericsson AB 1996-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -752,7 +752,7 @@ scan_string(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) -> {char_error,Ncs,Error,Nline,Ncol,EndCol} -> scan_error(Error, Nline, Ncol, Nline, EndCol, Ncs); {error,Nline,Ncol,Nwcs,Ncs} -> - Estr = string:substr(Nwcs, 1, 16), % Expanded escape chars. + Estr = string:slice(Nwcs, 0, 16), % Expanded escape chars. scan_error({string,$\",Estr}, Line0, Col0, Nline, Ncol, Ncs); %" {Ncs,Nline,Ncol,Nstr,Nwcs} -> Anno = anno(Line0, Col0, St, Nstr), @@ -767,7 +767,7 @@ scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) -> {char_error,Ncs,Error,Nline,Ncol,EndCol} -> scan_error(Error, Nline, Ncol, Nline, EndCol, Ncs); {error,Nline,Ncol,Nwcs,Ncs} -> - Estr = string:substr(Nwcs, 1, 16), % Expanded escape chars. + Estr = string:slice(Nwcs, 0, 16), % Expanded escape chars. scan_error({string,$\',Estr}, Line0, Col0, Nline, Ncol, Ncs); %' {Ncs,Nline,Ncol,Nstr,Nwcs} -> case catch list_to_atom(Nwcs) of diff --git a/lib/stdlib/src/escript.erl b/lib/stdlib/src/escript.erl index 2b9d8ff65b..132f8efbbe 100644 --- a/lib/stdlib/src/escript.erl +++ b/lib/stdlib/src/escript.erl @@ -224,8 +224,8 @@ return_sections(S, Bin) -> normalize_section(Name, undefined) -> {Name, undefined}; normalize_section(shebang, "#!" ++ Chars) -> - Chopped = string:strip(Chars, right, $\n), - Stripped = string:strip(Chopped, both), + Chopped = string:trim(Chars, trailing, "$\n"), + Stripped = string:trim(Chopped, both), if Stripped =:= ?SHEBANG -> {shebang, default}; @@ -233,8 +233,8 @@ normalize_section(shebang, "#!" ++ Chars) -> {shebang, Stripped} end; normalize_section(comment, Chars) -> - Chopped = string:strip(Chars, right, $\n), - Stripped = string:strip(string:strip(Chopped, left, $%), both), + Chopped = string:trim(Chars, trailing, "$\n"), + Stripped = string:trim(string:trim(Chopped, leading, "$%"), both), if Stripped =:= ?COMMENT -> {comment, default}; @@ -242,8 +242,8 @@ normalize_section(comment, Chars) -> {comment, Stripped} end; normalize_section(emu_args, "%%!" ++ Chars) -> - Chopped = string:strip(Chars, right, $\n), - Stripped = string:strip(Chopped, both), + Chopped = string:trim(Chars, trailing, "$\n"), + Stripped = string:trim(Chopped, both), {emu_args, Stripped}; normalize_section(Name, Chars) -> {Name, Chars}. diff --git a/lib/stdlib/src/ets.erl b/lib/stdlib/src/ets.erl index 1db004c91e..b6548626f3 100644 --- a/lib/stdlib/src/ets.erl +++ b/lib/stdlib/src/ets.erl @@ -1719,7 +1719,7 @@ get_line(P, Default) -> line_string(Binary) when is_binary(Binary) -> unicode:characters_to_list(Binary); line_string(Other) -> Other. -nonl(S) -> string:strip(S, right, $\n). +nonl(S) -> string:trim(S, trailing, "$\n"). print_number(Tab, Key, Num) -> Os = ets:lookup(Tab, Key), @@ -1748,7 +1748,7 @@ do_display_item(_Height, Width, I, Opos) -> L = to_string(I), L2 = if length(L) > Width - 8 -> - string:substr(L, 1, Width-13) ++ " ..."; + string:slice(L, 0, Width-13) ++ " ..."; true -> L end, diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index d7c313f214..0f90b3fc33 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -365,11 +365,18 @@ do_list_dir(Dir, Mod) -> eval_list_dir(Dir, Mod). %%% Compiling a wildcard. + +%% Define characters used for escaping a \. +-define(ESCAPE_PREFIX, $@). +-define(ESCAPE_CHARACTER, [?ESCAPE_PREFIX,$e]). +-define(ESCAPED_ESCAPE_PREFIX, [?ESCAPE_PREFIX,?ESCAPE_PREFIX]). + %% Only for debugging. compile_wildcard(Pattern) when is_list(Pattern) -> {compiled_wildcard,?HANDLE_ERROR(compile_wildcard(Pattern, "."))}. -compile_wildcard(Pattern, Cwd0) -> +compile_wildcard(Pattern0, Cwd0) -> + Pattern = convert_escapes(Pattern0), [Root|Rest] = filename:split(Pattern), case filename:pathtype(Root) of relative -> @@ -409,7 +416,8 @@ compile_join({cwd,Cwd}, File0) -> compile_join({root,PrefixLen,Root}, File) -> {root,PrefixLen,filename:join(Root, File)}. -compile_part(Part) -> +compile_part(Part0) -> + Part = wrap_escapes(Part0), compile_part(Part, false, []). compile_part_to_sep(Part) -> @@ -445,6 +453,8 @@ compile_part([${|Rest], Upto, Result) -> error -> compile_part(Rest, Upto, [${|Result]) end; +compile_part([{escaped,X}|Rest], Upto, Result) -> + compile_part(Rest, Upto, [X|Result]); compile_part([X|Rest], Upto, Result) -> compile_part(Rest, Upto, [X|Result]); compile_part([], _Upto, Result) -> @@ -461,6 +471,8 @@ compile_charset1([Lower, $-, Upper|Rest], Ordset) when Lower =< Upper -> compile_charset1(Rest, compile_range(Lower, Upper, Ordset)); compile_charset1([$]|Rest], Ordset) -> {ok, {one_of, gb_sets:from_ordset(Ordset)}, Rest}; +compile_charset1([{escaped,X}|Rest], Ordset) -> + compile_charset1(Rest, ordsets:add_element(X, Ordset)); compile_charset1([X|Rest], Ordset) -> compile_charset1(Rest, ordsets:add_element(X, Ordset)); compile_charset1([], _Ordset) -> @@ -486,6 +498,32 @@ compile_alt(Pattern, Result) -> error end. +%% Convert backslashes to an illegal Unicode character to +%% protect in from filename:split/1. + +convert_escapes([?ESCAPE_PREFIX|T]) -> + ?ESCAPED_ESCAPE_PREFIX ++ convert_escapes(T); +convert_escapes([$\\|T]) -> + ?ESCAPE_CHARACTER ++ convert_escapes(T); +convert_escapes([H|T]) -> + [H|convert_escapes(T)]; +convert_escapes([]) -> + []. + +%% Wrap each escape in a tuple to remove the special meaning for +%% the character that follows. + +wrap_escapes(?ESCAPED_ESCAPE_PREFIX ++ T) -> + [?ESCAPE_PREFIX|wrap_escapes(T)]; +wrap_escapes(?ESCAPE_CHARACTER ++ [C|T]) -> + [{escaped,C}|wrap_escapes(T)]; +wrap_escapes(?ESCAPE_CHARACTER) -> + []; +wrap_escapes([H|T]) -> + [H|wrap_escapes(T)]; +wrap_escapes([]) -> + []. + badpattern(Reason) -> error({badpattern,Reason}). diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index ee807dfd09..a322bd002d 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -34,6 +34,38 @@ %% we flatten the arguments immediately on function entry as that makes %% it easier to ensure that the code works. +%% +%% *** Requirements on Raw Filename Format *** +%% +%% These requirements are due to the 'filename' module +%% in stdlib. This since it is documented that it +%% should be able to operate on raw filenames as well +%% as ordinary filenames. +%% +%% A raw filename *must* be a byte sequence where: +%% 1. Codepoints 0-127 (7-bit ascii) *must* be encoded +%% as a byte with the corresponding value. That is, +%% the most significant bit in the byte encoding the +%% codepoint is never set. +%% 2. Codepoints greater than 127 *must* be encoded +%% with the most significant bit set in *every* byte +%% encoding it. +%% +%% Latin1 and UTF-8 meet these requirements while +%% UTF-16 and UTF-32 don't. +%% +%% On Windows filenames are natively stored as malformed +%% UTF-16LE (lonely surrogates may appear). A more correct +%% description than UTF-16 would be an array of 16-bit +%% words... In order to meet the requirements of the +%% raw file format we convert the malformed UTF-16LE to +%% malformed UTF-8 which meet the requirements. +%% +%% Note that these requirements are today only OTP +%% internal (erts-stdlib internal) requirements that +%% could be changed. +%% + -export([absname/1, absname/2, absname_join/2, basename/1, basename/2, dirname/1, extension/1, join/1, join/2, pathtype/1, @@ -41,6 +73,7 @@ safe_relative_path/1]). -export([find_src/1, find_src/2]). % deprecated -export([basedir/2, basedir/3]). +-export([validate/1]). %% Undocumented and unsupported exports. -export([append/2]). @@ -1053,10 +1086,10 @@ basedir_linux(Type) -> user_log -> getenv("XDG_CACHE_HOME", ?basedir_linux_user_log, true); site_data -> Base = getenv("XDG_DATA_DIRS",?basedir_linux_site_data,false), - string:tokens(Base,":"); + string:lexemes(Base, ":"); site_config -> Base = getenv("XDG_CONFIG_DIRS",?basedir_linux_site_config,false), - string:tokens(Base,":") + string:lexemes(Base, ":") end. -define(basedir_darwin_user_data, "Library/Application Support"). @@ -1152,3 +1185,72 @@ basedir_os_type() -> {win32,_} -> windows; _ -> linux end. + +%% +%% validate/1 +%% + +-spec validate(FileName) -> boolean() when + FileName :: file:name_all(). + +validate(FileName) when is_binary(FileName) -> + %% Raw filename... + validate_bin(FileName); +validate(FileName) when is_list(FileName); + is_atom(FileName) -> + validate_list(FileName, + file:native_name_encoding(), + os:type()). + +validate_list(FileName, Enc, Os) -> + try + true = validate_list(FileName, Enc, Os, 0) > 0 + catch + _ : _ -> false + end. + +validate_list([], _Enc, _Os, Chars) -> + Chars; +validate_list(C, Enc, Os, Chars) when is_integer(C) -> + validate_char(C, Enc, Os), + Chars+1; +validate_list(A, Enc, Os, Chars) when is_atom(A) -> + validate_list(atom_to_list(A), Enc, Os, Chars); +validate_list([H|T], Enc, Os, Chars) -> + NewChars = validate_list(H, Enc, Os, Chars), + validate_list(T, Enc, Os, NewChars). + +%% C is always an integer... +% validate_char(C, _, _) when not is_integer(C) -> +% throw(invalid); +validate_char(C, _, _) when C < 1 -> + throw(invalid); %% No negative or null characters... +validate_char(C, latin1, _) when C > 255 -> + throw(invalid); +validate_char(C, utf8, _) when C >= 16#110000 -> + throw(invalid); +validate_char(C, utf8, {win32, _}) when C > 16#ffff -> + throw(invalid); %% invalid win wchar... +validate_char(_C, utf8, {win32, _}) -> + ok; %% Range below is accepted on windows... +validate_char(C, utf8, _) when 16#D800 =< C, C =< 16#DFFF -> + throw(invalid); %% invalid unicode range... +validate_char(_, _, _) -> + ok. + +validate_bin(Bin) -> + %% Raw filename. That is, we do not interpret + %% the encoding, but we still do not accept + %% null characters... + try + true = validate_bin(Bin, 0) > 0 + catch + _ : _ -> false + end. + +validate_bin(<<>>, Bs) -> + Bs; +validate_bin(<<0, _Rest/binary>>, _Bs) -> + throw(invalid); %% No null characters allowed... +validate_bin(<<_B, Rest/binary>>, Bs) -> + validate_bin(Rest, Bs+1). diff --git a/lib/stdlib/src/gen_server.erl b/lib/stdlib/src/gen_server.erl index 7daa7a9fe4..ac172325b5 100644 --- a/lib/stdlib/src/gen_server.erl +++ b/lib/stdlib/src/gen_server.erl @@ -116,23 +116,27 @@ %%%========================================================================= -callback init(Args :: term()) -> - {ok, State :: term()} | {ok, State :: term(), timeout() | hibernate} | + {ok, State :: term()} | {ok, State :: term(), timeout() | hibernate | {continue, term()}} | {stop, Reason :: term()} | ignore. -callback handle_call(Request :: term(), From :: {pid(), Tag :: term()}, State :: term()) -> {reply, Reply :: term(), NewState :: term()} | - {reply, Reply :: term(), NewState :: term(), timeout() | hibernate} | + {reply, Reply :: term(), NewState :: term(), timeout() | hibernate | {continue, term()}} | {noreply, NewState :: term()} | - {noreply, NewState :: term(), timeout() | hibernate} | + {noreply, NewState :: term(), timeout() | hibernate | {continue, term()}} | {stop, Reason :: term(), Reply :: term(), NewState :: term()} | {stop, Reason :: term(), NewState :: term()}. -callback handle_cast(Request :: term(), State :: term()) -> {noreply, NewState :: term()} | - {noreply, NewState :: term(), timeout() | hibernate} | + {noreply, NewState :: term(), timeout() | hibernate | {continue, term()}} | {stop, Reason :: term(), NewState :: term()}. -callback handle_info(Info :: timeout | term(), State :: term()) -> {noreply, NewState :: term()} | - {noreply, NewState :: term(), timeout() | hibernate} | + {noreply, NewState :: term(), timeout() | hibernate | {continue, term()}} | + {stop, Reason :: term(), NewState :: term()}. +-callback handle_continue(Info :: term(), State :: term()) -> + {noreply, NewState :: term()} | + {noreply, NewState :: term(), timeout() | hibernate | {continue, term()}} | {stop, Reason :: term(), NewState :: term()}. -callback terminate(Reason :: (normal | shutdown | {shutdown, term()} | term()), @@ -149,7 +153,7 @@ Status :: term(). -optional_callbacks( - [handle_info/2, terminate/2, code_change/3, format_status/2]). + [handle_info/2, handle_continue/2, terminate/2, code_change/3, format_status/2]). %%% ----------------------------------------------------------------- %%% Starts a generic server. @@ -309,7 +313,7 @@ enter_loop(Mod, Options, State, ServerName, Timeout) -> Name = gen:get_proc_name(ServerName), Parent = gen:get_parent(), Debug = gen:debug_options(Name, Options), - HibernateAfterTimeout = gen:hibernate_after(Options), + HibernateAfterTimeout = gen:hibernate_after(Options), loop(Parent, Name, State, Mod, Timeout, HibernateAfterTimeout, Debug). %%%======================================================================== @@ -374,6 +378,19 @@ init_it(Mod, Args) -> %%% --------------------------------------------------- %%% The MAIN loop. %%% --------------------------------------------------- + +loop(Parent, Name, State, Mod, {continue, Continue} = Msg, HibernateAfterTimeout, Debug) -> + Reply = try_dispatch(Mod, handle_continue, Continue, State), + case Debug of + [] -> + handle_common_reply(Reply, Parent, Name, undefined, Msg, Mod, + HibernateAfterTimeout, State); + _ -> + Debug1 = sys:handle_debug(Debug, fun print_event/3, Name, Msg), + handle_common_reply(Reply, Parent, Name, undefined, Msg, Mod, + HibernateAfterTimeout, State, Debug1) + end; + loop(Parent, Name, State, Mod, hibernate, HibernateAfterTimeout, Debug) -> proc_lib:hibernate(?MODULE,wake_hib,[Parent, Name, State, Mod, HibernateAfterTimeout, Debug]); diff --git a/lib/stdlib/src/gen_statem.erl b/lib/stdlib/src/gen_statem.erl index 1110d18af6..cd6312855d 100644 --- a/lib/stdlib/src/gen_statem.erl +++ b/lib/stdlib/src/gen_statem.erl @@ -296,7 +296,7 @@ (Reason :: term()). %% Format the callback module state in some sensible that is -%% often condensed way. For StatusOption =:= 'normal' the perferred +%% often condensed way. For StatusOption =:= 'normal' the preferred %% return term is [{data,[{"State",FormattedState}]}], and for %% StatusOption =:= 'terminate' it is just FormattedState. -callback format_status( @@ -510,8 +510,6 @@ call(ServerRef, Request, Timeout) -> parse_timeout(Timeout) -> case Timeout of - {clean_timeout,infinity} -> - {dirty_timeout,infinity}; {clean_timeout,_} -> Timeout; {dirty_timeout,_} -> diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl index 4b2d15c8b3..e345810ca0 100644 --- a/lib/stdlib/src/io_lib_format.erl +++ b/lib/stdlib/src/io_lib_format.erl @@ -380,7 +380,7 @@ float_e(_Fl, {Ds,E}, P) -> {Fs,false} -> [Fs|float_exp(E-1)] end. -%% float_man([Digit], Icount, Dcount) -> {[Chars],CarryFlag}. +%% float_man([Digit], Icount, Dcount) -> {[Char],CarryFlag}. %% Generate the characters in the mantissa from the digits with Icount %% characters before the '.' and Dcount decimals. Handle carry and let %% caller decide what to do at top. @@ -395,7 +395,7 @@ float_man([D|Ds], I, Dc) -> {Cs,false} -> {[D|Cs],false} end; float_man([], I, Dc) -> %Pad with 0's - {string:chars($0, I, [$.|string:chars($0, Dc)]),false}. + {lists:duplicate(I, $0) ++ [$.|lists:duplicate(Dc, $0)],false}. float_man([D|_], 0) when D >= $5 -> {[],true}; float_man([_|_], 0) -> {[],false}; @@ -405,7 +405,7 @@ float_man([D|Ds], Dc) -> {Cs,true} -> {[D+1|Cs],false}; {Cs,false} -> {[D|Cs],false} end; -float_man([], Dc) -> {string:chars($0, Dc),false}. %Pad with 0's +float_man([], Dc) -> {lists:duplicate(Dc, $0),false}. %Pad with 0's %% float_exp(Exponent) -> [Char]. %% Generate the exponent of a floating point number. Always include sign. @@ -429,7 +429,7 @@ fwrite_f(Fl, F, Adj, P, Pad) when P >= 1 -> float_f(Fl, Fd, P) when Fl < 0.0 -> [$-|float_f(-Fl, Fd, P)]; float_f(Fl, {Ds,E}, P) when E =< 0 -> - float_f(Fl, {string:chars($0, -E+1, Ds),1}, P); %Prepend enough 0's + float_f(Fl, {lists:duplicate(-E+1, $0)++Ds,1}, P); %Prepend enough 0's float_f(_Fl, {Ds,E}, P) -> case float_man(Ds, E, P) of {Fs,true} -> "1" ++ Fs; %Handle carry @@ -751,7 +751,7 @@ adjust(Data, Pad, right) -> [Pad|Data]. flat_trunc(List, N) when is_integer(N), N >= 0 -> string:slice(List, 0, N). -%% A deep version of string:chars/2,3 +%% A deep version of lists:duplicate/2 chars(_C, 0) -> []; diff --git a/lib/stdlib/src/lib.erl b/lib/stdlib/src/lib.erl index c6eb0d7915..a7980cc294 100644 --- a/lib/stdlib/src/lib.erl +++ b/lib/stdlib/src/lib.erl @@ -646,7 +646,7 @@ pp_arguments(PF, As, I, Enc) -> Ll = length(L), A = list_to_atom(lists:duplicate(Ll, $a)), S0 = unicode:characters_to_list(PF([A | T], I+1), Enc), - brackets_to_parens([$[,L,string:sub_string(S0, 2+Ll)], Enc); + brackets_to_parens([$[,L,string:slice(S0, 1+Ll)], Enc); _ -> brackets_to_parens(PF(As, I+1), Enc) end. diff --git a/lib/stdlib/src/otp_internal.erl b/lib/stdlib/src/otp_internal.erl index 5b488cc677..122b476ddb 100644 --- a/lib/stdlib/src/otp_internal.erl +++ b/lib/stdlib/src/otp_internal.erl @@ -609,6 +609,52 @@ obsolete_1(filename, find_src, 2) -> obsolete_1(erlang, hash, 2) -> {removed, {erlang, phash2, 2}, "20.0"}; +%% Added in OTP-21 +obsolete_1(string, len, 1) -> + {deprecated, "deprecated; use string:length/3 instead"}; +obsolete_1(string, concat, 2) -> + {deprecated, "deprecated; use [Str1,Str2] instead"}; +obsolete_1(string, str, 2) -> + {deprecated, "deprecated; use string:find/2 instead"}; +obsolete_1(string, rstr, 2) -> + {deprecated, "deprecated; use string:find/3 instead"}; +obsolete_1(string, chr, 2) -> + {deprecated, "deprecated; use string:find/2 instead"}; +obsolete_1(string, rchr, 2) -> + {deprecated, "deprecated; use string:find/3 instead"}; +obsolete_1(string, span, 2) -> + {deprecated, "deprecated; use string:take/2 instead"}; +obsolete_1(string, cspan, 2) -> + {deprecated, "deprecated; use string:take/3 instead"}; +obsolete_1(string, substr, _) -> + {deprecated, "deprecated; use string:slice/3 instead"}; +obsolete_1(string, tokens, 2) -> + {deprecated, "deprecated; use string:lexemes/2 instead"}; +obsolete_1(string, chars, _) -> + {deprecated, "deprecated; use lists:duplicate/2 instead"}; +obsolete_1(string, copies, _) -> + {deprecated, "deprecated; use lists:duplicate/2 instead"}; +obsolete_1(string, words, _) -> + {deprecated, "deprecated; use string:lexemes/2 instead"}; +obsolete_1(string, strip, _) -> + {deprecated, "deprecated; use string:trim/3 instead"}; +obsolete_1(string, sub_word, _) -> + {deprecated, "deprecated; use string:nth_lexeme/3 instead"}; +obsolete_1(string, sub_string, _) -> + {deprecated, "deprecated; use string:slice/3 instead"}; +obsolete_1(string, left, _) -> + {deprecated, "deprecated; use string:pad/3 instead"}; +obsolete_1(string, right, _) -> + {deprecated, "deprecated; use string:pad/3 instead"}; +obsolete_1(string, centre, _) -> + {deprecated, "deprecated; use string:pad/3 instead"}; +obsolete_1(string, join, _) -> + {deprecated, "deprecated; use lists:join/2 instead"}; +obsolete_1(string, to_upper, _) -> + {deprecated, "deprecated; use string:uppercase/1 or string:titlecase/1 instead"}; +obsolete_1(string, to_lower, _) -> + {deprecated, "deprecated; use string:lowercase/1 or string:casefold/1 instead"}; + %% not obsolete obsolete_1(_, _, _) -> diff --git a/lib/stdlib/src/pool.erl b/lib/stdlib/src/pool.erl index 05950a1d7c..b12ff205b1 100644 --- a/lib/stdlib/src/pool.erl +++ b/lib/stdlib/src/pool.erl @@ -25,7 +25,7 @@ %% with the least load !!!! %% This function is callable from any node including the master %% That is part of the pool -%% nodes are scheduled on a per usgae basis and per load basis, +%% nodes are scheduled on a per usage basis and per load basis, %% Whenever we use a node, we put at the end of the queue, and whenever %% a node report a change in load, we insert it accordingly @@ -197,7 +197,7 @@ pure_insert({Load,Node},[{L,N}|Tail]) when Load < L -> pure_insert(L,[H|T]) -> [H|pure_insert(L,T)]. %% Really should not measure the contributions from -%% the back ground processes here .... which we do :-( +%% the background processes here .... which we do :-( %% We don't have to monitor the master, since we're slaves anyway statistic_collector() -> @@ -213,7 +213,7 @@ statistic_collector(I) -> stat_loop(M, 999999) end. -%% Do not tell the master about our load if it has not changed +%% Do not tell the master about our load if it has not changed stat_loop(M, Old) -> sleep(2000), diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl index 7a8a5e6d4a..362e98006e 100644 --- a/lib/stdlib/src/rand.erl +++ b/lib/stdlib/src/rand.erl @@ -21,8 +21,8 @@ %% Multiple PRNG module for Erlang/OTP %% Copyright (c) 2015-2016 Kenji Rikitake %% -%% exrop (xoroshiro116+) added and statistical distribution -%% improvements by the Erlang/OTP team 2017 +%% exrop (xoroshiro116+) added, statistical distribution +%% improvements and uniform_real added by the Erlang/OTP team 2017 %% ===================================================================== -module(rand). @@ -30,10 +30,14 @@ -export([seed_s/1, seed_s/2, seed/1, seed/2, export_seed/0, export_seed_s/1, uniform/0, uniform/1, uniform_s/1, uniform_s/2, + uniform_real/0, uniform_real_s/1, jump/0, jump/1, normal/0, normal/2, normal_s/1, normal_s/3 ]). +%% Debug +-export([make_float/3, float2str/1, bc64/1]). + -compile({inline, [exs64_next/1, exsplus_next/1, exs1024_next/1, exs1024_calc/2, exrop_next/1, exrop_next_s/2, @@ -60,6 +64,10 @@ %% N i evaluated 3 times (?BSL((Bits), (X), (N)) bor ((X) bsr ((Bits)-(N))))). +-define( + BC(V, N), + bc((V), ?BIT((N) - 1), N)). + %%-define(TWO_POW_MINUS53, (math:pow(2, -53))). -define(TWO_POW_MINUS53, 1.11022302462515657e-16). @@ -84,14 +92,21 @@ %% The 'bits' field indicates how many bits the integer %% returned from 'next' has got, i.e 'next' shall return %% an random integer in the range 0..(2^Bits - 1). -%% At least 53 bits is required for the floating point -%% producing fallbacks. This field is only used when -%% the 'uniform' or 'uniform_n' fields are not defined. +%% At least 55 bits is required for the floating point +%% producing fallbacks, but 56 bits would be more future proof. %% %% The fields 'next', 'uniform' and 'uniform_n' -%% implement the algorithm. If 'uniform' or 'uinform_n' +%% implement the algorithm. If 'uniform' or 'uniform_n' %% is not present there is a fallback using 'next' and either -%% 'bits' or the deprecated 'max'. +%% 'bits' or the deprecated 'max'. The 'next' function +%% must generate a word with at least 56 good random bits. +%% +%% The 'weak_low_bits' field indicate how many bits are of +%% lesser quality and they will not be used by the floating point +%% producing functions, nor by the range producing functions +%% when more bits are needed, to avoid weak bits in the middle +%% of the generated bits. The lowest bits from the range +%% functions still have the generator's quality. %% -type alg_handler() :: #{type := alg(), @@ -148,11 +163,7 @@ %% For ranges larger than the algorithm bit size uniform_range(Range, #{next:=Next, bits:=Bits} = Alg, R, V) -> - WeakLowBits = - case Alg of - #{weak_low_bits:=WLB} -> WLB; - #{} -> 0 - end, + WeakLowBits = maps:get(weak_low_bits, Alg, 0), %% Maybe waste the lowest bit(s) when shifting in new bits Shift = Bits - WeakLowBits, ShiftMask = bnot ?MASK(WeakLowBits), @@ -297,7 +308,7 @@ uniform_s({#{bits:=Bits, next:=Next} = Alg, R0}) -> {(V bsr (Bits - 53)) * ?TWO_POW_MINUS53, {Alg, R1}}; uniform_s({#{max:=Max, next:=Next} = Alg, R0}) -> {V, R1} = Next(R0), - %% Old broken algorithm with non-uniform density + %% Old algorithm with non-uniform density {V / (Max + 1), {Alg, R1}}. @@ -317,7 +328,7 @@ uniform_s(N, {#{bits:=Bits, next:=Next} = Alg, R0}) ?uniform_range(N, Alg, R1, V, MaxMinusN, I); uniform_s(N, {#{max:=Max, next:=Next} = Alg, R0}) when is_integer(N), 1 =< N -> - %% Old broken algorithm with skewed probability + %% Old algorithm with skewed probability %% and gap in ranges > Max {V, R1} = Next(R0), if @@ -328,6 +339,189 @@ uniform_s(N, {#{max:=Max, next:=Next} = Alg, R0}) {trunc(F * N) + 1, {Alg, R1}} end. +%% uniform_real/0: returns a random float X where 0.0 < X =< 1.0, +%% updating the state in the process dictionary. + +-spec uniform_real() -> X :: float(). +uniform_real() -> + {X, Seed} = uniform_real_s(seed_get()), + _ = seed_put(Seed), + X. + +%% uniform_real_s/1: given a state, uniform_s/1 +%% returns a random float X where 0.0 < X =< 1.0, +%% and a new state. +%% +%% This function does not use the same form of uniformity +%% as the uniform_s/1 function. +%% +%% Instead, this function does not generate numbers with equal +%% distance in the interval, but rather tries to keep all mantissa +%% bits random also for small numbers, meaning that the distance +%% between possible numbers decreases when the numbers +%% approaches 0.0, as does the possibility for a particular +%% number. Hence uniformity is preserved. +%% +%% To generate 56 bits at the time instead of 53 is actually +%% a speed optimization since the probability to have to +%% generate a second word decreases by 1/2 for every extra bit. +%% +%% This function generates normalized numbers, so the smallest number +%% that can be generated is 2^-1022 with the distance 2^-1074 +%% to the next to smallest number, compared to 2^-53 for uniform_s/1. +%% +%% This concept of uniformity should work better for applications +%% where you need to calculate 1.0/X or math:log(X) since those +%% operations benefits from larger precision approaching 0.0, +%% and that this function does not return 0.0 nor denormalized +%% numbers very close to 0.0. The log() operation in The Box-Muller +%% transformation for normal distribution is an example of this. +%% +%%-define(TWO_POW_MINUS55, (math:pow(2, -55))). +%%-define(TWO_POW_MINUS110, (math:pow(2, -110))). +%%-define(TWO_POW_MINUS55, 2.7755575615628914e-17). +%%-define(TWO_POW_MINUS110, 7.7037197775489436e-34). +%% +-spec uniform_real_s(State :: state()) -> {X :: float(), NewState :: state()}. +uniform_real_s({#{bits:=Bits, next:=Next} = Alg, R0}) -> + %% Generate a 56 bit number without using the weak low bits. + %% + %% Be sure to use only 53 bits when multiplying with + %% math:pow(2.0, -N) to avoid rounding which would make + %% "even" floats more probable than "odd". + %% + {V1, R1} = Next(R0), + M1 = V1 bsr (Bits - 56), + if + ?BIT(55) =< M1 -> + %% We have 56 bits - waste 3 + {(M1 bsr 3) * math:pow(2.0, -53), {Alg, R1}}; + ?BIT(54) =< M1 -> + %% We have 55 bits - waste 2 + {(M1 bsr 2) * math:pow(2.0, -54), {Alg, R1}}; + ?BIT(53) =< M1 -> + %% We have 54 bits - waste 1 + {(M1 bsr 1) * math:pow(2.0, -55), {Alg, R1}}; + ?BIT(52) =< M1 -> + %% We have 53 bits - use all + {M1 * math:pow(2.0, -56), {Alg, R1}}; + true -> + %% Need more bits + {V2, R2} = Next(R1), + uniform_real_s(Alg, Next, M1, -56, R2, V2, Bits) + end; +uniform_real_s({#{max:=_, next:=Next} = Alg, R0}) -> + %% Generate a 56 bit number. + %% Ignore the weak low bits for these old algorithms, + %% just produce something reasonable. + %% + %% Be sure to use only 53 bits when multiplying with + %% math:pow(2.0, -N) to avoid rounding which would make + %% "even" floats more probable than "odd". + %% + {V1, R1} = Next(R0), + M1 = ?MASK(56, V1), + if + ?BIT(55) =< M1 -> + %% We have 56 bits - waste 3 + {(M1 bsr 3) * math:pow(2.0, -53), {Alg, R1}}; + ?BIT(54) =< M1 -> + %% We have 55 bits - waste 2 + {(M1 bsr 2) * math:pow(2.0, -54), {Alg, R1}}; + ?BIT(53) =< M1 -> + %% We have 54 bits - waste 1 + {(M1 bsr 1) * math:pow(2.0, -55), {Alg, R1}}; + ?BIT(52) =< M1 -> + %% We have 53 bits - use all + {M1 * math:pow(2.0, -56), {Alg, R1}}; + true -> + %% Need more bits + {V2, R2} = Next(R1), + uniform_real_s(Alg, Next, M1, -56, R2, V2, 56) + end. + +uniform_real_s(Alg, _Next, M0, -1064, R1, V1, Bits) -> % 19*56 + %% This is a very theoretical bottom case. + %% The odds of getting here is about 2^-1008, + %% through a white box test case, or thanks to + %% a malfunctioning PRNG producing 18 56-bit zeros in a row. + %% + %% Fill up to 53 bits, we have at most 52 + B0 = (53 - ?BC(M0, 52)), % Missing bits + {(((M0 bsl B0) bor (V1 bsr (Bits - B0))) * math:pow(2.0, -1064 - B0)), + {Alg, R1}}; +uniform_real_s(Alg, Next, M0, BitNo, R1, V1, Bits) -> + if + %% Optimize the most probable. + %% Fill up to 53 bits. + ?BIT(51) =< M0 -> + %% We have 52 bits in M0 - need 1 + {(((M0 bsl 1) bor (V1 bsr (Bits - 1))) + * math:pow(2.0, BitNo - 1)), + {Alg, R1}}; + ?BIT(50) =< M0 -> + %% We have 51 bits in M0 - need 2 + {(((M0 bsl 2) bor (V1 bsr (Bits - 2))) + * math:pow(2.0, BitNo - 2)), + {Alg, R1}}; + ?BIT(49) =< M0 -> + %% We have 50 bits in M0 - need 3 + {(((M0 bsl 3) bor (V1 bsr (Bits - 3))) + * math:pow(2.0, BitNo - 3)), + {Alg, R1}}; + M0 == 0 -> + M1 = V1 bsr (Bits - 56), + if + ?BIT(55) =< M1 -> + %% We have 56 bits - waste 3 + {(M1 bsr 3) * math:pow(2.0, BitNo - 53), {Alg, R1}}; + ?BIT(54) =< M1 -> + %% We have 55 bits - waste 2 + {(M1 bsr 2) * math:pow(2.0, BitNo - 54), {Alg, R1}}; + ?BIT(53) =< M1 -> + %% We have 54 bits - waste 1 + {(M1 bsr 1) * math:pow(2.0, BitNo - 55), {Alg, R1}}; + ?BIT(52) =< M1 -> + %% We have 53 bits - use all + {M1 * math:pow(2.0, BitNo - 56), {Alg, R1}}; + BitNo =:= -1008 -> + %% Endgame + %% For the last round we can not have 14 zeros or more + %% at the top of M1 because then we will underflow, + %% so we need at least 43 bits + if + ?BIT(42) =< M1 -> + %% We have 43 bits - get the last bits + uniform_real_s(Alg, Next, M1, BitNo - 56, R1); + true -> + %% Would underflow 2^-1022 - start all over + %% + %% We could just crash here since the odds for + %% the PRNG being broken is much higher than + %% for a good PRNG generating this many zeros + %% in a row. Maybe we should write an error + %% report or call this a system limit...? + uniform_real_s({Alg, R1}) + end; + true -> + %% Need more bits + uniform_real_s(Alg, Next, M1, BitNo - 56, R1) + end; + true -> + %% Fill up to 53 bits + B0 = 53 - ?BC(M0, 49), % Number of bits we need to append + {(((M0 bsl B0) bor (V1 bsr (Bits - B0))) + * math:pow(2.0, BitNo - B0)), + {Alg, R1}} + end. +%% +uniform_real_s(#{bits:=Bits} = Alg, Next, M0, BitNo, R0) -> + {V1, R1} = Next(R0), + uniform_real_s(Alg, Next, M0, BitNo, R1, V1, Bits); +uniform_real_s(#{max:=_} = Alg, Next, M0, BitNo, R0) -> + {V1, R1} = Next(R0), + uniform_real_s(Alg, Next, M0, BitNo, R1, ?MASK(56, V1), 56). + %% jump/1: given a state, jump/1 %% returns a new state which is equivalent to that %% after a large number of call defined for each algorithm. @@ -1025,3 +1219,42 @@ normal_fi(Indx) -> 1.0214971439701471e-02,8.6165827693987316e-03,7.0508754713732268e-03, 5.5224032992509968e-03,4.0379725933630305e-03,2.6090727461021627e-03, 1.2602859304985975e-03}). + +%%%bitcount64(0) -> 0; +%%%bitcount64(V) -> 1 + bitcount(V, 64). +%%% +%%%-define( +%%% BITCOUNT(V, N), +%%% bitcount(V, N) -> +%%% if +%%% (1 bsl ((N) bsr 1)) =< (V) -> +%%% ((N) bsr 1) + bitcount((V) bsr ((N) bsr 1), ((N) bsr 1)); +%%% true -> +%%% bitcount((V), ((N) bsr 1)) +%%% end). +%%%?BITCOUNT(V, 64); +%%%?BITCOUNT(V, 32); +%%%?BITCOUNT(V, 16); +%%%?BITCOUNT(V, 8); +%%%?BITCOUNT(V, 4); +%%%?BITCOUNT(V, 2); +%%%bitcount(_, 1) -> 0. + +bc64(V) -> ?BC(V, 64). + +%% Linear from high bit - higher probability first gives faster execution +bc(V, B, N) when B =< V -> N; +bc(V, B, N) -> bc(V, B bsr 1, N - 1). + +make_float(S, E, M) -> + <<F/float>> = <<S:1, E:11, M:52>>, + F. + +float2str(N) -> + <<S:1, E:11, M:52>> = <<(float(N))/float>>, + lists:flatten( + io_lib:format( + "~c~c.~13.16.0bE~b", + [case S of 1 -> $-; 0 -> $+ end, + case E of 0 -> $0; _ -> $1 end, + M, E - 16#3ff])). diff --git a/lib/stdlib/src/slave.erl b/lib/stdlib/src/slave.erl index d7cf6386f5..b3f3206d67 100644 --- a/lib/stdlib/src/slave.erl +++ b/lib/stdlib/src/slave.erl @@ -320,7 +320,7 @@ mk_cmd(Host, Name, Args, Waiter, Prog0) -> %% emulator and flags as the test node. The return from lib:progname() %% could then typically be '/<full_path_to>/cerl -gcov'). quote_progname(Progname) -> - do_quote_progname(string:tokens(to_list(Progname)," ")). + do_quote_progname(string:lexemes(to_list(Progname)," ")). do_quote_progname([Prog]) -> "\""++Prog++"\""; diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src index 3c449d3cb9..5fb48acfab 100644 --- a/lib/stdlib/src/stdlib.app.src +++ b/lib/stdlib/src/stdlib.app.src @@ -101,13 +101,14 @@ timer, unicode, unicode_util, + uri_string, win32reg, zip]}, {registered,[timer_server,rsh_starter,take_over_monitor,pool_master, dets]}, {applications, [kernel]}, {env, []}, - {runtime_dependencies, ["sasl-3.0","kernel-5.0","erts-9.0","crypto-3.3", + {runtime_dependencies, ["sasl-3.0","kernel-6.0","erts-10.0","crypto-3.3", "compiler-5.0"]} ]}. diff --git a/lib/stdlib/src/stdlib.appup.src b/lib/stdlib/src/stdlib.appup.src index 800c2c61f3..e4e3fb83e9 100644 --- a/lib/stdlib/src/stdlib.appup.src +++ b/lib/stdlib/src/stdlib.appup.src @@ -18,9 +18,7 @@ %% %CopyrightEnd% {"%VSN%", %% Up from - max one major revision back - [{<<"3\\.[0-3](\\.[0-9]+)*">>,[restart_new_emulator]}, % OTP-19.* - {<<"3\\.4(\\.[0-9]+)*">>,[restart_new_emulator]}], % OTP-20.* + [{<<"3\\.4(\\.[0-9]+)*">>,[restart_new_emulator]}], % OTP-20.* %% Down to - max one major revision back - [{<<"3\\.[0-3](\\.[0-9]+)*">>,[restart_new_emulator]}, % OTP-19.* - {<<"3\\.4(\\.[0-9]+)*">>,[restart_new_emulator]}] % OTP-20.* + [{<<"3\\.4(\\.[0-9]+)*">>,[restart_new_emulator]}] % OTP-20.* }. diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl index 4972da297d..5a4d2df2a6 100644 --- a/lib/stdlib/src/string.erl +++ b/lib/stdlib/src/string.erl @@ -87,6 +87,16 @@ %%% May be removed -export([list_to_float/1, list_to_integer/1]). +-deprecated([{len,1},{concat,2}, + {str,2},{chr,2},{rchr,2},{rstr,2}, + {span,2},{cspan,2},{substr,'_'},{tokens,2}, + {chars,'_'}, + {copies,2},{words,'_'},{strip,'_'}, + {sub_word,'_'},{left,'_'},{right,'_'}, + {sub_string,'_'},{centre,'_'},{join,2}, + {to_upper,1}, {to_lower,1} + ]). + %% Uses bifs: string:list_to_float/1 and string:list_to_integer/1 -spec list_to_float(String) -> {Float, Rest} | {'error', Reason} when String :: string(), diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl new file mode 100644 index 0000000000..22212da222 --- /dev/null +++ b/lib/stdlib/src/uri_string.erl @@ -0,0 +1,1842 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +%% +%% [RFC 3986, Chapter 2.2. Reserved Characters] +%% +%% reserved = gen-delims / sub-delims +%% +%% gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" +%% +%% sub-delims = "!" / "$" / "&" / "'" / "(" / ")" +%% / "*" / "+" / "," / ";" / "=" +%% +%% +%% [RFC 3986, Chapter 2.3. Unreserved Characters] +%% +%% unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +%% +%% +%% [RFC 3986, Chapter 3. Syntax Components] +%% +%% The generic URI syntax consists of a hierarchical sequence of +%% components referred to as the scheme, authority, path, query, and +%% fragment. +%% +%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] +%% +%% hier-part = "//" authority path-abempty +%% / path-absolute +%% / path-rootless +%% / path-empty +%% +%% The scheme and path components are required, though the path may be +%% empty (no characters). When authority is present, the path must +%% either be empty or begin with a slash ("/") character. When +%% authority is not present, the path cannot begin with two slash +%% characters ("//"). These restrictions result in five different ABNF +%% rules for a path (Section 3.3), only one of which will match any +%% given URI reference. +%% +%% The following are two example URIs and their component parts: +%% +%% foo://example.com:8042/over/there?name=ferret#nose +%% \_/ \______________/\_________/ \_________/ \__/ +%% | | | | | +%% scheme authority path query fragment +%% | _____________________|__ +%% / \ / \ +%% urn:example:animal:ferret:nose +%% +%% +%% [RFC 3986, Chapter 3.1. Scheme] +%% +%% Each URI begins with a scheme name that refers to a specification for +%% assigning identifiers within that scheme. +%% +%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) +%% +%% +%% [RFC 3986, Chapter 3.2. Authority] +%% +%% Many URI schemes include a hierarchical element for a naming +%% authority so that governance of the name space defined by the +%% remainder of the URI is delegated to that authority (which may, in +%% turn, delegate it further). +%% +%% authority = [ userinfo "@" ] host [ ":" port ] +%% +%% +%% [RFC 3986, Chapter 3.2.1. User Information] +%% +%% The userinfo subcomponent may consist of a user name and, optionally, +%% scheme-specific information about how to gain authorization to access +%% the resource. The user information, if present, is followed by a +%% commercial at-sign ("@") that delimits it from the host. +%% +%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) +%% +%% +%% [RFC 3986, Chapter 3.2.2. Host] +%% +%% The host subcomponent of authority is identified by an IP literal +%% encapsulated within square brackets, an IPv4 address in dotted- +%% decimal form, or a registered name. +%% +%% host = IP-literal / IPv4address / reg-name +%% +%% IP-literal = "[" ( IPv6address / IPvFuture ) "]" +%% +%% IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) +%% +%% IPv6address = 6( h16 ":" ) ls32 +%% / "::" 5( h16 ":" ) ls32 +%% / [ h16 ] "::" 4( h16 ":" ) ls32 +%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 +%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 +%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 +%% / [ *4( h16 ":" ) h16 ] "::" ls32 +%% / [ *5( h16 ":" ) h16 ] "::" h16 +%% / [ *6( h16 ":" ) h16 ] "::" +%% +%% ls32 = ( h16 ":" h16 ) / IPv4address +%% ; least-significant 32 bits of address +%% +%% h16 = 1*4HEXDIG +%% ; 16 bits of address represented in hexadecimal +%% +%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet +%% +%% dec-octet = DIGIT ; 0-9 +%% / %x31-39 DIGIT ; 10-99 +%% / "1" 2DIGIT ; 100-199 +%% / "2" %x30-34 DIGIT ; 200-249 +%% / "25" %x30-35 ; 250-255 +%% +%% reg-name = *( unreserved / pct-encoded / sub-delims ) +%% +%% +%% [RFC 3986, Chapter 3.2.2. Port] +%% +%% The port subcomponent of authority is designated by an optional port +%% number in decimal following the host and delimited from it by a +%% single colon (":") character. +%% +%% port = *DIGIT +%% +%% +%% [RFC 3986, Chapter 3.3. Path] +%% +%% The path component contains data, usually organized in hierarchical +%% form, that, along with data in the non-hierarchical query component +%% (Section 3.4), serves to identify a resource within the scope of the +%% URI's scheme and naming authority (if any). The path is terminated +%% by the first question mark ("?") or number sign ("#") character, or +%% by the end of the URI. +%% +%% path = path-abempty ; begins with "/" or is empty +%% / path-absolute ; begins with "/" but not "//" +%% / path-noscheme ; begins with a non-colon segment +%% / path-rootless ; begins with a segment +%% / path-empty ; zero characters +%% +%% path-abempty = *( "/" segment ) +%% path-absolute = "/" [ segment-nz *( "/" segment ) ] +%% path-noscheme = segment-nz-nc *( "/" segment ) +%% path-rootless = segment-nz *( "/" segment ) +%% path-empty = 0<pchar> +%% segment = *pchar +%% segment-nz = 1*pchar +%% segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) +%% ; non-zero-length segment without any colon ":" +%% +%% pchar = unreserved / pct-encoded / sub-delims / ":" / "@" +%% +%% +%% [RFC 3986, Chapter 3.4. Query] +%% +%% The query component contains non-hierarchical data that, along with +%% data in the path component (Section 3.3), serves to identify a +%% resource within the scope of the URI's scheme and naming authority +%% (if any). The query component is indicated by the first question +%% mark ("?") character and terminated by a number sign ("#") character +%% or by the end of the URI. +%% +%% query = *( pchar / "/" / "?" ) +%% +%% +%% [RFC 3986, Chapter 3.5. Fragment] +%% +%% The fragment identifier component of a URI allows indirect +%% identification of a secondary resource by reference to a primary +%% resource and additional identifying information. +%% +%% fragment = *( pchar / "/" / "?" ) +%% +%% +%% [RFC 3986, Chapter 4.1. URI Reference] +%% +%% URI-reference is used to denote the most common usage of a resource +%% identifier. +%% +%% URI-reference = URI / relative-ref +%% +%% +%% [RFC 3986, Chapter 4.2. Relative Reference] +%% +%% A relative reference takes advantage of the hierarchical syntax +%% (Section 1.2.3) to express a URI reference relative to the name space +%% of another hierarchical URI. +%% +%% relative-ref = relative-part [ "?" query ] [ "#" fragment ] +%% +%% relative-part = "//" authority path-abempty +%% / path-absolute +%% / path-noscheme +%% / path-empty +%% +%% +%% [RFC 3986, Chapter 4.3. Absolute URI] +%% +%% Some protocol elements allow only the absolute form of a URI without +%% a fragment identifier. For example, defining a base URI for later +%% use by relative references calls for an absolute-URI syntax rule that +%% does not allow a fragment. +%% +%% absolute-URI = scheme ":" hier-part [ "?" query ] +%% +-module(uri_string). + +%%------------------------------------------------------------------------- +%% External API +%%------------------------------------------------------------------------- +-export([normalize/1, parse/1, + recompose/1, transcode/2]). +-export_type([error/0, uri_map/0, uri_string/0]). + + +%%------------------------------------------------------------------------- +%% Internal API +%%------------------------------------------------------------------------- +-export([is_host/1, is_path/1]). % suppress warnings + + +%%------------------------------------------------------------------------- +%% Macros +%%------------------------------------------------------------------------- +-define(CHAR(Char), <<Char/utf8>>). +-define(STRING_EMPTY, <<>>). +-define(STRING(MatchStr), <<MatchStr/binary>>). +-define(STRING_REST(MatchStr, Rest), <<MatchStr/utf8, Rest/binary>>). + +-define(DEC2HEX(X), + if ((X) >= 0) andalso ((X) =< 9) -> (X) + $0; + ((X) >= 10) andalso ((X) =< 15) -> (X) + $A - 10 + end). + +-define(HEX2DEC(X), + if ((X) >= $0) andalso ((X) =< $9) -> (X) - $0; + ((X) >= $A) andalso ((X) =< $F) -> (X) - $A + 10; + ((X) >= $a) andalso ((X) =< $f) -> (X) - $a + 10 + end). + + +%%%========================================================================= +%%% API +%%%========================================================================= + +%%------------------------------------------------------------------------- +%% URI compliant with RFC 3986 +%% ASCII %x21 - %x7A ("!" - "z") except +%% %x34 " double quote +%% %x60 < less than +%% %x62 > greater than +%% %x92 \ backslash +%% %x94 ^ caret / circumflex +%% %x96 ` grave / accent +%%------------------------------------------------------------------------- +-type uri_string() :: iodata(). +-type error() :: {error, atom(), term()}. + + +%%------------------------------------------------------------------------- +%% RFC 3986, Chapter 3. Syntax Components +%%------------------------------------------------------------------------- +-type uri_map() :: + #{fragment => unicode:chardata(), + host => unicode:chardata(), + path => unicode:chardata(), + port => non_neg_integer() | undefined, + query => unicode:chardata(), + scheme => unicode:chardata(), + userinfo => unicode:chardata()} | #{}. + + +%%------------------------------------------------------------------------- +%% Normalize URIs +%%------------------------------------------------------------------------- +-spec normalize(URIString) -> NormalizedURI when + URIString :: uri_string(), + NormalizedURI :: uri_string(). +normalize(URIString) -> + %% Percent-encoding normalization and case normalization for + %% percent-encoded triplets are achieved by running parse and + %% recompose on the input URI string. + recompose( + normalize_path_segment( + normalize_scheme_based( + normalize_case( + parse(URIString))))). + + +%%------------------------------------------------------------------------- +%% Parse URIs +%%------------------------------------------------------------------------- +-spec parse(URIString) -> URIMap when + URIString :: uri_string(), + URIMap :: uri_map() + | error(). +parse(URIString) when is_binary(URIString) -> + try parse_uri_reference(URIString, #{}) + catch + throw:{error, Atom, RestData} -> {error, Atom, RestData} + end; +parse(URIString) when is_list(URIString) -> + try + Binary = unicode:characters_to_binary(URIString), + Map = parse_uri_reference(Binary, #{}), + convert_mapfields_to_list(Map) + catch + throw:{error, Atom, RestData} -> {error, Atom, RestData} + end. + + +%%------------------------------------------------------------------------- +%% Recompose URIs +%%------------------------------------------------------------------------- +-spec recompose(URIMap) -> URIString when + URIMap :: uri_map(), + URIString :: uri_string() + | error(). +recompose(Map) -> + case is_valid_map(Map) of + false -> + {error, invalid_map, Map}; + true -> + try + T0 = update_scheme(Map, empty), + T1 = update_userinfo(Map, T0), + T2 = update_host(Map, T1), + T3 = update_port(Map, T2), + T4 = update_path(Map, T3), + T5 = update_query(Map, T4), + update_fragment(Map, T5) + catch + throw:{error, Atom, RestData} -> {error, Atom, RestData} + end + end. + + +%%------------------------------------------------------------------------- +%% Transcode URIs +%%------------------------------------------------------------------------- +-spec transcode(URIString, Options) -> Result when + URIString :: uri_string(), + Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}], + Result :: uri_string() + | error(). +transcode(URIString, Options) when is_binary(URIString) -> + try + InEnc = proplists:get_value(in_encoding, Options, utf8), + OutEnc = proplists:get_value(out_encoding, Options, utf8), + List = convert_to_list(URIString, InEnc), + Output = transcode(List, [], InEnc, OutEnc), + convert_to_binary(Output, utf8, OutEnc) + catch + throw:{error, Atom, RestData} -> {error, Atom, RestData} + end; +transcode(URIString, Options) when is_list(URIString) -> + InEnc = proplists:get_value(in_encoding, Options, utf8), + OutEnc = proplists:get_value(out_encoding, Options, utf8), + Flattened = flatten_list(URIString, InEnc), + try transcode(Flattened, [], InEnc, OutEnc) + catch + throw:{error, Atom, RestData} -> {error, Atom, RestData} + end. + + +%%%======================================================================== +%%% Internal functions +%%%======================================================================== + +%%------------------------------------------------------------------------- +%% Converts Map fields to lists +%%------------------------------------------------------------------------- +convert_mapfields_to_list(Map) -> + Fun = fun (_, V) when is_binary(V) -> unicode:characters_to_list(V); + (_, V) -> V end, + maps:map(Fun, Map). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 4.1. URI Reference] +%% +%% URI-reference is used to denote the most common usage of a resource +%% identifier. +%% +%% URI-reference = URI / relative-ref +%%------------------------------------------------------------------------- +-spec parse_uri_reference(binary(), uri_map()) -> uri_map(). +parse_uri_reference(<<>>, _) -> #{path => <<>>}; +parse_uri_reference(URIString, URI) -> + try parse_scheme_start(URIString, URI) + catch + throw:{_,_,_} -> + parse_relative_part(URIString, URI) + end. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 4.2. Relative Reference] +%% +%% A relative reference takes advantage of the hierarchical syntax +%% (Section 1.2.3) to express a URI reference relative to the name space +%% of another hierarchical URI. +%% +%% relative-ref = relative-part [ "?" query ] [ "#" fragment ] +%% +%% relative-part = "//" authority path-abempty +%% / path-absolute +%% / path-noscheme +%% / path-empty +%%------------------------------------------------------------------------- +-spec parse_relative_part(binary(), uri_map()) -> uri_map(). +parse_relative_part(?STRING_REST("//", Rest), URI) -> + %% Parse userinfo - "//" is NOT part of authority + try parse_userinfo(Rest, URI) of + {T, URI1} -> + Userinfo = calculate_parsed_userinfo(Rest, T), + URI2 = maybe_add_path(URI1), + URI2#{userinfo => decode_userinfo(Userinfo)} + catch + throw:{_,_,_} -> + {T, URI1} = parse_host(Rest, URI), + Host = calculate_parsed_host_port(Rest, T), + URI2 = maybe_add_path(URI1), + URI2#{host => decode_host(remove_brackets(Host))} + end; +parse_relative_part(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-absolute + Path = calculate_parsed_part(Rest, T), + URI1#{path => decode_path(?STRING_REST($/, Path))}; +parse_relative_part(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + URI2 = maybe_add_path(URI1), + URI2#{query => decode_query(Query)}; +parse_relative_part(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + URI2 = maybe_add_path(URI1), + URI2#{fragment => decode_fragment(Fragment)}; +parse_relative_part(?STRING_REST(Char, Rest), URI) -> + case is_segment_nz_nc(Char) of + true -> + {T, URI1} = parse_segment_nz_nc(Rest, URI), % path-noscheme + Path = calculate_parsed_part(Rest, T), + URI1#{path => decode_path(?STRING_REST(Char, Path))}; + false -> throw({error,invalid_uri,[Char]}) + end. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.3. Path] +%% +%% The path component contains data, usually organized in hierarchical +%% form, that, along with data in the non-hierarchical query component +%% (Section 3.4), serves to identify a resource within the scope of the +%% URI's scheme and naming authority (if any). The path is terminated +%% by the first question mark ("?") or number sign ("#") character, or +%% by the end of the URI. +%% +%% path = path-abempty ; begins with "/" or is empty +%% / path-absolute ; begins with "/" but not "//" +%% / path-noscheme ; begins with a non-colon segment +%% / path-rootless ; begins with a segment +%% / path-empty ; zero characters +%% +%% path-abempty = *( "/" segment ) +%% path-absolute = "/" [ segment-nz *( "/" segment ) ] +%% path-noscheme = segment-nz-nc *( "/" segment ) +%% path-rootless = segment-nz *( "/" segment ) +%% path-empty = 0<pchar> +%% segment = *pchar +%% segment-nz = 1*pchar +%% segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) +%% ; non-zero-length segment without any colon ":" +%% +%% pchar = unreserved / pct-encoded / sub-delims / ":" / "@" +%%------------------------------------------------------------------------- + +%%------------------------------------------------------------------------- +%% path-abempty +%%------------------------------------------------------------------------- +-spec parse_segment(binary(), uri_map()) -> {binary(), uri_map()}. +parse_segment(?STRING_REST($/, Rest), URI) -> + parse_segment(Rest, URI); % segment +parse_segment(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_segment(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_segment(?STRING_REST(Char, Rest), URI) -> + case is_pchar(Char) of + true -> parse_segment(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_segment(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + + +%%------------------------------------------------------------------------- +%% path-noscheme +%%------------------------------------------------------------------------- +-spec parse_segment_nz_nc(binary(), uri_map()) -> {binary(), uri_map()}. +parse_segment_nz_nc(?STRING_REST($/, Rest), URI) -> + parse_segment(Rest, URI); % segment +parse_segment_nz_nc(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_segment_nz_nc(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) -> + case is_segment_nz_nc(Char) of + true -> parse_segment_nz_nc(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_segment_nz_nc(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + + +%% Check if char is pchar. +-spec is_pchar(char()) -> boolean(). +is_pchar($%) -> true; % pct-encoded +is_pchar($:) -> true; +is_pchar($@) -> true; +is_pchar(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + +%% Check if char is segment_nz_nc. +-spec is_segment_nz_nc(char()) -> boolean(). +is_segment_nz_nc($%) -> true; % pct-encoded +is_segment_nz_nc($@) -> true; +is_segment_nz_nc(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.1. Scheme] +%% +%% Each URI begins with a scheme name that refers to a specification for +%% assigning identifiers within that scheme. +%% +%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) +%%------------------------------------------------------------------------- +-spec parse_scheme_start(binary(), uri_map()) -> uri_map(). +parse_scheme_start(?STRING_REST(Char, Rest), URI) -> + case is_alpha(Char) of + true -> {T, URI1} = parse_scheme(Rest, URI), + Scheme = calculate_parsed_scheme(Rest, T), + URI2 = maybe_add_path(URI1), + URI2#{scheme => ?STRING_REST(Char, Scheme)}; + false -> throw({error,invalid_uri,[Char]}) + end. + +%% Add path component if it missing after parsing the URI. +%% According to the URI specification there is always a +%% path component in every URI-reference and it can be +%% empty. +maybe_add_path(Map) -> + case maps:is_key(path, Map) of + false -> + Map#{path => <<>>}; + _Else -> + Map + end. + + +-spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}. +parse_scheme(?STRING_REST($:, Rest), URI) -> + {_, URI1} = parse_hier(Rest, URI), + {Rest, URI1}; +parse_scheme(?STRING_REST(Char, Rest), URI) -> + case is_scheme(Char) of + true -> parse_scheme(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_scheme(?STRING_EMPTY, _URI) -> + throw({error,invalid_uri,<<>>}). + + +%% Check if char is allowed in scheme +-spec is_scheme(char()) -> boolean(). +is_scheme($+) -> true; +is_scheme($-) -> true; +is_scheme($.) -> true; +is_scheme(Char) -> is_alpha(Char) orelse is_digit(Char). + + +%%------------------------------------------------------------------------- +%% hier-part = "//" authority path-abempty +%% / path-absolute +%% / path-rootless +%% / path-empty +%%------------------------------------------------------------------------- +-spec parse_hier(binary(), uri_map()) -> {binary(), uri_map()}. +parse_hier(?STRING_REST("//", Rest), URI) -> + % Parse userinfo - "//" is NOT part of authority + try parse_userinfo(Rest, URI) of + {T, URI1} -> + Userinfo = calculate_parsed_userinfo(Rest, T), + {Rest, URI1#{userinfo => decode_userinfo(Userinfo)}} + catch + throw:{_,_,_} -> + {T, URI1} = parse_host(Rest, URI), + Host = calculate_parsed_host_port(Rest, T), + {Rest, URI1#{host => decode_host(remove_brackets(Host))}} + end; +parse_hier(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-absolute + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; +parse_hier(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_hier(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless + case is_pchar(Char) of + true -> % segment_nz + {T, URI1} = parse_segment(Rest, URI), + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST(Char, Path))}}; + false -> throw({error,invalid_uri,[Char]}) + end; +parse_hier(?STRING_EMPTY, URI) -> + {<<>>, URI}. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.2. Authority] +%% +%% Many URI schemes include a hierarchical element for a naming +%% authority so that governance of the name space defined by the +%% remainder of the URI is delegated to that authority (which may, in +%% turn, delegate it further). +%% +%% The authority component is preceded by a double slash ("//") and is +%% terminated by the next slash ("/"), question mark ("?"), or number +%% sign ("#") character, or by the end of the URI. +%% +%% authority = [ userinfo "@" ] host [ ":" port ] +%% +%% +%% [RFC 3986, Chapter 3.2.1. User Information] +%% +%% The userinfo subcomponent may consist of a user name and, optionally, +%% scheme-specific information about how to gain authorization to access +%% the resource. The user information, if present, is followed by a +%% commercial at-sign ("@") that delimits it from the host. +%% +%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) +%%------------------------------------------------------------------------- +-spec parse_userinfo(binary(), uri_map()) -> {binary(), uri_map()}. +parse_userinfo(?CHAR($@), URI) -> + {?STRING_EMPTY, URI#{host => <<>>}}; +parse_userinfo(?STRING_REST($@, Rest), URI) -> + {T, URI1} = parse_host(Rest, URI), + Host = calculate_parsed_host_port(Rest, T), + {Rest, URI1#{host => decode_host(remove_brackets(Host))}}; +parse_userinfo(?STRING_REST(Char, Rest), URI) -> + case is_userinfo(Char) of + true -> parse_userinfo(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_userinfo(?STRING_EMPTY, _URI) -> + %% URI cannot end in userinfo state + throw({error,invalid_uri,<<>>}). + + +%% Check if char is allowed in userinfo +-spec is_userinfo(char()) -> boolean(). +is_userinfo($%) -> true; % pct-encoded +is_userinfo($:) -> true; +is_userinfo(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.2.2. Host] +%% +%% The host subcomponent of authority is identified by an IP literal +%% encapsulated within square brackets, an IPv4 address in dotted- +%% decimal form, or a registered name. +%% +%% host = IP-literal / IPv4address / reg-name +%% +%% IP-literal = "[" ( IPv6address / IPvFuture ) "]" +%% +%% IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) +%% +%% IPv6address = 6( h16 ":" ) ls32 +%% / "::" 5( h16 ":" ) ls32 +%% / [ h16 ] "::" 4( h16 ":" ) ls32 +%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 +%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 +%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 +%% / [ *4( h16 ":" ) h16 ] "::" ls32 +%% / [ *5( h16 ":" ) h16 ] "::" h16 +%% / [ *6( h16 ":" ) h16 ] "::" +%% +%% ls32 = ( h16 ":" h16 ) / IPv4address +%% ; least-significant 32 bits of address +%% +%% h16 = 1*4HEXDIG +%% ; 16 bits of address represented in hexadecimal +%% +%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet +%% +%% dec-octet = DIGIT ; 0-9 +%% / %x31-39 DIGIT ; 10-99 +%% / "1" 2DIGIT ; 100-199 +%% / "2" %x30-34 DIGIT ; 200-249 +%% / "25" %x30-35 ; 250-255 +%% +%% reg-name = *( unreserved / pct-encoded / sub-delims ) +%%------------------------------------------------------------------------- +-spec parse_host(binary(), uri_map()) -> {binary(), uri_map()}. +parse_host(?STRING_REST($:, Rest), URI) -> + {T, URI1} = parse_port(Rest, URI), + H = calculate_parsed_host_port(Rest, T), + Port = get_port(H), + {Rest, URI1#{port => Port}}; +parse_host(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-abempty + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; +parse_host(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_host(?STRING_REST($[, Rest), URI) -> + parse_ipv6_bin(Rest, [], URI); +parse_host(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_host(?STRING_REST(Char, Rest), URI) -> + case is_digit(Char) of + true -> parse_ipv4_bin(Rest, [Char], URI); + false -> parse_reg_name(?STRING_REST(Char, Rest), URI) + end; +parse_host(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + + +-spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}. +parse_reg_name(?STRING_REST($:, Rest), URI) -> + {T, URI1} = parse_port(Rest, URI), + H = calculate_parsed_host_port(Rest, T), + Port = get_port(H), + {Rest, URI1#{port => Port}}; +parse_reg_name(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-abempty + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; +parse_reg_name(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_reg_name(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_reg_name(?STRING_REST(Char, Rest), URI) -> + case is_reg_name(Char) of + true -> parse_reg_name(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_reg_name(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + +%% Check if char is allowed in reg-name +-spec is_reg_name(char()) -> boolean(). +is_reg_name($%) -> true; +is_reg_name(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + + +-spec parse_ipv4_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}. +parse_ipv4_bin(?STRING_REST($:, Rest), Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {T, URI1} = parse_port(Rest, URI), + H = calculate_parsed_host_port(Rest, T), + Port = get_port(H), + {Rest, URI1#{port => Port}}; +parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {T, URI1} = parse_segment(Rest, URI), % path-abempty + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; +parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) -> + case is_ipv4(Char) of + true -> parse_ipv4_bin(Rest, [Char|Acc], URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_ipv4_bin(?STRING_EMPTY, Acc, URI) -> + _ = validate_ipv4_address(lists:reverse(Acc)), + {?STRING_EMPTY, URI}. + + +%% Check if char is allowed in IPv4 addresses +-spec is_ipv4(char()) -> boolean(). +is_ipv4($.) -> true; +is_ipv4(Char) -> is_digit(Char). + +-spec validate_ipv4_address(list()) -> list(). +validate_ipv4_address(Addr) -> + case inet:parse_ipv4strict_address(Addr) of + {ok, _} -> Addr; + {error, _} -> throw({error,invalid_uri,Addr}) + end. + + +-spec parse_ipv6_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}. +parse_ipv6_bin(?STRING_REST($], Rest), Acc, URI) -> + _ = validate_ipv6_address(lists:reverse(Acc)), + parse_ipv6_bin_end(Rest, URI); +parse_ipv6_bin(?STRING_REST(Char, Rest), Acc, URI) -> + case is_ipv6(Char) of + true -> parse_ipv6_bin(Rest, [Char|Acc], URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_ipv6_bin(?STRING_EMPTY, _Acc, _URI) -> + throw({error,invalid_uri,<<>>}). + +%% Check if char is allowed in IPv6 addresses +-spec is_ipv6(char()) -> boolean(). +is_ipv6($:) -> true; +is_ipv6($.) -> true; +is_ipv6(Char) -> is_hex_digit(Char). + + +-spec parse_ipv6_bin_end(binary(), uri_map()) -> {binary(), uri_map()}. +parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) -> + {T, URI1} = parse_port(Rest, URI), + H = calculate_parsed_host_port(Rest, T), + Port = get_port(H), + {Rest, URI1#{port => Port}}; +parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-abempty + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; +parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) -> + case is_ipv6(Char) of + true -> parse_ipv6_bin_end(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_ipv6_bin_end(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + +-spec validate_ipv6_address(list()) -> list(). +validate_ipv6_address(Addr) -> + case inet:parse_ipv6strict_address(Addr) of + {ok, _} -> Addr; + {error, _} -> throw({error,invalid_uri,Addr}) + end. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.2.2. Port] +%% +%% The port subcomponent of authority is designated by an optional port +%% number in decimal following the host and delimited from it by a +%% single colon (":") character. +%% +%% port = *DIGIT +%%------------------------------------------------------------------------- +-spec parse_port(binary(), uri_map()) -> {binary(), uri_map()}. +parse_port(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-abempty + Path = calculate_parsed_part(Rest, T), + {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; +parse_port(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + Query = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{query => decode_query(Query)}}; +parse_port(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_port(?STRING_REST(Char, Rest), URI) -> + case is_digit(Char) of + true -> parse_port(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_port(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.4. Query] +%% +%% The query component contains non-hierarchical data that, along with +%% data in the path component (Section 3.3), serves to identify a +%% resource within the scope of the URI's scheme and naming authority +%% (if any). The query component is indicated by the first question +%% mark ("?") character and terminated by a number sign ("#") character +%% or by the end of the URI. +%% +%% query = *( pchar / "/" / "?" ) +%%------------------------------------------------------------------------- +-spec parse_query(binary(), uri_map()) -> {binary(), uri_map()}. +parse_query(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), + Fragment = calculate_parsed_query_fragment(Rest, T), + {Rest, URI1#{fragment => decode_fragment(Fragment)}}; +parse_query(?STRING_REST(Char, Rest), URI) -> + case is_query(Char) of + true -> parse_query(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_query(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + + +%% Check if char is allowed in query +-spec is_query(char()) -> boolean(). +is_query($/) -> true; +is_query($?) -> true; +is_query(Char) -> is_pchar(Char). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.5. Fragment] +%% +%% The fragment identifier component of a URI allows indirect +%% identification of a secondary resource by reference to a primary +%% resource and additional identifying information. +%% +%% fragment = *( pchar / "/" / "?" ) +%%------------------------------------------------------------------------- +-spec parse_fragment(binary(), uri_map()) -> {binary(), uri_map()}. +parse_fragment(?STRING_REST(Char, Rest), URI) -> + case is_fragment(Char) of + true -> parse_fragment(Rest, URI); + false -> throw({error,invalid_uri,[Char]}) + end; +parse_fragment(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + + +%% Check if char is allowed in fragment +-spec is_fragment(char()) -> boolean(). +is_fragment($/) -> true; +is_fragment($?) -> true; +is_fragment(Char) -> is_pchar(Char). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 2.2. Reserved Characters] +%% +%% reserved = gen-delims / sub-delims +%% +%% gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" +%% +%% sub-delims = "!" / "$" / "&" / "'" / "(" / ")" +%% / "*" / "+" / "," / ";" / "=" +%% +%%------------------------------------------------------------------------- + +%% Check if char is sub-delim. +-spec is_sub_delim(char()) -> boolean(). +is_sub_delim($!) -> true; +is_sub_delim($$) -> true; +is_sub_delim($&) -> true; +is_sub_delim($') -> true; +is_sub_delim($() -> true; +is_sub_delim($)) -> true; + +is_sub_delim($*) -> true; +is_sub_delim($+) -> true; +is_sub_delim($,) -> true; +is_sub_delim($;) -> true; +is_sub_delim($=) -> true; +is_sub_delim(_) -> false. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 2.3. Unreserved Characters] +%% +%% unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +%% +%%------------------------------------------------------------------------- +-spec is_unreserved(char()) -> boolean(). +is_unreserved($-) -> true; +is_unreserved($.) -> true; +is_unreserved($_) -> true; +is_unreserved($~) -> true; +is_unreserved(Char) -> is_alpha(Char) orelse is_digit(Char). + +-spec is_alpha(char()) -> boolean(). +is_alpha(C) + when $A =< C, C =< $Z; + $a =< C, C =< $z -> true; +is_alpha(_) -> false. + +-spec is_digit(char()) -> boolean(). +is_digit(C) + when $0 =< C, C =< $9 -> true; +is_digit(_) -> false. + +-spec is_hex_digit(char()) -> boolean(). +is_hex_digit(C) + when $0 =< C, C =< $9;$a =< C, C =< $f;$A =< C, C =< $F -> true; +is_hex_digit(_) -> false. + + +%% Remove enclosing brackets from binary +-spec remove_brackets(binary()) -> binary(). +remove_brackets(<<$[/utf8, Rest/binary>>) -> + {H,T} = split_binary(Rest, byte_size(Rest) - 1), + case T =:= <<$]/utf8>> of + true -> H; + false -> Rest + end; +remove_brackets(Addr) -> Addr. + + +%%------------------------------------------------------------------------- +%% Helper functions for calculating the parsed binary. +%%------------------------------------------------------------------------- +-spec calculate_parsed_scheme(binary(), binary()) -> binary(). +calculate_parsed_scheme(Input, <<>>) -> + strip_last_char(Input, [$:]); +calculate_parsed_scheme(Input, Unparsed) -> + get_parsed_binary(Input, Unparsed). + + +-spec calculate_parsed_part(binary(), binary()) -> binary(). +calculate_parsed_part(Input, <<>>) -> + strip_last_char(Input, [$?,$#]); +calculate_parsed_part(Input, Unparsed) -> + get_parsed_binary(Input, Unparsed). + + +-spec calculate_parsed_userinfo(binary(), binary()) -> binary(). +calculate_parsed_userinfo(Input, <<>>) -> + strip_last_char(Input, [$?,$#,$@]); +calculate_parsed_userinfo(Input, Unparsed) -> + get_parsed_binary(Input, Unparsed). + + +-spec calculate_parsed_host_port(binary(), binary()) -> binary(). +calculate_parsed_host_port(Input, <<>>) -> + strip_last_char(Input, [$:,$?,$#,$/]); +calculate_parsed_host_port(Input, Unparsed) -> + get_parsed_binary(Input, Unparsed). + + +calculate_parsed_query_fragment(Input, <<>>) -> + strip_last_char(Input, [$#]); +calculate_parsed_query_fragment(Input, Unparsed) -> + get_parsed_binary(Input, Unparsed). + + +get_port(<<>>) -> + undefined; +get_port(B) -> + try binary_to_integer(B) + catch + error:badarg -> + throw({error, invalid_uri, B}) + end. + + +%% Strip last char if it is in list +%% +%% This function is optimized for speed: parse/1 is about 10% faster than +%% with an alternative implementation based on lists and sets. +strip_last_char(<<>>, _) -> <<>>; +strip_last_char(Input, [C0]) -> + case binary:last(Input) of + C0 -> + init_binary(Input); + _Else -> + Input + end; +strip_last_char(Input, [C0,C1]) -> + case binary:last(Input) of + C0 -> + init_binary(Input); + C1 -> + init_binary(Input); + _Else -> + Input + end; +strip_last_char(Input, [C0,C1,C2]) -> + case binary:last(Input) of + C0 -> + init_binary(Input); + C1 -> + init_binary(Input); + C2 -> + init_binary(Input); + _Else -> + Input + end; +strip_last_char(Input, [C0,C1,C2,C3]) -> + case binary:last(Input) of + C0 -> + init_binary(Input); + C1 -> + init_binary(Input); + C2 -> + init_binary(Input); + C3 -> + init_binary(Input); + _Else -> + Input + end. + + +%% Get parsed binary +get_parsed_binary(Input, Unparsed) -> + {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)), + First. + + +%% Return all bytes of the binary except the last one. The binary must be non-empty. +init_binary(B) -> + {Init, _} = + split_binary(B, byte_size(B) - 1), + Init. + + +%% Returns the size of a binary exluding the first element. +%% Used in calls to split_binary(). +-spec byte_size_exl_head(binary()) -> number(). +byte_size_exl_head(<<>>) -> 0; +byte_size_exl_head(Binary) -> byte_size(Binary) + 1. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 2.1. Percent-Encoding] +%% +%% A percent-encoding mechanism is used to represent a data octet in a +%% component when that octet's corresponding character is outside the +%% allowed set or is being used as a delimiter of, or within, the +%% component. A percent-encoded octet is encoded as a character +%% triplet, consisting of the percent character "%" followed by the two +%% hexadecimal digits representing that octet's numeric value. For +%% example, "%20" is the percent-encoding for the binary octet +%% "00100000" (ABNF: %x20), which in US-ASCII corresponds to the space +%% character (SP). Section 2.4 describes when percent-encoding and +%% decoding is applied. +%% +%% pct-encoded = "%" HEXDIG HEXDIG +%%------------------------------------------------------------------------- +-spec decode_userinfo(binary()) -> binary(). +decode_userinfo(Cs) -> + check_utf8(decode(Cs, fun is_userinfo/1, <<>>)). + +-spec decode_host(binary()) -> binary(). +decode_host(Cs) -> + check_utf8(decode(Cs, fun is_host/1, <<>>)). + +-spec decode_path(binary()) -> binary(). +decode_path(Cs) -> + check_utf8(decode(Cs, fun is_path/1, <<>>)). + +-spec decode_query(binary()) -> binary(). +decode_query(Cs) -> + check_utf8(decode(Cs, fun is_query/1, <<>>)). + +-spec decode_fragment(binary()) -> binary(). +decode_fragment(Cs) -> + check_utf8(decode(Cs, fun is_fragment/1, <<>>)). + + +%% Returns Cs if it is utf8 encoded. +check_utf8(Cs) -> + case unicode:characters_to_list(Cs) of + {incomplete,_,_} -> + throw({error,invalid_utf8,Cs}); + {error,_,_} -> + throw({error,invalid_utf8,Cs}); + _ -> Cs + end. + +%%------------------------------------------------------------------------- +%% Percent-encode +%%------------------------------------------------------------------------- + +%% Only validates as scheme cannot have percent-encoded characters +-spec encode_scheme(list()|binary()) -> list() | binary(). +encode_scheme([]) -> + throw({error,invalid_scheme,""}); +encode_scheme(<<>>) -> + throw({error,invalid_scheme,<<>>}); +encode_scheme(Scheme) -> + case validate_scheme(Scheme) of + true -> Scheme; + false -> throw({error,invalid_scheme,Scheme}) + end. + +-spec encode_userinfo(list()|binary()) -> list() | binary(). +encode_userinfo(Cs) -> + encode(Cs, fun is_userinfo/1). + +-spec encode_host(list()|binary()) -> list() | binary(). +encode_host(Cs) -> + case classify_host(Cs) of + regname -> Cs; + ipv4 -> Cs; + ipv6 -> bracket_ipv6(Cs); + other -> encode(Cs, fun is_reg_name/1) + end. + +-spec encode_path(list()|binary()) -> list() | binary(). +encode_path(Cs) -> + encode(Cs, fun is_path/1). + +-spec encode_query(list()|binary()) -> list() | binary(). +encode_query(Cs) -> + encode(Cs, fun is_query/1). + +-spec encode_fragment(list()|binary()) -> list() | binary(). +encode_fragment(Cs) -> + encode(Cs, fun is_fragment/1). + +%%------------------------------------------------------------------------- +%% Helper funtions for percent-decode +%%------------------------------------------------------------------------- +decode(<<$%,C0,C1,Cs/binary>>, Fun, Acc) -> + case is_hex_digit(C0) andalso is_hex_digit(C1) of + true -> + B = ?HEX2DEC(C0)*16+?HEX2DEC(C1), + decode(Cs, Fun, <<Acc/binary, B>>); + false -> throw({error,invalid_percent_encoding,<<$%,C0,C1>>}) + end; +decode(<<C,Cs/binary>>, Fun, Acc) -> + case Fun(C) of + true -> decode(Cs, Fun, <<Acc/binary, C>>); + false -> throw({error,invalid_percent_encoding,<<C,Cs/binary>>}) + end; +decode(<<>>, _Fun, Acc) -> + Acc. + +%% Check if char is allowed in host +-spec is_host(char()) -> boolean(). +is_host($:) -> true; +is_host(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + +%% Check if char is allowed in path +-spec is_path(char()) -> boolean(). +is_path($/) -> true; +is_path(Char) -> is_pchar(Char). + + +%%------------------------------------------------------------------------- +%% Helper functions for percent-encode +%%------------------------------------------------------------------------- +-spec encode(list()|binary(), fun()) -> list() | binary(). +encode(Component, Fun) when is_list(Component) -> + B = unicode:characters_to_binary(Component), + unicode:characters_to_list(encode(B, Fun, <<>>)); +encode(Component, Fun) when is_binary(Component) -> + encode(Component, Fun, <<>>). +%% +encode(<<Char/utf8, Rest/binary>>, Fun, Acc) -> + C = encode_codepoint_binary(Char, Fun), + encode(Rest, Fun, <<Acc/binary,C/binary>>); +encode(<<Char, Rest/binary>>, _Fun, _Acc) -> + throw({error,invalid_input,<<Char,Rest/binary>>}); +encode(<<>>, _Fun, Acc) -> + Acc. + + +-spec encode_codepoint_binary(integer(), fun()) -> binary(). +encode_codepoint_binary(C, Fun) -> + case Fun(C) of + false -> percent_encode_binary(C); + true -> <<C>> + end. + + +-spec percent_encode_binary(integer()) -> binary(). +percent_encode_binary(Code) -> + percent_encode_binary(<<Code/utf8>>, <<>>). + + +percent_encode_binary(<<A:4,B:4,Rest/binary>>, Acc) -> + percent_encode_binary(Rest, <<Acc/binary,$%,(?DEC2HEX(A)),(?DEC2HEX(B))>>); +percent_encode_binary(<<>>, Acc) -> + Acc. + + +%%------------------------------------------------------------------------- +%%------------------------------------------------------------------------- +validate_scheme([]) -> true; +validate_scheme([H|T]) -> + case is_scheme(H) of + true -> validate_scheme(T); + false -> false + end; +validate_scheme(<<>>) -> true; +validate_scheme(<<H, Rest/binary>>) -> + case is_scheme(H) of + true -> validate_scheme(Rest); + false -> false + end. + + +%%------------------------------------------------------------------------- +%% Classifies hostname into the following categories: +%% regname, ipv4 - address does not contain reserved characters to be +%% percent-encoded +%% ipv6 - address does not contain reserved characters but it shall be +%% encolsed in brackets +%% other - address shall be percent-encoded +%%------------------------------------------------------------------------- +classify_host([]) -> other; +classify_host(Addr) when is_binary(Addr) -> + A = unicode:characters_to_list(Addr), + classify_host_ipv6(A); +classify_host(Addr) -> + classify_host_ipv6(Addr). + +classify_host_ipv6(Addr) -> + case is_ipv6_address(Addr) of + true -> ipv6; + false -> classify_host_ipv4(Addr) + end. + +classify_host_ipv4(Addr) -> + case is_ipv4_address(Addr) of + true -> ipv4; + false -> classify_host_regname(Addr) + end. + +classify_host_regname([]) -> regname; +classify_host_regname([H|T]) -> + case is_reg_name(H) of + true -> classify_host_regname(T); + false -> other + end. + +is_ipv4_address(Addr) -> + case inet:parse_ipv4strict_address(Addr) of + {ok, _} -> true; + {error, _} -> false + end. + +is_ipv6_address(Addr) -> + case inet:parse_ipv6strict_address(Addr) of + {ok, _} -> true; + {error, _} -> false + end. + +bracket_ipv6(Addr) when is_binary(Addr) -> + concat(<<$[,Addr/binary>>,<<$]>>); +bracket_ipv6(Addr) when is_list(Addr) -> + [$[|Addr] ++ "]". + + +%%------------------------------------------------------------------------- +%% Helper funtions for recompose +%%------------------------------------------------------------------------- + +%%------------------------------------------------------------------------- +%% Checks if input Map has valid combination of fields that can be +%% recomposed into a URI. +%% +%% The implementation is based on a decision tree that fulfills the +%% following rules: +%% - 'path' shall always be present in the input map +%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] +%% hier-part = "//" authority path-abempty +%% / path-absolute +%% / path-rootless +%% / path-empty +%% - 'host' shall be present in the input map when 'path' starts with +%% two slashes ("//") +%% path = path-abempty ; begins with "/" or is empty +%% / path-absolute ; begins with "/" but not "//" +%% / path-noscheme ; begins with a non-colon segment +%% / path-rootless ; begins with a segment +%% / path-empty ; zero characters +%% path-abempty = *( "/" segment ) +%% segment = *pchar +%% - 'host' shall be present if userinfo or port is present in input map +%% authority = [ userinfo "@" ] host [ ":" port ] +%% - All fields shall be valid (scheme, userinfo, host, port, path, query +%% or fragment). +%%------------------------------------------------------------------------- +is_valid_map(#{path := Path} = Map) -> + ((starts_with_two_slash(Path) andalso is_valid_map_host(Map)) + orelse + (maps:is_key(userinfo, Map) andalso is_valid_map_host(Map)) + orelse + (maps:is_key(port, Map) andalso is_valid_map_host(Map)) + orelse + all_fields_valid(Map)); +is_valid_map(#{}) -> + false. + + +is_valid_map_host(Map) -> + maps:is_key(host, Map) andalso all_fields_valid(Map). + + +all_fields_valid(Map) -> + Fun = fun(scheme, _, Acc) -> Acc; + (userinfo, _, Acc) -> Acc; + (host, _, Acc) -> Acc; + (port, _, Acc) -> Acc; + (path, _, Acc) -> Acc; + (query, _, Acc) -> Acc; + (fragment, _, Acc) -> Acc; + (_, _, _) -> false + end, + maps:fold(Fun, true, Map). + + +starts_with_two_slash([$/,$/|_]) -> + true; +starts_with_two_slash(?STRING_REST("//", _)) -> + true; +starts_with_two_slash(_) -> false. + + +update_scheme(#{scheme := Scheme}, _) -> + add_colon_postfix(encode_scheme(Scheme)); +update_scheme(#{}, _) -> + empty. + + +update_userinfo(#{userinfo := Userinfo}, empty) -> + add_auth_prefix(encode_userinfo(Userinfo)); +update_userinfo(#{userinfo := Userinfo}, URI) -> + concat(URI,add_auth_prefix(encode_userinfo(Userinfo))); +update_userinfo(#{}, empty) -> + empty; +update_userinfo(#{}, URI) -> + URI. + + +update_host(#{host := Host}, empty) -> + add_auth_prefix(encode_host(Host)); +update_host(#{host := Host} = Map, URI) -> + concat(URI,add_host_prefix(Map, encode_host(Host))); +update_host(#{}, empty) -> + empty; +update_host(#{}, URI) -> + URI. + + +%% URI cannot be empty for ports. E.g. ":8080" is not a valid URI +update_port(#{port := undefined}, URI) -> + concat(URI, <<":">>); +update_port(#{port := Port}, URI) -> + concat(URI,add_colon(encode_port(Port))); +update_port(#{}, URI) -> + URI. + + +update_path(#{path := Path}, empty) -> + encode_path(Path); +update_path(#{path := Path}, URI) -> + concat(URI,encode_path(Path)); +update_path(#{}, empty) -> + empty; +update_path(#{}, URI) -> + URI. + + +update_query(#{query := Query}, empty) -> + encode_query(Query); +update_query(#{query := Query}, URI) -> + concat(URI,add_question_mark(encode_query(Query))); +update_query(#{}, empty) -> + empty; +update_query(#{}, URI) -> + URI. + + +update_fragment(#{fragment := Fragment}, empty) -> + add_hashmark(encode_fragment(Fragment)); +update_fragment(#{fragment := Fragment}, URI) -> + concat(URI,add_hashmark(encode_fragment(Fragment))); +update_fragment(#{}, empty) -> + ""; +update_fragment(#{}, URI) -> + URI. + +%%------------------------------------------------------------------------- +%% Concatenates its arguments that can be lists and binaries. +%% The result is a list if at least one of its argument is a list and +%% binary otherwise. +%%------------------------------------------------------------------------- +concat(A, B) when is_binary(A), is_binary(B) -> + <<A/binary, B/binary>>; +concat(A, B) when is_binary(A), is_list(B) -> + unicode:characters_to_list(A) ++ B; +concat(A, B) when is_list(A) -> + A ++ maybe_to_list(B). + +add_hashmark(Comp) when is_binary(Comp) -> + <<$#, Comp/binary>>; +add_hashmark(Comp) when is_list(Comp) -> + [$#|Comp]. + +add_question_mark(Comp) when is_binary(Comp) -> + <<$?, Comp/binary>>; +add_question_mark(Comp) when is_list(Comp) -> + [$?|Comp]. + +add_colon(Comp) when is_binary(Comp) -> + <<$:, Comp/binary>>. + +add_colon_postfix(Comp) when is_binary(Comp) -> + <<Comp/binary,$:>>; +add_colon_postfix(Comp) when is_list(Comp) -> + Comp ++ ":". + +add_auth_prefix(Comp) when is_binary(Comp) -> + <<"//", Comp/binary>>; +add_auth_prefix(Comp) when is_list(Comp) -> + [$/,$/|Comp]. + +add_host_prefix(#{userinfo := _}, Host) when is_binary(Host) -> + <<$@,Host/binary>>; +add_host_prefix(#{}, Host) when is_binary(Host) -> + <<"//",Host/binary>>; +add_host_prefix(#{userinfo := _}, Host) when is_list(Host) -> + [$@|Host]; +add_host_prefix(#{}, Host) when is_list(Host) -> + [$/,$/|Host]. + +maybe_to_list(Comp) when is_binary(Comp) -> unicode:characters_to_list(Comp); +maybe_to_list(Comp) -> Comp. + +encode_port(Port) -> + integer_to_binary(Port). + +%%------------------------------------------------------------------------- +%% Helper functions for transcode +%%------------------------------------------------------------------------- + +%%------------------------------------------------------------------------- +%% uri_string:transcode(<<"x%00%00%00%F6"/utf32>>). +%% 1. Convert (transcode/2) input to list form (list of unicode codepoints) +%% "x%00%00%00%F6" +%% 2. Accumulate characters until percent-encoded segment (transcode/4). +%% Acc = "x" +%% 3. Convert percent-encoded triplets to binary form (transcode_pct/4) +%% <<0,0,0,246>> +%% 4. Transcode in-encoded binary to out-encoding (utf32 -> utf8): +%% <<195,182>> +%% 5. Percent-encode out-encoded binary: +%% <<"%C3%B6"/utf8>> = <<37,67,51,37,66,54>> +%% 6. Convert binary to list form, reverse it and append the accumulator +%% "6B%3C%" + "x" +%% 7. Reverse Acc and return it +%%------------------------------------------------------------------------- +transcode([$%,_C0,_C1|_Rest] = L, Acc, InEnc, OutEnc) -> + transcode_pct(L, Acc, <<>>, InEnc, OutEnc); +transcode([_C|_Rest] = L, Acc, InEnc, OutEnc) -> + transcode(L, Acc, [], InEnc, OutEnc). +%% +transcode([$%,_C0,_C1|_Rest] = L, Acc, List, InEncoding, OutEncoding) -> + transcode_pct(L, List ++ Acc, <<>>, InEncoding, OutEncoding); +transcode([C|Rest], Acc, List, InEncoding, OutEncoding) -> + transcode(Rest, Acc, [C|List], InEncoding, OutEncoding); +transcode([], Acc, List, _InEncoding, _OutEncoding) -> + lists:reverse(List ++ Acc). + + +%% Transcode percent-encoded segment +transcode_pct([$%,C0,C1|Rest] = L, Acc, B, InEncoding, OutEncoding) -> + case is_hex_digit(C0) andalso is_hex_digit(C1) of + true -> + Int = ?HEX2DEC(C0)*16+?HEX2DEC(C1), + transcode_pct(Rest, Acc, <<B/binary, Int>>, InEncoding, OutEncoding); + false -> throw({error, invalid_percent_encoding,L}) + end; +transcode_pct([_C|_Rest] = L, Acc, B, InEncoding, OutEncoding) -> + OutBinary = convert_to_binary(B, InEncoding, OutEncoding), + PctEncUtf8 = percent_encode_segment(OutBinary), + Out = lists:reverse(convert_to_list(PctEncUtf8, utf8)), + transcode(L, Out ++ Acc, [], InEncoding, OutEncoding); +transcode_pct([], Acc, B, InEncoding, OutEncoding) -> + OutBinary = convert_to_binary(B, InEncoding, OutEncoding), + PctEncUtf8 = percent_encode_segment(OutBinary), + Out = convert_to_list(PctEncUtf8, utf8), + lists:reverse(Acc) ++ Out. + + +%% Convert to binary +convert_to_binary(Binary, InEncoding, OutEncoding) -> + case unicode:characters_to_binary(Binary, InEncoding, OutEncoding) of + {error, _List, RestData} -> + throw({error, invalid_input, RestData}); + {incomplete, _List, RestData} -> + throw({error, invalid_input, RestData}); + Result -> + Result + end. + + +%% Convert to list +convert_to_list(Binary, InEncoding) -> + case unicode:characters_to_list(Binary, InEncoding) of + {error, _List, RestData} -> + throw({error, invalid_input, RestData}); + {incomplete, _List, RestData} -> + throw({error, invalid_input, RestData}); + Result -> + Result + end. + + +%% Flatten input list +flatten_list([], _) -> + []; +flatten_list(L, InEnc) -> + flatten_list(L, InEnc, []). +%% +flatten_list([H|T], InEnc, Acc) when is_binary(H) -> + L = convert_to_list(H, InEnc), + flatten_list(T, InEnc, lists:reverse(L) ++ Acc); +flatten_list([H|T], InEnc, Acc) when is_list(H) -> + flatten_list(H ++ T, InEnc, Acc); +flatten_list([H|T], InEnc, Acc) -> + flatten_list(T, InEnc, [H|Acc]); +flatten_list([], _InEnc, Acc) -> + lists:reverse(Acc); +flatten_list(Arg, _, _) -> + throw({error, invalid_input, Arg}). + + +percent_encode_segment(Segment) -> + percent_encode_binary(Segment, <<>>). + + +%%------------------------------------------------------------------------- +%% Helper functions for normalize +%%------------------------------------------------------------------------- + +%% 6.2.2.1. Case Normalization +normalize_case(#{scheme := Scheme, host := Host} = Map) -> + Map#{scheme => to_lower(Scheme), + host => to_lower(Host)}; +normalize_case(#{host := Host} = Map) -> + Map#{host => to_lower(Host)}; +normalize_case(#{scheme := Scheme} = Map) -> + Map#{scheme => to_lower(Scheme)}; +normalize_case(#{} = Map) -> + Map. + + +to_lower(Cs) when is_list(Cs) -> + B = convert_to_binary(Cs, utf8, utf8), + convert_to_list(to_lower(B), utf8); +to_lower(Cs) when is_binary(Cs) -> + to_lower(Cs, <<>>). +%% +to_lower(<<C,Cs/binary>>, Acc) when $A =< C, C =< $Z -> + to_lower(Cs, <<Acc/binary,(C + 32)>>); +to_lower(<<C,Cs/binary>>, Acc) -> + to_lower(Cs, <<Acc/binary,C>>); +to_lower(<<>>, Acc) -> + Acc. + + +%% 6.2.2.3. Path Segment Normalization +%% 5.2.4. Remove Dot Segments +normalize_path_segment(Map) -> + Path = maps:get(path, Map, undefined), + Map#{path => remove_dot_segments(Path)}. + + +remove_dot_segments(Path) when is_binary(Path) -> + remove_dot_segments(Path, <<>>); +remove_dot_segments(Path) when is_list(Path) -> + B = convert_to_binary(Path, utf8, utf8), + B1 = remove_dot_segments(B, <<>>), + convert_to_list(B1, utf8). +%% +remove_dot_segments(<<>>, Output) -> + Output; +remove_dot_segments(<<"../",T/binary>>, Output) -> + remove_dot_segments(T, Output); +remove_dot_segments(<<"./",T/binary>>, Output) -> + remove_dot_segments(T, Output); +remove_dot_segments(<<"/./",T/binary>>, Output) -> + remove_dot_segments(<<$/,T/binary>>, Output); +remove_dot_segments(<<"/.">>, Output) -> + remove_dot_segments(<<$/>>, Output); +remove_dot_segments(<<"/../",T/binary>>, Output) -> + Out1 = remove_last_segment(Output), + remove_dot_segments(<<$/,T/binary>>, Out1); +remove_dot_segments(<<"/..">>, Output) -> + Out1 = remove_last_segment(Output), + remove_dot_segments(<<$/>>, Out1); +remove_dot_segments(<<$.>>, Output) -> + remove_dot_segments(<<>>, Output); +remove_dot_segments(<<"..">>, Output) -> + remove_dot_segments(<<>>, Output); +remove_dot_segments(Input, Output) -> + {First, Rest} = first_path_segment(Input), + remove_dot_segments(Rest, <<Output/binary,First/binary>>). + + +first_path_segment(Input) -> + F = first_path_segment(Input, <<>>), + split_binary(Input, byte_size(F)). +%% +first_path_segment(<<$/,T/binary>>, Acc) -> + first_path_segment_end(<<T/binary>>, <<Acc/binary,$/>>); +first_path_segment(<<C,T/binary>>, Acc) -> + first_path_segment_end(<<T/binary>>, <<Acc/binary,C>>). + + +first_path_segment_end(<<>>, Acc) -> + Acc; +first_path_segment_end(<<$/,_/binary>>, Acc) -> + Acc; +first_path_segment_end(<<C,T/binary>>, Acc) -> + first_path_segment_end(<<T/binary>>, <<Acc/binary,C>>). + + +remove_last_segment(<<>>) -> + <<>>; +remove_last_segment(B) -> + {Init, Last} = split_binary(B, byte_size(B) - 1), + case Last of + <<$/>> -> + Init; + _Char -> + remove_last_segment(Init) + end. + + +%% RFC 3986, 6.2.3. Scheme-Based Normalization +normalize_scheme_based(Map) -> + Scheme = maps:get(scheme, Map, undefined), + Port = maps:get(port, Map, undefined), + Path= maps:get(path, Map, undefined), + normalize_scheme_based(Map, Scheme, Port, Path). +%% +normalize_scheme_based(Map, Scheme, Port, Path) + when Scheme =:= "http"; Scheme =:= <<"http">> -> + normalize_http(Map, Port, Path); +normalize_scheme_based(Map, Scheme, Port, Path) + when Scheme =:= "https"; Scheme =:= <<"https">> -> + normalize_https(Map, Port, Path); +normalize_scheme_based(Map, Scheme, Port, _Path) + when Scheme =:= "ftp"; Scheme =:= <<"ftp">> -> + normalize_ftp(Map, Port); +normalize_scheme_based(Map, Scheme, Port, _Path) + when Scheme =:= "ssh"; Scheme =:= <<"ssh">> -> + normalize_ssh_sftp(Map, Port); +normalize_scheme_based(Map, Scheme, Port, _Path) + when Scheme =:= "sftp"; Scheme =:= <<"sftp">> -> + normalize_ssh_sftp(Map, Port); +normalize_scheme_based(Map, Scheme, Port, _Path) + when Scheme =:= "tftp"; Scheme =:= <<"tftp">> -> + normalize_tftp(Map, Port); +normalize_scheme_based(Map, _, _, _) -> + Map. + + +normalize_http(Map, Port, Path) -> + M1 = normalize_port(Map, Port, 80), + normalize_http_path(M1, Path). + + +normalize_https(Map, Port, Path) -> + M1 = normalize_port(Map, Port, 443), + normalize_http_path(M1, Path). + + +normalize_ftp(Map, Port) -> + normalize_port(Map, Port, 21). + + +normalize_ssh_sftp(Map, Port) -> + normalize_port(Map, Port, 22). + + +normalize_tftp(Map, Port) -> + normalize_port(Map, Port, 69). + + +normalize_port(Map, Port, Default) -> + case Port of + Default -> + maps:remove(port, Map); + _Else -> + Map + end. + + +normalize_http_path(Map, Path) -> + case Path of + "" -> + Map#{path => "/"}; + <<>> -> + Map#{path => <<"/">>}; + _Else -> + Map + end. diff --git a/lib/stdlib/test/Makefile b/lib/stdlib/test/Makefile index 7b79dcf04d..8490770f3d 100644 --- a/lib/stdlib/test/Makefile +++ b/lib/stdlib/test/Makefile @@ -69,6 +69,7 @@ MODULES= \ sets_test_lib \ sofs_SUITE \ stdlib_SUITE \ + stdlib_bench_SUITE \ string_SUITE \ supervisor_1 \ supervisor_2 \ @@ -86,6 +87,7 @@ MODULES= \ timer_simple_SUITE \ unicode_SUITE \ unicode_util_SUITE \ + uri_string_SUITE \ win32reg_SUITE \ y2k_SUITE \ select_SUITE \ @@ -146,7 +148,7 @@ release_spec: opt release_tests_spec: make_emakefile $(INSTALL_DIR) "$(RELSYSDIR)" - $(INSTALL_DATA) stdlib.spec $(EMAKEFILE) \ + $(INSTALL_DATA) stdlib.spec stdlib_bench.spec $(EMAKEFILE) \ $(ERL_FILES) $(COVERFILE) "$(RELSYSDIR)" chmod -R u+w "$(RELSYSDIR)" @tar cf - *_SUITE_data | (cd "$(RELSYSDIR)"; tar xf -) diff --git a/lib/stdlib/test/erl_internal_SUITE.erl b/lib/stdlib/test/erl_internal_SUITE.erl index 789a9d4363..7d9df1f989 100644 --- a/lib/stdlib/test/erl_internal_SUITE.erl +++ b/lib/stdlib/test/erl_internal_SUITE.erl @@ -80,7 +80,7 @@ callbacks(application) -> callbacks(gen_server) -> [{init,1}, {handle_call,3}, {handle_cast,2}, {handle_info,2}, {terminate,2}, {code_change,3}, - {format_status,2}]; + {format_status,2}, {handle_continue, 2}]; callbacks(gen_fsm) -> [{init,1}, {handle_event,3}, {handle_sync_event,4}, {handle_info,3}, {terminate,3}, {code_change,4}, @@ -101,7 +101,7 @@ callbacks(supervisor) -> optional_callbacks(application) -> []; optional_callbacks(gen_server) -> - [{handle_info, 2}, {terminate, 2}, {code_change, 3}, {format_status, 2}]; + [{handle_info, 2}, {handle_continue, 2}, {terminate, 2}, {code_change, 3}, {format_status, 2}]; optional_callbacks(gen_fsm) -> [{handle_info, 3}, {terminate, 3}, {code_change, 4}, {format_status, 2}]; optional_callbacks(gen_event) -> diff --git a/lib/stdlib/test/ets_SUITE.erl b/lib/stdlib/test/ets_SUITE.erl index 05451a83fb..f329a07b7a 100644 --- a/lib/stdlib/test/ets_SUITE.erl +++ b/lib/stdlib/test/ets_SUITE.erl @@ -1682,7 +1682,7 @@ do_random_test() -> ets:delete(Set), verify_etsmem(EtsMem). -%% Ttest various variants of update_element. +%% Test various variants of update_element. update_element(Config) when is_list(Config) -> EtsMem = etsmem(), repeat_for_opts(fun update_element_opts/1), @@ -2283,13 +2283,8 @@ write_concurrency(Config) when is_list(Config) -> NoHashMem = ets:info(No7,memory), NoHashMem = ets:info(No8,memory), - case erlang:system_info(smp_support) of - true -> - true = YesMem > NoHashMem, - true = YesMem > NoTreeMem; - false -> - true = YesMem =:= NoHashMem - end, + true = YesMem > NoHashMem, + true = YesMem > NoTreeMem, {'EXIT',{badarg,_}} = (catch ets_new(foo,[public,{write_concurrency,foo}])), {'EXIT',{badarg,_}} = (catch ets_new(foo,[public,{write_concurrency}])), @@ -5912,16 +5907,11 @@ add_lists([E1|T1], [E2|T2], Acc) -> run_smp_workers(InitF,ExecF,FiniF,Laps) -> run_smp_workers(InitF,ExecF,FiniF,Laps, 0). run_smp_workers(InitF,ExecF,FiniF,Laps, Exclude) -> - case erlang:system_info(smp_support) of - true -> - case erlang:system_info(schedulers_online) of - N when N > Exclude -> - run_workers_do(InitF,ExecF,FiniF,Laps, N - Exclude); - _ -> - {skipped, "Too few schedulers online"} - end; - false -> - {skipped,"No smp support"} + case erlang:system_info(schedulers_online) of + N when N > Exclude -> + run_workers_do(InitF,ExecF,FiniF,Laps, N - Exclude); + _ -> + {skipped, "Too few schedulers online"} end. run_sched_workers(InitF,ExecF,FiniF,Laps) -> @@ -6231,11 +6221,9 @@ spawn_monitor_with_pid(Pid, Fun, N) -> only_if_smp(Func) -> only_if_smp(2, Func). only_if_smp(Schedulers, Func) -> - case {erlang:system_info(smp_support), - erlang:system_info(schedulers_online)} of - {false,_} -> {skip,"No smp support"}; - {true,N} when N < Schedulers -> {skip,"Too few schedulers online"}; - {true,_} -> Func() + case erlang:system_info(schedulers_online) of + N when N < Schedulers -> {skip,"Too few schedulers online"}; + _ -> Func() end. %% Copy-paste from emulator/test/binary_SUITE.erl diff --git a/lib/stdlib/test/filelib_SUITE.erl b/lib/stdlib/test/filelib_SUITE.erl index c94821bc75..1236fe45f4 100644 --- a/lib/stdlib/test/filelib_SUITE.erl +++ b/lib/stdlib/test/filelib_SUITE.erl @@ -120,7 +120,7 @@ wcc(Wc, Error) -> do_wildcard_1(Dir, Wcf0) -> do_wildcard_2(Dir, Wcf0), Wcf = fun(Wc0) -> - Wc = filename:join(Dir, Wc0), + Wc = Dir ++ "/" ++ Wc0, L = Wcf0(Wc), [subtract_dir(N, Dir) || N <- L] end, @@ -268,8 +268,37 @@ do_wildcard_9(Dir, Wcf) -> %% Cleanup. del(Files), [ok = file:del_dir(D) || D <- lists:reverse(Dirs)], - ok. + do_wildcard_10(Dir, Wcf). + +%% ERL-451/OTP-14577: Escape characters using \\. +do_wildcard_10(Dir, Wcf) -> + All0 = ["{abc}","abc","def","---","z--","@a,b","@c"], + All = case os:type() of + {unix,_} -> + %% '?' is allowed in file names on Unix, but + %% not on Windows. + ["?q"|All0]; + _ -> + All0 + end, + Files = mkfiles(lists:reverse(All), Dir), + + ["{abc}"] = Wcf("\\{a*"), + ["{abc}"] = Wcf("\\{abc}"), + ["abc","def","z--"] = Wcf("[a-z]*"), + ["---","abc","z--"] = Wcf("[a\\-z]*"), + ["@a,b","@c"] = Wcf("@{a\\,b,c}"), + ["@c"] = Wcf("@{a,b,c}"), + + case os:type() of + {unix,_} -> + ["?q"] = Wcf("\\?q"); + _ -> + [] = Wcf("\\?q") + end, + del(Files), + ok. fold_files(Config) when is_list(Config) -> Dir = filename:join(proplists:get_value(priv_dir, Config), "fold_files"), diff --git a/lib/stdlib/test/gen_server_SUITE.erl b/lib/stdlib/test/gen_server_SUITE.erl index 2e9dc4d4fb..2bc220fef2 100644 --- a/lib/stdlib/test/gen_server_SUITE.erl +++ b/lib/stdlib/test/gen_server_SUITE.erl @@ -27,7 +27,7 @@ -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, init_per_group/2,end_per_group/2]). -export([start/1, crash/1, call/1, cast/1, cast_fast/1, - info/1, abcast/1, multicall/1, multicall_down/1, + continue/1, info/1, abcast/1, multicall/1, multicall_down/1, call_remote1/1, call_remote2/1, call_remote3/1, call_remote_n1/1, call_remote_n2/1, call_remote_n3/1, spec_init/1, spec_init_local_registered_parent/1, @@ -37,7 +37,8 @@ get_state/1, replace_state/1, call_with_huge_message_queue/1, undef_handle_call/1, undef_handle_cast/1, undef_handle_info/1, undef_init/1, undef_code_change/1, undef_terminate1/1, - undef_terminate2/1, undef_in_terminate/1, undef_in_handle_info/1 + undef_terminate2/1, undef_in_terminate/1, undef_in_handle_info/1, + undef_handle_continue/1 ]). -export([stop1/1, stop2/1, stop3/1, stop4/1, stop5/1, stop6/1, stop7/1, @@ -52,7 +53,7 @@ %% The gen_server behaviour --export([init/1, handle_call/3, handle_cast/2, +-export([init/1, handle_call/3, handle_cast/2, handle_continue/2, handle_info/2, code_change/3, terminate/2, format_status/2]). suite() -> @@ -61,7 +62,7 @@ suite() -> all() -> [start, {group,stop}, crash, call, cast, cast_fast, info, abcast, - multicall, multicall_down, call_remote1, call_remote2, + continue, multicall, multicall_down, call_remote1, call_remote2, call_remote3, call_remote_n1, call_remote_n2, call_remote_n3, spec_init, spec_init_local_registered_parent, @@ -76,7 +77,7 @@ groups() -> [{stop, [], [stop1, stop2, stop3, stop4, stop5, stop6, stop7, stop8, stop9, stop10]}, {undef_callbacks, [], - [undef_handle_call, undef_handle_cast, undef_handle_info, + [undef_handle_call, undef_handle_cast, undef_handle_info, undef_handle_continue, undef_init, undef_code_change, undef_terminate1, undef_terminate2]}]. @@ -458,6 +459,47 @@ call(Config) when is_list(Config) -> ok. %% -------------------------------------- +%% Test handle_continue. +%% -------------------------------------- + +continue(Config) when is_list(Config) -> + {ok, Pid} = gen_server:start_link(gen_server_SUITE, {continue, self()}, []), + [{Pid, continue}, {Pid, after_continue}] = read_replies(Pid), + + gen_server:call(Pid, {continue_reply, self()}), + [{Pid, continue}, {Pid, after_continue}] = read_replies(Pid), + + gen_server:call(Pid, {continue_noreply, self()}), + [{Pid, continue}, {Pid, after_continue}] = read_replies(Pid), + + gen_server:cast(Pid, {continue_noreply, self()}), + [{Pid, continue}, {Pid, after_continue}] = read_replies(Pid), + + Pid ! {continue_noreply, self()}, + [{Pid, continue}, {Pid, after_continue}] = read_replies(Pid), + + Pid ! {continue_continue, self()}, + [{Pid, before_continue}, {Pid, continue}, {Pid, after_continue}] = read_replies(Pid), + + Ref = monitor(process, Pid), + Pid ! continue_stop, + verify_down_reason(Ref, Pid, normal). + +read_replies(Pid) -> + receive + {Pid, ack} -> read_replies() + after + 1000 -> ct:fail({continue, ack}) + end. + +read_replies() -> + receive + Msg -> [Msg | read_replies()] + after + 0 -> [] + end. + +%% -------------------------------------- %% Test call to nonexisting processes on remote nodes %% -------------------------------------- @@ -1346,7 +1388,7 @@ echo_loop() -> %% Test the default implementation of terminate if the callback module %% does not export it undef_terminate1(Config) when is_list(Config) -> - {ok, Server} = gen_server:start(oc_server, [], []), + {ok, Server} = oc_server:start(), MRef = monitor(process, Server), ok = gen_server:stop(Server), ok = verify_down_reason(MRef, Server, normal). @@ -1354,7 +1396,7 @@ undef_terminate1(Config) when is_list(Config) -> %% Test the default implementation of terminate if the callback module %% does not export it undef_terminate2(Config) when is_list(Config) -> - {ok, Server} = gen_server:start(oc_server, [], []), + {ok, Server} = oc_server:start(), MRef = monitor(process, Server), ok = gen_server:stop(Server, {error, test}, infinity), ok = verify_down_reason(MRef, Server, {error, test}). @@ -1377,7 +1419,7 @@ undef_init(_Config) -> %% The upgrade should fail if code_change is expected in the callback module %% but not exported, but the server should continue with the old code undef_code_change(Config) when is_list(Config) -> - {ok, Server} = gen_server:start(oc_server, [], []), + {ok, Server} = oc_server:start(), {error, {'EXIT', {undef, [{oc_server, code_change, [_, _, _], _}|_]}}} = fake_upgrade(Server, ?MODULE), true = is_process_alive(Server). @@ -1385,7 +1427,7 @@ undef_code_change(Config) when is_list(Config) -> %% The server should crash if the handle_call callback is %% not exported in the callback module undef_handle_call(_Config) -> - {ok, Server} = gen_server:start(oc_server, [], []), + {ok, Server} = oc_server:start(), try gen_server:call(Server, call_msg), ct:fail(should_crash) @@ -1397,17 +1439,25 @@ undef_handle_call(_Config) -> %% The server should crash if the handle_cast callback is %% not exported in the callback module undef_handle_cast(_Config) -> - {ok, Server} = gen_server:start(oc_server, [], []), + {ok, Server} = oc_server:start(), MRef = monitor(process, Server), gen_server:cast(Server, cast_msg), verify_undef_down(MRef, Server, oc_server, handle_cast), ok. +%% The server should crash if the handle_continue callback is +%% not exported in the callback module +undef_handle_continue(_Config) -> + {ok, Server} = oc_server:start(continue), + MRef = monitor(process, Server), + verify_undef_down(MRef, Server, oc_server, handle_continue), + ok. + %% The server should log but not crash if the handle_info callback is %% calling an undefined function undef_handle_info(Config) when is_list(Config) -> error_logger_forwarder:register(), - {ok, Server} = gen_server:start(oc_server, [], []), + {ok, Server} = oc_server:start(), Server ! hej, wait_until_processed(Server, hej, 10), true = is_process_alive(Server), @@ -1570,8 +1620,11 @@ init(hibernate) -> init(sleep) -> ct:sleep(1000), {ok, []}; +init({continue, Pid}) -> + self() ! {after_continue, Pid}, + {ok, [], {continue, {message, Pid}}}; init({state,State}) -> - {ok, State}. + {ok,State}. handle_call(started_p, _From, State) -> io:format("FROZ"), @@ -1604,6 +1657,12 @@ handle_call(shutdown_reason, _From, _State) -> handle_call({call_undef_fun, Mod, Fun}, _From, State) -> Mod:Fun(), {reply, ok, State}; +handle_call({continue_reply, Pid}, _From, State) -> + self() ! {after_continue, Pid}, + {reply, ok, State, {continue, {message, Pid}}}; +handle_call({continue_noreply, Pid}, From, State) -> + self() ! {after_continue, Pid}, + {noreply, State, {continue, {message, Pid, From}}}; handle_call(stop_shutdown_reason, _From, State) -> {stop,{shutdown,stop_reason},State}. @@ -1620,6 +1679,9 @@ handle_cast(hibernate_later, _State) -> handle_cast({call_undef_fun, Mod, Fun}, State) -> Mod:Fun(), {noreply, State}; +handle_cast({continue_noreply, Pid}, State) -> + self() ! {after_continue, Pid}, + {noreply, State, {continue, {message, Pid}}}; handle_cast({From, stop}, State) -> io:format("BAZ"), {stop, {From,stopped}, State}. @@ -1657,9 +1719,34 @@ handle_info(continue, From) -> {noreply, []}; handle_info({From, stop}, State) -> {stop, {From,stopped_info}, State}; +handle_info({after_continue, Pid}, State) -> + Pid ! {self(), after_continue}, + Pid ! {self(), ack}, + {noreply, State}; +handle_info({continue_noreply, Pid}, State) -> + self() ! {after_continue, Pid}, + {noreply, State, {continue, {message, Pid}}}; +handle_info({continue_continue, Pid}, State) -> + {noreply, State, {continue, {continue, Pid}}}; +handle_info(continue_stop, State) -> + {noreply, State, {continue, stop}}; handle_info(_Info, State) -> {noreply, State}. +handle_continue({continue, Pid}, State) -> + Pid ! {self(), before_continue}, + self() ! {after_continue, Pid}, + {noreply, State, {continue, {message, Pid}}}; +handle_continue(stop, State) -> + {stop, normal, State}; +handle_continue({message, Pid}, State) -> + Pid ! {self(), continue}, + {noreply, State}; +handle_continue({message, Pid, From}, State) -> + Pid ! {self(), continue}, + gen_server:reply(From, ok), + {noreply, State}. + code_change(_OldVsn, {new, {undef_in_code_change, {Mod, Fun}}} = State, _Extra) -> diff --git a/lib/stdlib/test/gen_server_SUITE_data/oc_server.erl b/lib/stdlib/test/gen_server_SUITE_data/oc_server.erl index 4ba37987f3..7b92a49bf6 100644 --- a/lib/stdlib/test/gen_server_SUITE_data/oc_server.erl +++ b/lib/stdlib/test/gen_server_SUITE_data/oc_server.erl @@ -22,7 +22,7 @@ -behaviour(gen_server). %% API --export([start/0]). +-export([start/0, start/1]). %% gen_server callbacks -export([init/1]). @@ -30,8 +30,12 @@ -record(state, {}). start() -> - gen_server:start({local, ?MODULE}, ?MODULE, [], []). + gen_server:start(?MODULE, ok, []). -init([]) -> - {ok, #state{}}. +start(continue) -> + gen_server:start(?MODULE, continue, []). +init(ok) -> + {ok, #state{}}; +init(continue) -> + {ok, #state{}, {continue, continue}}. diff --git a/lib/stdlib/test/property_test/README b/lib/stdlib/test/property_test/README new file mode 100644 index 0000000000..57602bf719 --- /dev/null +++ b/lib/stdlib/test/property_test/README @@ -0,0 +1,12 @@ + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% %%% +%%% WARNING %%% +%%% %%% +%%% This is experimental code which may be changed or removed %%% +%%% anytime without any warning. %%% +%%% %%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +The test in this directory are written assuming that the user has a QuickCheck license. They are to be run manually. Some may be possible to be run with other tools, e.g. PropEr. + diff --git a/lib/stdlib/test/property_test/uri_string_recompose.erl b/lib/stdlib/test/property_test/uri_string_recompose.erl new file mode 100644 index 0000000000..e51a671172 --- /dev/null +++ b/lib/stdlib/test/property_test/uri_string_recompose.erl @@ -0,0 +1,361 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(uri_string_recompose). + +-compile(export_all). + +-proptest(eqc). +-proptest([triq,proper]). + +-ifndef(EQC). +-ifndef(PROPER). +-ifndef(TRIQ). +-define(EQC,true). +-endif. +-endif. +-endif. + +-ifdef(EQC). +-include_lib("eqc/include/eqc.hrl"). +-define(MOD_eqc,eqc). + +-else. +-ifdef(PROPER). +-include_lib("proper/include/proper.hrl"). +-define(MOD_eqc,proper). + +-else. +-ifdef(TRIQ). +-define(MOD_eqc,triq). +-include_lib("triq/include/triq.hrl"). + +-endif. +-endif. +-endif. + + +-define(STRING_REST(MatchStr, Rest), <<MatchStr/utf8, Rest/binary>>). + +-define(SCHEME, {scheme, scheme()}). +-define(USER, {userinfo, unicode()}). +-define(HOST, {host, host_map()}). +-define(PORT, {port, port()}). +-define(PATH_ABE, {path, path_abempty_map()}). +-define(PATH_ABS, {path, path_absolute_map()}). +-define(PATH_NOS, {path, path_noscheme_map()}). +-define(PATH_ROO, {path, path_rootless_map()}). +-define(PATH_EMP, {path, path_empty_map()}). +-define(QUERY, {query, query_map()}). +-define(FRAGMENT, {fragment, fragment_map()}). + + +%%%======================================================================== +%%% Properties +%%%======================================================================== + +prop_recompose() -> + ?FORALL(Map, map(), + Map =:= uri_string:parse(uri_string:recompose(Map)) + ). + +%% Stats +prop_map_key_length_collect() -> + ?FORALL(List, map(), + collect(length(maps:keys(List)), true)). + +prop_map_collect() -> + ?FORALL(List, map(), + collect(lists:sort(maps:keys(List)), true)). + +prop_scheme_collect() -> + ?FORALL(List, scheme(), + collect(length(List), true)). + + +%%%======================================================================== +%%% Generators +%%%======================================================================== + +map() -> + ?LET(Gen, comp_proplist(), proplist_to_map(Gen)). + +comp_proplist() -> + frequency([ + {2, [?SCHEME,?PATH_ABS]}, + {2, [?SCHEME,?PATH_ROO]}, + {2, [?SCHEME,?PATH_EMP]}, + {2, [?SCHEME,?HOST,?PATH_ABE]}, + {2, [?SCHEME,?USER,?HOST,?PATH_ABE]}, + {2, [?SCHEME,?HOST,?PORT,?PATH_ABE]}, + {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE]}, + + {2, [?PATH_ABS]}, + {2, [?PATH_NOS]}, + {2, [?PATH_EMP]}, + {2, [?HOST,?PATH_ABE]}, + {2, [?USER,?HOST,?PATH_ABE]}, + {2, [?HOST,?PORT,?PATH_ABE]}, + {2, [?USER,?HOST,?PORT,?PATH_ABE]}, + + + {2, [?SCHEME,?PATH_ABS,?QUERY]}, + {2, [?SCHEME,?PATH_ROO,?QUERY]}, + {2, [?SCHEME,?PATH_EMP,?QUERY]}, + {2, [?SCHEME,?HOST,?PATH_ABE,?QUERY]}, + {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?QUERY]}, + {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?QUERY]}, + {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?QUERY]}, + + {2, [?PATH_ABS,?QUERY]}, + {2, [?PATH_NOS,?QUERY]}, + {2, [?PATH_EMP,?QUERY]}, + {2, [?HOST,?PATH_ABE,?QUERY]}, + {2, [?USER,?HOST,?PATH_ABE,?QUERY]}, + {2, [?HOST,?PORT,?PATH_ABE,?QUERY]}, + {2, [?USER,?HOST,?PORT,?PATH_ABE,?QUERY]}, + + + {2, [?SCHEME,?PATH_ABS,?FRAGMENT]}, + {2, [?SCHEME,?PATH_ROO,?FRAGMENT]}, + {2, [?SCHEME,?PATH_EMP,?FRAGMENT]}, + {2, [?SCHEME,?HOST,?PATH_ABE,?FRAGMENT]}, + {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?FRAGMENT]}, + {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?FRAGMENT]}, + {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?FRAGMENT]}, + + {2, [?PATH_ABS,?FRAGMENT]}, + {2, [?PATH_NOS,?FRAGMENT]}, + {2, [?PATH_EMP,?FRAGMENT]}, + {2, [?HOST,?PATH_ABE,?FRAGMENT]}, + {2, [?USER,?HOST,?PATH_ABE,?FRAGMENT]}, + {2, [?HOST,?PORT,?PATH_ABE,?FRAGMENT]}, + {2, [?USER,?HOST,?PORT,?PATH_ABE,?FRAGMENT]}, + + + {2, [?SCHEME,?PATH_ABS,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?PATH_ROO,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?PATH_EMP,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?USER,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?SCHEME,?USER,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]}, + + {2, [?PATH_ABS,?QUERY,?FRAGMENT]}, + {2, [?PATH_NOS,?QUERY,?FRAGMENT]}, + {2, [?PATH_EMP,?QUERY,?FRAGMENT]}, + {2, [?HOST,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?USER,?HOST,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]}, + {2, [?USER,?HOST,?PORT,?PATH_ABE,?QUERY,?FRAGMENT]} + ]). + + +%%------------------------------------------------------------------------- +%% Path +%%------------------------------------------------------------------------- +path_abempty_map() -> + frequency([{90, path_abe_map()}, + {10, path_empty_map()}]). + +path_abe_map() -> + ?SIZED(Length, path_abe_map(Length, [])). +%% +path_abe_map(0, Segments) -> + ?LET(Gen, Segments, lists:append(Gen)); +path_abe_map(N, Segments) -> + path_abe_map(N-1, [slash(),segment()|Segments]). + + +path_absolute_map() -> + ?SIZED(Length, path_absolute_map(Length, [])). +%% +path_absolute_map(0, Segments) -> + ?LET(Gen, [slash(),segment_nz()|Segments], lists:append(Gen)); +path_absolute_map(N, Segments) -> + path_absolute_map(N-1, [slash(),segment()|Segments]). + + +path_noscheme_map() -> + ?SIZED(Length, path_noscheme_map(Length, [])). +%% +path_noscheme_map(0, Segments) -> + ?LET(Gen, [segment_nz_nc()|Segments], lists:append(Gen)); +path_noscheme_map(N, Segments) -> + path_noscheme_map(N-1, [slash(),segment()|Segments]). + +path_rootless_map() -> + ?SIZED(Length, path_rootless_map(Length, [])). +%% +path_rootless_map(0, Segments) -> + ?LET(Gen, [segment_nz()|Segments], lists:append(Gen)); +path_rootless_map(N, Segments) -> + path_rootless_map(N-1, [slash(),segment()|Segments]). + + +segment_nz() -> + non_empty(segment()). + +segment_nz_nc() -> + non_empty(list(frequency([{30, unreserved()}, + {10, sub_delims()}, + {10, unicode_char()}, + {5, oneof([$@])} + ]))). + + +segment() -> + list(frequency([{30, unreserved()}, + {10, sub_delims()}, + {10, unicode_char()}, + {5, oneof([$:, $@])} + ])). + +slash() -> + "/". + +path_empty_map() -> + "". + + +%%------------------------------------------------------------------------- +%% Path +%%------------------------------------------------------------------------- +host_map() -> + frequency([{30, reg_name()}, + {30, ip_address()} + ]). + + +reg_name() -> + list(frequency([{30, alpha()}, + {10, sub_delims()}, + {10, unicode_char()} + ])). + +ip_address() -> + oneof(["127.0.0.1", "::127.0.0.1", + "2001:0db8:0000:0000:0000:0000:1428:07ab", + "2001:0db8:0000:0000:0000::1428:07ab", + "2001:0db8:0:0:0:0:1428:07ab", + "2001:0db8:0::0:1428:07ab"]). + +%% Generating only reg-names +host_uri() -> + non_empty(list(frequency([{30, unreserved()}, + {10, sub_delims()}, + {10, pct_encoded()} + ]))). + +%%------------------------------------------------------------------------- +%% Port, Query, Fragment +%%------------------------------------------------------------------------- +port() -> + frequency([{10, undefined}, + {10, range(1,65535)} + ]). + +query_map() -> + unicode(). + + +query_uri() -> + [$?| non_empty(list(frequency([{20, pchar()}, + {5, oneof([$/, $?])} % punctuation + ])))]. + +fragment_map() -> + unicode(). + +fragment_uri() -> + [$?| non_empty(list(frequency([{20, pchar()}, + {5, oneof([$/, $?])} % punctuation + ])))]. + + +%%------------------------------------------------------------------------- +%% Scheme +%%------------------------------------------------------------------------- +scheme() -> + ?SIZED(Length, scheme_start(Length, [])). +%% +scheme_start(0, L) -> + ?LET(Gen, L, lists:reverse(Gen)); +scheme_start(N, L) -> + scheme(N-1,[alpha()|L]). + +scheme(0, L) -> + ?LET(Gen, L, lists:reverse(Gen)); +scheme(N, L) -> + scheme(N-1, [scheme_char()|L]). + + +%%------------------------------------------------------------------------- +%% Misc +%%------------------------------------------------------------------------- +unicode() -> + list(frequency([{20, alpha()}, % alpha + {10, digit()}, % digit + {10, unicode_char()} % unicode + ])). + +scheme_char() -> + frequency([{20, alpha()}, % alpha + {20, digit()}, % digit + {5, oneof([$+, $-, $.])} % punctuation + ]). + +sub_delims() -> + oneof([$!, $$, $&, $', $(, $), + $*, $+, $,,$;, $=]). + +pchar() -> + frequency([{20, unreserved()}, + {5, pct_encoded()}, + {5, sub_delims()}, + {1, oneof([$:, $@])} % punctuation + ]). + +unreserved() -> + frequency([{20, alpha()}, + {5, digit()}, + {1, oneof([$-, $., $_, $~])} % punctuation + ]). + +unicode_char() -> + range(913, 1023). + +alpha() -> + frequency([{20, range($a, $z)}, % letters + {20, range($A, $Z)}]). % letters + +digit() -> + range($0, $9). % numbers + +pct_encoded() -> + oneof(["%C3%A4", "%C3%A5", "%C3%B6"]). + + +%%%======================================================================== +%%% Helpers +%%%======================================================================== +proplist_to_map(L) -> + lists:foldl(fun({K,V},M) -> M#{K => V}; + (_,M) -> M + end, #{}, L). diff --git a/lib/stdlib/test/rand_SUITE.erl b/lib/stdlib/test/rand_SUITE.erl index 432293b656..ef4f9faad9 100644 --- a/lib/stdlib/test/rand_SUITE.erl +++ b/lib/stdlib/test/rand_SUITE.erl @@ -29,11 +29,14 @@ basic_stats_uniform_1/1, basic_stats_uniform_2/1, basic_stats_standard_normal/1, basic_stats_normal/1, + uniform_real_conv/1, plugin/1, measure/1, reference_jump_state/1, reference_jump_procdict/1]). -export([test/0, gen/1]). +-export([uniform_real_gen/1, uniform_gen/2]). + -include_lib("common_test/include/ct.hrl"). -define(LOOP, 1000000). @@ -46,7 +49,7 @@ all() -> [seed, interval_int, interval_float, api_eq, reference, - {group, basic_stats}, + {group, basic_stats}, uniform_real_conv, plugin, measure, {group, reference_jump} ]. @@ -80,7 +83,7 @@ test() -> end, Tests). algs() -> - [exs64, exsplus, exsp, exrop, exs1024, exs1024s]. + [exrop, exsp, exs1024s, exs64, exsplus, exs1024]. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -101,7 +104,7 @@ seed_1(Alg) -> _ = rand:uniform(), S00 = get(rand_seed), erase(), - _ = rand:uniform(), + _ = rand:uniform_real(), false = S00 =:= get(rand_seed), %% hopefully %% Choosing algo and seed @@ -228,11 +231,13 @@ interval_float(Config) when is_list(Config) -> interval_float_1(0) -> ok; interval_float_1(N) -> X = rand:uniform(), + Y = rand:uniform_real(), if - 0.0 =< X, X < 1.0 -> + 0.0 =< X, X < 1.0, 0.0 < Y, Y < 1.0 -> ok; true -> - io:format("X=~p 0=<~p<1.0~n", [X,X]), + io:format("X=~p 0.0=<~p<1.0~n", [X,X]), + io:format("Y=~p 0.0<~p<1.0~n", [Y,Y]), exit({X, rand:export_seed()}) end, interval_float_1(N-1). @@ -334,7 +339,13 @@ basic_stats_normal(Config) when is_list(Config) -> IntendedMeanVariancePairs). basic_uniform_1(N, S0, Sum, A0) when N > 0 -> - {X,S} = rand:uniform_s(S0), + {X,S} = + case N band 1 of + 0 -> + rand:uniform_s(S0); + 1 -> + rand:uniform_real_s(S0) + end, I = trunc(X*100), A = array:set(I, 1+array:get(I,A0), A0), basic_uniform_1(N-1, S, Sum+X, A); @@ -400,6 +411,137 @@ normal_s(Mean, Variance, State0) -> rand:normal_s(Mean, Variance, State0). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% White box test of the conversion to float + +uniform_real_conv(Config) when is_list(Config) -> + [begin +%% ct:pal("~13.16.0bx~3.16.0b: ~p~n", [M,E,Gen]), + uniform_real_conv_check(M, E, Gen) + end || {M, E, Gen} <- uniform_real_conv_data()], + uniform_real_scan(0), + uniform_real_scan(3). + +uniform_real_conv_data() -> + [{16#fffffffffffff, -1, [16#3ffffffffffffff]}, + {16#fffffffffffff, -1, [16#3ffffffffffffe0]}, + {16#ffffffffffffe, -1, [16#3ffffffffffffdf]}, + %% + {16#0000000000000, -1, [16#200000000000000]}, + {16#fffffffffffff, -2, [16#1ffffffffffffff]}, + {16#fffffffffffff, -2, [16#1fffffffffffff0]}, + {16#ffffffffffffe, -2, [16#1ffffffffffffef]}, + %% + {16#0000000000000, -2, [16#100000000000000]}, + {16#fffffffffffff, -3, [16#0ffffffffffffff]}, + {16#fffffffffffff, -3, [16#0fffffffffffff8]}, + {16#ffffffffffffe, -3, [16#0fffffffffffff7]}, + %% + {16#0000000000000, -3, [16#080000000000000]}, + {16#fffffffffffff, -4, [16#07fffffffffffff]}, + {16#fffffffffffff, -4, [16#07ffffffffffffc]}, + {16#ffffffffffffe, -4, [16#07ffffffffffffb]}, + %% + {16#0000000000000, -4, [16#040000000000000]}, + {16#fffffffffffff, -5, [16#03fffffffffffff,16#3ffffffffffffff]}, + {16#fffffffffffff, -5, [16#03ffffffffffffe,16#200000000000000]}, + {16#ffffffffffffe, -5, [16#03fffffffffffff,16#1ffffffffffffff]}, + {16#ffffffffffffe, -5, [16#03fffffffffffff,16#100000000000000]}, + %% + {16#0000000000001, -56, [16#000000000000007,16#00000000000007f]}, + {16#0000000000001, -56, [16#000000000000004,16#000000000000040]}, + {16#0000000000000, -57, [16#000000000000003,16#20000000000001f]}, + {16#0000000000000, -57, [16#000000000000000,16#200000000000000]}, + {16#fffffffffffff, -58, [16#000000000000003,16#1ffffffffffffff]}, + {16#fffffffffffff, -58, [16#000000000000000,16#1fffffffffffff0]}, + {16#ffffffffffffe, -58, [16#000000000000000,16#1ffffffffffffef]}, + {16#ffffffffffffe, -58, [16#000000000000000,16#1ffffffffffffe0]}, + %% + {16#0000000000000, -58, [16#000000000000000,16#10000000000000f]}, + {16#0000000000000, -58, [16#000000000000000,16#100000000000000]}, + {2#11001100000000000000000000000000000000000011000000011, % 53 bits + -1022, + [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, % 18 zeros + 2#1100110000000000000000000000000000000000001 bsl 2, % 43 bits + 2#1000000011 bsl (56-10+2)]}, % 10 bits + {0, -1, % 0.5 after retry + [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, % 18 zeros + 2#111111111111111111111111111111111111111111 bsl 2, % 42 bits - retry + 16#200000000000003]}]. % 0.5 + +-define(UNIFORM_REAL_SCAN_PATTERN, (16#19000000000009)). % 53 bits +-define(UNIFORM_REAL_SCAN_NUMBER, (1021)). + +uniform_real_scan_template(K) -> + <<0:?UNIFORM_REAL_SCAN_NUMBER, + ?UNIFORM_REAL_SCAN_PATTERN:53,K:2,0:1>>. + +uniform_real_scan(K) -> + Templ = uniform_real_scan_template(K), + N = ?UNIFORM_REAL_SCAN_NUMBER, + uniform_real_scan(Templ, N, K). + +uniform_real_scan(Templ, N, K) when 0 =< N -> + <<_:N/bits,T/bits>> = Templ, + Data = uniform_real_scan_data(T, K), + uniform_real_conv_check( + ?UNIFORM_REAL_SCAN_PATTERN, N - 1 - ?UNIFORM_REAL_SCAN_NUMBER, Data), + uniform_real_scan(Templ, N - 1, K); +uniform_real_scan(_, _, _) -> + ok. + +uniform_real_scan_data(Templ, K) -> + case Templ of + <<X:56, T/bits>> -> + B = rand:bc64(X), + [(X bsl 2) bor K | + if + 53 =< B -> + []; + true -> + uniform_real_scan_data(T, K) + end]; + _ -> + <<X:56, _/bits>> = <<Templ/bits, 0:56>>, + [(X bsl 2) bor K] + end. + +uniform_real_conv_check(M, E, Gen) -> + <<F/float>> = <<0:1, (E + 16#3ff):11, M:52>>, + try uniform_real_gen(Gen) of + F -> F; + FF -> + ct:pal( + "~s =/= ~s: ~s~n", + [rand:float2str(FF), rand:float2str(F), + [["16#",integer_to_list(G,16),$\s]||G<-Gen]]), + ct:fail({neq, FF, F}) + catch + Error:Reason -> + ct:pal( + "~w:~p ~s: ~s~n", + [Error, Reason, rand:float2str(F), + [["16#",integer_to_list(G,16),$\s]||G<-Gen]]), + ct:fail({Error, Reason, F, erlang:get_stacktrace()}) + end. + + +uniform_real_gen(Gen) -> + State = rand_state(Gen), + {F, {#{type := rand_SUITE_list},[]}} = rand:uniform_real_s(State), + F. + +uniform_gen(Range, Gen) -> + State = rand_state(Gen), + {N, {#{type := rand_SUITE_list},[]}} = rand:uniform_s(Range, State), + N. + +%% Loaded dice for white box tests +rand_state(Gen) -> + {#{type => rand_SUITE_list, bits => 58, weak_low_bits => 1, + next => fun ([H|T]) -> {H, T} end}, + Gen}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Test that the user can write algorithms. plugin(Config) when is_list(Config) -> @@ -459,213 +601,289 @@ measure(Config) -> {skip,{will_not_run_in_scaled_time,Scale}} end. +-define(CHECK_UNIFORM_RANGE(Gen, Range, X, St), + case (Gen) of + {(X), (St)} when is_integer(X), 1 =< (X), (X) =< (Range) -> + St + end). +-define(CHECK_UNIFORM(Gen, X, St), + case (Gen) of + {(X), (St)} when is_float(X), 0.0 =< (X), (X) < 1.0 -> + St + end). +-define(CHECK_UNIFORM_NZ(Gen, X, St), + case (Gen) of + {(X), (St)} when is_float(X), 0.0 < (X), (X) =< 1.0 -> + St + end). +-define(CHECK_NORMAL(Gen, X, St), + case (Gen) of + {(X), (St)} when is_float(X) -> + St + end). + do_measure(_Config) -> - Algos = + Algs = + algs() ++ try crypto:strong_rand_bytes(1) of - <<_>> -> [crypto64, crypto] + <<_>> -> [crypto64, crypto_cache, crypto] catch error:low_entropy -> []; error:undef -> [] - end ++ algs(), + end, %% - ct:pal("RNG uniform integer performance~n",[]), - TMark1 = + ct:pal("~nRNG uniform integer range 10000 performance~n",[]), + _ = measure_1( - random, fun (_) -> 10000 end, - undefined, - fun (Range, State) -> - {int, random:uniform_s(Range, State)} - end), + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), + %% + ct:pal("~nRNG uniform integer 32 bit performance~n",[]), _ = - [measure_1( - Algo, - fun (_) -> 10000 end, - TMark1, - fun (Range, State) -> - {int, rand:uniform_s(Range, State)} - end) || Algo <- Algos], + measure_1( + fun (_) -> 1 bsl 32 end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), %% ct:pal("~nRNG uniform integer half range performance~n",[]), - HalfRangeFun = fun (State) -> half_range(State) end, - TMark2 = - measure_1( - random, - HalfRangeFun, - undefined, - fun (Range, State) -> - {int, random:uniform_s(Range, State)} - end), _ = - [measure_1( - Algo, - HalfRangeFun, - TMark2, - fun (Range, State) -> - {int, rand:uniform_s(Range, State)} - end) || Algo <- Algos], - %% - ct:pal("~nRNG uniform integer half range + 1 performance~n",[]), - HalfRangePlus1Fun = fun (State) -> half_range(State) + 1 end, - TMark3 = measure_1( - random, - HalfRangePlus1Fun, - undefined, - fun (Range, State) -> - {int, random:uniform_s(Range, State)} - end), + fun (State) -> half_range(State) end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), + %% + ct:pal("~nRNG uniform integer half range + 1 performance~n",[]), _ = - [measure_1( - Algo, - HalfRangePlus1Fun, - TMark3, - fun (Range, State) -> - {int, rand:uniform_s(Range, State)} - end) || Algo <- Algos], + measure_1( + fun (State) -> half_range(State) + 1 end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), %% ct:pal("~nRNG uniform integer full range - 1 performance~n",[]), - FullRangeMinus1Fun = fun (State) -> (half_range(State) bsl 1) - 1 end, - TMark4 = - measure_1( - random, - FullRangeMinus1Fun, - undefined, - fun (Range, State) -> - {int, random:uniform_s(Range, State)} - end), _ = - [measure_1( - Algo, - FullRangeMinus1Fun, - TMark4, - fun (Range, State) -> - {int, rand:uniform_s(Range, State)} - end) || Algo <- Algos], + measure_1( + fun (State) -> (half_range(State) bsl 1) - 1 end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), %% ct:pal("~nRNG uniform integer full range performance~n",[]), - FullRangeFun = fun (State) -> half_range(State) bsl 1 end, - TMark5 = - measure_1( - random, - FullRangeFun, - undefined, - fun (Range, State) -> - {int, random:uniform_s(Range, State)} - end), _ = - [measure_1( - Algo, - FullRangeFun, - TMark5, - fun (Range, State) -> - {int, rand:uniform_s(Range, State)} - end) || Algo <- Algos], + measure_1( + fun (State) -> half_range(State) bsl 1 end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), %% ct:pal("~nRNG uniform integer full range + 1 performance~n",[]), - FullRangePlus1Fun = fun (State) -> (half_range(State) bsl 1) + 1 end, - TMark6 = - measure_1( - random, - FullRangePlus1Fun, - undefined, - fun (Range, State) -> - {int, random:uniform_s(Range, State)} - end), _ = - [measure_1( - Algo, - FullRangePlus1Fun, - TMark6, - fun (Range, State) -> - {int, rand:uniform_s(Range, State)} - end) || Algo <- Algos], + measure_1( + fun (State) -> (half_range(State) bsl 1) + 1 end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), %% ct:pal("~nRNG uniform integer double range performance~n",[]), - DoubleRangeFun = fun (State) -> half_range(State) bsl 2 end, - TMark7 = - measure_1( - random, - DoubleRangeFun, - undefined, - fun (Range, State) -> - {int, random:uniform_s(Range, State)} - end), _ = - [measure_1( - Algo, - DoubleRangeFun, - TMark7, - fun (Range, State) -> - {int, rand:uniform_s(Range, State)} - end) || Algo <- Algos], + measure_1( + fun (State) -> + half_range(State) bsl 2 + end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), %% ct:pal("~nRNG uniform integer double range + 1 performance~n",[]), - DoubleRangePlus1Fun = fun (State) -> (half_range(State) bsl 2) + 1 end, - TMark8 = + _ = measure_1( - random, - DoubleRangePlus1Fun, - undefined, - fun (Range, State) -> - {int, random:uniform_s(Range, State)} - end), + fun (State) -> + (half_range(State) bsl 2) + 1 + end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), + %% + ct:pal("~nRNG uniform integer 64 bit performance~n",[]), _ = - [measure_1( - Algo, - DoubleRangePlus1Fun, - TMark8, - fun (Range, State) -> - {int, rand:uniform_s(Range, State)} - end) || Algo <- Algos], + measure_1( + fun (_) -> 1 bsl 64 end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), %% ct:pal("~nRNG uniform float performance~n",[]), - TMark9 = + _ = measure_1( - random, fun (_) -> 0 end, - undefined, - fun (_, State) -> - {uniform, random:uniform_s(State)} - end), + fun (State, _, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM(Mod:uniform_s(St0), X, St) + end, + State) + end, + Algs), + %% + ct:pal("~nRNG uniform_real float performance~n",[]), _ = - [measure_1( - Algo, - fun (_) -> 0 end, - TMark9, - fun (_, State) -> - {uniform, rand:uniform_s(State)} - end) || Algo <- Algos], + measure_1( + fun (_) -> 0 end, + fun (State, _, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM(Mod:uniform_real_s(St0), X, St) + end, + State) + end, + Algs), %% ct:pal("~nRNG normal float performance~n",[]), - io:format("~.12w: not implemented (too few bits)~n", [random]), - _ = [measure_1( - Algo, - fun (_) -> 0 end, - TMark9, - fun (_, State) -> - {normal, rand:normal_s(State)} - end) || Algo <- Algos], + [TMarkNormalFloat|_] = + measure_1( + fun (_) -> 0 end, + fun (State, _, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_NORMAL(Mod:normal_s(St0), X, St1) + end, + State) + end, + Algs), + %% Just for fun try an implementation of the Box-Muller + %% transformation for creating normal distribution floats + %% to compare with our Ziggurat implementation. + %% Generates two numbers per call that we add so they + %% will not be optimized away. Hence the benchmark time + %% is twice what it should be. + TwoPi = 2 * math:pi(), + _ = + measure_1( + fun (_) -> 0 end, + fun (State, _, Mod) -> + measure_loop( + fun (State0) -> + {U1, State1} = Mod:uniform_real_s(State0), + {U2, State2} = Mod:uniform_s(State1), + R = math:sqrt(-2.0 * math:log(U1)), + T = TwoPi * U2, + Z0 = R * math:cos(T), + Z1 = R * math:sin(T), + ?CHECK_NORMAL({Z0 + Z1, State2}, X, State3) + end, + State) + end, + exrop, TMarkNormalFloat), + ok. + +-define(LOOP_MEASURE, (?LOOP div 5)). + +measure_loop(Fun, State) -> + measure_loop(Fun, State, ?LOOP_MEASURE). +%% +measure_loop(Fun, State, N) when 0 < N -> + measure_loop(Fun, Fun(State), N-1); +measure_loop(_, _, _) -> ok. -measure_1(Algo, RangeFun, TMark, Gen) -> +measure_1(RangeFun, Fun, Algs) -> + TMark = measure_1(RangeFun, Fun, hd(Algs), undefined), + [TMark] ++ + [measure_1(RangeFun, Fun, Alg, TMark) || Alg <- tl(Algs)]. + +measure_1(RangeFun, Fun, Alg, TMark) -> Parent = self(), - Seed = - case Algo of + {Mod, State} = + case Alg of crypto64 -> - crypto64_seed(); + {rand, crypto64_seed()}; + crypto_cache -> + {rand, crypto:rand_seed_alg(crypto_cache)}; crypto -> - crypto:rand_seed_s(); + {rand, crypto:rand_seed_s()}; random -> - random:seed(os:timestamp()), get(random_seed); + {random, random:seed(os:timestamp()), get(random_seed)}; _ -> - rand:seed_s(Algo) + {rand, rand:seed_s(Alg)} end, - Range = RangeFun(Seed), + Range = RangeFun(State), Pid = spawn_link( fun() -> - Fun = fun() -> measure_2(?LOOP, Range, Seed, Gen) end, - {Time, ok} = timer:tc(Fun), + {Time, ok} = timer:tc(fun () -> Fun(State, Range, Mod) end), Percent = case TMark of undefined -> 100; @@ -673,7 +891,8 @@ measure_1(Algo, RangeFun, TMark, Gen) -> end, io:format( "~.12w: ~p ns ~p% [16#~.16b]~n", - [Algo, (Time * 1000 + 500) div ?LOOP, Percent, Range]), + [Alg, (Time * 1000 + 500) div ?LOOP_MEASURE, + Percent, Range]), Parent ! {self(), Time}, normal end), @@ -681,21 +900,6 @@ measure_1(Algo, RangeFun, TMark, Gen) -> {Pid, Msg} -> Msg end. -measure_2(N, Range, State0, Fun) when N > 0 -> - case Fun(Range, State0) of - {int, {Random, State}} - when is_integer(Random), Random >= 1, Random =< Range -> - measure_2(N-1, Range, State, Fun); - {uniform, {Random, State}} - when is_float(Random), 0.0 =< Random, Random < 1.0 -> - measure_2(N-1, Range, State, Fun); - {normal, {Random, State}} when is_float(Random) -> - measure_2(N-1, Range, State, Fun); - Res -> - exit({error, Res, State0}) - end; -measure_2(0, _, _, _) -> ok. - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% The jump sequence tests has two parts %% for those with the functional API (jump/1) diff --git a/lib/stdlib/test/stdlib.spec b/lib/stdlib/test/stdlib.spec index 3768e494b2..91712b8963 100644 --- a/lib/stdlib/test/stdlib.spec +++ b/lib/stdlib/test/stdlib.spec @@ -1 +1,2 @@ {suites,"../stdlib_test",all}. +{skip_suites,"../stdlib_test",stdlib_bench_SUITE, "bench only"}. diff --git a/lib/stdlib/test/stdlib_bench.spec b/lib/stdlib/test/stdlib_bench.spec new file mode 100644 index 0000000000..a5d1e1db80 --- /dev/null +++ b/lib/stdlib/test/stdlib_bench.spec @@ -0,0 +1,7 @@ +%% Needed to compile ,unicode_util_SUITE and string_SUITE... +{cases,"../stdlib_test",unicode_util_SUITE, []}. +{cases,"../stdlib_test",string_SUITE, []}. +{skip_suites,"../stdlib_test",unicode_util_SUITE, "bench only"}. +{skip_suites,"../stdlib_test",string_SUITE, "bench only"}. + +{suites,"../stdlib_test",[stdlib_bench_SUITE]}. diff --git a/lib/stdlib/test/stdlib_bench_SUITE.erl b/lib/stdlib/test/stdlib_bench_SUITE.erl new file mode 100644 index 0000000000..8670e7029c --- /dev/null +++ b/lib/stdlib/test/stdlib_bench_SUITE.erl @@ -0,0 +1,107 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2012-2016. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +%% +-module(stdlib_bench_SUITE). +-compile([export_all, nowarn_export_all]). +-include_lib("common_test/include/ct_event.hrl"). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +suite() -> [{ct_hooks,[{ts_install_cth,[{nodenames,2}]}]}]. + + +all() -> + [{group,unicode}]. + +groups() -> + [{unicode,[{repeat,5}], + [norm_nfc_list, norm_nfc_deep_l, norm_nfc_binary, + string_lexemes_list, string_lexemes_binary + ]}]. + +init_per_group(_GroupName, Config) -> + Config. + +end_per_group(_GroupName, Config) -> + Config. + +init_per_suite(Config) -> + Config. + +end_per_suite(Config) -> + Config. + +init_per_testcase(_Func, Conf) -> + Conf. + +end_per_testcase(_Func, _Conf) -> + ok. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-define(REPEAT_NORM, 5). + +norm_nfc_list(Config) -> + Bin = norm_data(Config), + {_N, Mean, _Stddev, Res} = unicode_util_SUITE:time_count(nfc, list, Bin, ?REPEAT_NORM), + report(1000.0*Res / Mean). + +norm_nfc_deep_l(Config) -> + Bin = norm_data(Config), + {_N, Mean, _Stddev, Res} = unicode_util_SUITE:time_count(nfc, deep_l, Bin, ?REPEAT_NORM), + report(1000.0*Res / Mean). + +norm_nfc_binary(Config) -> + Bin = norm_data(Config), + {_N, Mean, _Stddev, Res} = unicode_util_SUITE:time_count(nfc, binary, Bin, ?REPEAT_NORM), + report(1000.0*Res / Mean). + + +string_lexemes_list(Config) -> + %% Use nth_lexeme instead of lexemes to avoid building a result of + %% large lists which causes large differences between test runs, gc? + Bin = norm_data(Config), + Fun = fun(Str) -> string:nth_lexeme(Str, 200000, [$;,$\n,$\r]), 200000 end, + {_N, Mean, _Stddev, Res} = string_SUITE:time_func(Fun, list, Bin, 15), + report(1000.0*Res / Mean). + +string_lexemes_binary(Config) -> + %% Use nth_lexeme instead of lexemes to avoid building a result of + %% large lists which causes large differences between test runs, gc? + Bin = norm_data(Config), + Fun = fun(Str) -> string:nth_lexeme(Str, 200000, [$;,$\n,$\r]), 200000 end, + {_N, Mean, _Stddev, Res} = string_SUITE:time_func(Fun, binary, Bin, ?REPEAT_NORM), + report(1000.0*Res / Mean). + +%%% +report(Tps) -> + ct_event:notify(#event{name = benchmark_data, + data = [{suite,"stdlib_unicode"},{value,round(Tps)}]}), + Tps. + +norm_data(Config) -> + DataDir0 = proplists:get_value(data_dir, Config), + DataDir = filename:join(lists:droplast(filename:split(DataDir0))), + File = filename:join([DataDir,"unicode_util_SUITE_data","NormalizationTest.txt"]), + {ok, Bin} = file:read_file(File), + Bin. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + diff --git a/lib/stdlib/test/string_SUITE.erl b/lib/stdlib/test/string_SUITE.erl index 90f980c0e5..05f18ef238 100644 --- a/lib/stdlib/test/string_SUITE.erl +++ b/lib/stdlib/test/string_SUITE.erl @@ -47,7 +47,7 @@ -export([to_upper_to_lower/1]). %% Run tests when debugging them --export([debug/0]). +-export([debug/0, time_func/4]). suite() -> [{ct_hooks,[ts_install_cth]}, @@ -728,7 +728,7 @@ do_measure(TestDir) -> {ok, Bin} = file:read_file(File), io:format("~p~n",[byte_size(Bin)]), Do = fun(Name, Func, Mode) -> - {N, Mean, Stddev, _} = time_func(Func, Mode, Bin), + {N, Mean, Stddev, _} = time_func(Func, Mode, Bin, 50), io:format("~10w ~6w ~6.2fms ±~4.2fms #~.2w gc included~n", [Name, Mode, Mean/1000, Stddev/1000, N]) end, @@ -938,19 +938,19 @@ needs_check(_) -> true. %%%% Timer stuff -time_func(Fun, Mode, Bin) -> +time_func(Fun, Mode, Bin, Repeat) -> timer:sleep(100), %% Let emulator catch up and clean things before test runs Self = self(), Pid = spawn_link(fun() -> Str = mode(Mode, Bin), - Self ! {self(),time_func(0,0,0, Fun, Str, undefined)} + Self ! {self(),time_func(0,0,0, Fun, Str, undefined, Repeat)} end), receive {Pid,Msg} -> Msg end. -time_func(N,Sum,SumSq, Fun, Str, _) when N < 50 -> +time_func(N,Sum,SumSq, Fun, Str, _, Repeat) when N < Repeat -> {Time, Res} = timer:tc(fun() -> Fun(Str) end), - time_func(N+1,Sum+Time,SumSq+Time*Time, Fun, Str, Res); -time_func(N,Sum,SumSq, _, _, Res) -> + time_func(N+1,Sum+Time,SumSq+Time*Time, Fun, Str, Res, Repeat); +time_func(N,Sum,SumSq, _, _, Res, _) -> Mean = round(Sum / N), Stdev = round(math:sqrt((SumSq - (Sum*Sum/N))/(N - 1))), {N, Mean, Stdev, Res}. diff --git a/lib/stdlib/test/unicode_util_SUITE.erl b/lib/stdlib/test/unicode_util_SUITE.erl index 03c24c7027..7dba0a2fd0 100644 --- a/lib/stdlib/test/unicode_util_SUITE.erl +++ b/lib/stdlib/test/unicode_util_SUITE.erl @@ -29,7 +29,9 @@ get/1, count/1]). --export([debug/0, id/1, bin_split/1, uc_loaded_size/0]). +-export([debug/0, id/1, bin_split/1, uc_loaded_size/0, + time_count/4 %% Used by stdlib_bench_SUITE + ]). suite() -> [{ct_hooks,[ts_install_cth]}, @@ -323,7 +325,7 @@ do_measure(Config) -> File = DataDir ++ "/NormalizationTest.txt", {ok, Bin} = file:read_file(File), Do = fun(Func, Mode) -> - {N, Mean, Stddev, Res} = time_count(Func, Mode, Bin), + {N, Mean, Stddev, Res} = time_count(Func, Mode, Bin, 10), io:format("~4w ~6w ~.10w ~.6wms ±~.2wms #~.2w~n", [Func, Mode, Res, Mean div 1000, Stddev div 1000, N]) end, @@ -345,19 +347,19 @@ uc_loaded_size([_|Rest]) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -time_count(Fun, Mode, Bin) -> +time_count(Fun, Mode, Bin, Repeat) -> timer:sleep(100), %% Let emulator catch up and clean things before test runs Self = self(), Pid = spawn_link(fun() -> Str = mode(Mode, Bin), - Self ! {self(),do_count(0,0,0, Fun, Str, undefined)} + Self ! {self(),do_count(0,0,0, Fun, Str, undefined, Repeat)} end), receive {Pid,Msg} -> Msg end. -do_count(N,Sum,SumSq, Fun, Str, _) when N < 10 -> +do_count(N,Sum,SumSq, Fun, Str, _, Repeat) when N < Repeat -> {Time, Res} = do_count(Fun, Str), - do_count(N+1,Sum+Time,SumSq+Time*Time, Fun, Str, Res); -do_count(N,Sum,SumSq, _, _, Res) -> + do_count(N+1,Sum+Time,SumSq+Time*Time, Fun, Str, Res, Repeat); +do_count(N,Sum,SumSq, _, _, Res, _) -> Mean = round(Sum / N), Stdev = round(math:sqrt((SumSq - (Sum*Sum/N))/(N - 1))), {N, Mean, Stdev, Res}. diff --git a/lib/stdlib/test/unicode_util_SUITE_data/GraphemeBreakTest.txt b/lib/stdlib/test/unicode_util_SUITE_data/GraphemeBreakTest.txt index 4bb4b1369b..d7d8f90de0 100644 --- a/lib/stdlib/test/unicode_util_SUITE_data/GraphemeBreakTest.txt +++ b/lib/stdlib/test/unicode_util_SUITE_data/GraphemeBreakTest.txt @@ -1,23 +1,24 @@ -# GraphemeBreakTest-9.0.0.txt -# Date: 2016-06-02, 18:28:17 GMT -# © 2016 Unicode®, Inc. +# GraphemeBreakTest-10.0.0.txt +# Date: 2017-04-14, 05:40:29 GMT +# © 2017 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see http://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see http://www.unicode.org/reports/tr44/ # -# Default Grapheme Break Test +# Default Grapheme_Cluster_Break Test # # Format: -# <string> (# <comment>)? -# <string> contains hex Unicode code points, with -# ÷ wherever there is a break opportunity, and +# <string> (# <comment>)? +# <string> contains hex Unicode code points, with +# ÷ wherever there is a break opportunity, and # × wherever there is not. # <comment> the format can change, but currently it shows: # - the sample character name # - (x) the Grapheme_Cluster_Break property value for the sample character -# - [x] the rule that determines whether there is a break or not +# - [x] the rule that determines whether there is a break or not, +# as listed in the Rules section of GraphemeBreakTest.html # # These samples may be extended or changed in the future. # @@ -53,8 +54,8 @@ ÷ 0020 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ 0020 ÷ 2764 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ 0020 × 0308 ÷ 2764 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ 0020 ÷ 2640 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ 0020 × 0308 ÷ 2640 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ 0020 ÷ 1F466 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 0020 × 0308 ÷ 1F466 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] @@ -93,8 +94,8 @@ ÷ 000D ÷ 0308 ÷ 1F3FB ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ 000D ÷ 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ 000D ÷ 2764 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ 000D ÷ 0308 ÷ 2764 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ 000D ÷ 2640 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 2640 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ 000D ÷ 1F466 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] BOY (EBG) ÷ [0.3] ÷ 000D ÷ 0308 ÷ 1F466 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 000D ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] @@ -133,8 +134,8 @@ ÷ 000A ÷ 0308 ÷ 1F3FB ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ 000A ÷ 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ 000A ÷ 2764 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ 000A ÷ 0308 ÷ 2764 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ 000A ÷ 2640 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 2640 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ 000A ÷ 1F466 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] BOY (EBG) ÷ [0.3] ÷ 000A ÷ 0308 ÷ 1F466 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 000A ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] @@ -173,8 +174,8 @@ ÷ 0001 ÷ 0308 ÷ 1F3FB ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ 0001 ÷ 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ 0001 ÷ 0308 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ 0001 ÷ 2764 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ 0001 ÷ 0308 ÷ 2764 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ 0001 ÷ 2640 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 2640 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ 0001 ÷ 1F466 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] BOY (EBG) ÷ [0.3] ÷ 0001 ÷ 0308 ÷ 1F466 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 0001 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] @@ -213,8 +214,8 @@ ÷ 0300 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ 0300 ÷ 2764 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ 0300 × 0308 ÷ 2764 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ 0300 ÷ 2640 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ 0300 × 0308 ÷ 2640 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ 0300 ÷ 1F466 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 0300 × 0308 ÷ 1F466 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] @@ -253,8 +254,8 @@ ÷ 0600 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ 0600 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ 0600 × 0308 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ 0600 × 2764 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ 0600 × 0308 ÷ 2764 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ 0600 × 2640 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ 0600 × 0308 ÷ 2640 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ 0600 × 1F466 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] BOY (EBG) ÷ [0.3] ÷ 0600 × 0308 ÷ 1F466 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 0600 × 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] <reserved-0378> (Other) ÷ [0.3] @@ -293,8 +294,8 @@ ÷ 0903 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ 0903 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ 0903 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ 0903 ÷ 2764 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ 0903 × 0308 ÷ 2764 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ 0903 ÷ 2640 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ 0903 × 0308 ÷ 2640 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ 0903 ÷ 1F466 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 0903 × 0308 ÷ 1F466 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] @@ -333,8 +334,8 @@ ÷ 1100 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ 1100 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ 1100 × 0308 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ 1100 ÷ 2764 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ 1100 × 0308 ÷ 2764 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ 1100 ÷ 2640 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ 1100 × 0308 ÷ 2640 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ 1100 ÷ 1F466 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 1100 × 0308 ÷ 1F466 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] @@ -373,8 +374,8 @@ ÷ 1160 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ 1160 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ 1160 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ 1160 ÷ 2764 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ 1160 × 0308 ÷ 2764 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ 1160 ÷ 2640 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ 1160 × 0308 ÷ 2640 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ 1160 ÷ 1F466 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 1160 × 0308 ÷ 1F466 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 1160 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] @@ -413,8 +414,8 @@ ÷ 11A8 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ 11A8 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ 11A8 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ 11A8 ÷ 2764 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ 11A8 × 0308 ÷ 2764 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ 11A8 ÷ 2640 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ 11A8 × 0308 ÷ 2640 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ 11A8 ÷ 1F466 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 11A8 × 0308 ÷ 1F466 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] @@ -453,8 +454,8 @@ ÷ AC00 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ AC00 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ AC00 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ AC00 ÷ 2764 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ AC00 × 0308 ÷ 2764 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ AC00 ÷ 2640 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ AC00 × 0308 ÷ 2640 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ AC00 ÷ 1F466 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ AC00 × 0308 ÷ 1F466 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] @@ -493,8 +494,8 @@ ÷ AC01 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ AC01 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ AC01 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ AC01 ÷ 2764 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ AC01 × 0308 ÷ 2764 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ AC01 ÷ 2640 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ AC01 × 0308 ÷ 2640 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ AC01 ÷ 1F466 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ AC01 × 0308 ÷ 1F466 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] @@ -533,8 +534,8 @@ ÷ 1F1E6 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ 1F1E6 × 0308 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ 1F1E6 ÷ 2764 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ 1F1E6 × 0308 ÷ 2764 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ 1F1E6 ÷ 2640 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 2640 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ 1F1E6 ÷ 1F466 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 1F1E6 × 0308 ÷ 1F466 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 1F1E6 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] @@ -573,8 +574,8 @@ ÷ 261D × 0308 × 1F3FB ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) × [10.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ 261D × 200D ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ 261D × 0308 × 200D ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ 261D ÷ 2764 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ 261D × 0308 ÷ 2764 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ 261D ÷ 2640 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ 261D × 0308 ÷ 2640 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ 261D ÷ 1F466 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 261D × 0308 ÷ 1F466 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 261D ÷ 0378 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] @@ -613,8 +614,8 @@ ÷ 1F3FB × 0308 ÷ 1F3FB ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ 1F3FB × 200D ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ 1F3FB × 0308 × 200D ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ 1F3FB ÷ 2764 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ 1F3FB × 0308 ÷ 2764 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ 1F3FB ÷ 2640 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ 1F3FB × 0308 ÷ 2640 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ 1F3FB ÷ 1F466 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 1F3FB × 0308 ÷ 1F466 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 1F3FB ÷ 0378 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] @@ -653,54 +654,54 @@ ÷ 200D × 0308 ÷ 1F3FB ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ 200D × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ 200D × 2764 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [11.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ 200D × 0308 ÷ 2764 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ 200D × 2640 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [11.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ 200D × 0308 ÷ 2640 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ 200D × 1F466 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [11.0] BOY (EBG) ÷ [0.3] ÷ 200D × 0308 ÷ 1F466 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 200D ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 200D × 0308 ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] ÷ 200D ÷ D800 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ 200D × 0308 ÷ D800 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] -÷ 2764 ÷ 0020 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] SPACE (Other) ÷ [0.3] -÷ 2764 × 0308 ÷ 0020 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] -÷ 2764 ÷ 000D ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] -÷ 2764 × 0308 ÷ 000D ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] -÷ 2764 ÷ 000A ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] -÷ 2764 × 0308 ÷ 000A ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] -÷ 2764 ÷ 0001 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] -÷ 2764 × 0308 ÷ 0001 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] -÷ 2764 × 0300 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 2764 × 0308 × 0300 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 2764 ÷ 0600 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] -÷ 2764 × 0308 ÷ 0600 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] -÷ 2764 × 0903 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] -÷ 2764 × 0308 × 0903 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] -÷ 2764 ÷ 1100 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] -÷ 2764 × 0308 ÷ 1100 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] -÷ 2764 ÷ 1160 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] -÷ 2764 × 0308 ÷ 1160 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] -÷ 2764 ÷ 11A8 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] -÷ 2764 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] -÷ 2764 ÷ AC00 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] -÷ 2764 × 0308 ÷ AC00 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] -÷ 2764 ÷ AC01 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] -÷ 2764 × 0308 ÷ AC01 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] -÷ 2764 ÷ 1F1E6 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] -÷ 2764 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] -÷ 2764 ÷ 261D ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3] -÷ 2764 × 0308 ÷ 261D ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3] -÷ 2764 ÷ 1F3FB ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] -÷ 2764 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] -÷ 2764 × 200D ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ 2764 × 0308 × 200D ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ 2764 ÷ 2764 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ 2764 × 0308 ÷ 2764 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ 2764 ÷ 1F466 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] BOY (EBG) ÷ [0.3] -÷ 2764 × 0308 ÷ 1F466 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] -÷ 2764 ÷ 0378 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] -÷ 2764 × 0308 ÷ 0378 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] -÷ 2764 ÷ D800 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] -÷ 2764 × 0308 ÷ D800 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 2640 ÷ 0020 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 2640 × 0308 ÷ 0020 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 2640 ÷ 000D ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 2640 × 0308 ÷ 000D ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 2640 ÷ 000A ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 2640 × 0308 ÷ 000A ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 2640 ÷ 0001 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 2640 × 0308 ÷ 0001 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 2640 × 0300 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 2640 × 0308 × 0300 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 2640 ÷ 0600 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 2640 × 0308 ÷ 0600 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 2640 × 0903 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 2640 × 0308 × 0903 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 2640 ÷ 1100 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 2640 × 0308 ÷ 1100 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 2640 ÷ 1160 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 2640 × 0308 ÷ 1160 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 2640 ÷ 11A8 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 2640 × 0308 ÷ 11A8 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 2640 ÷ AC00 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 2640 × 0308 ÷ AC00 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 2640 ÷ AC01 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 2640 × 0308 ÷ AC01 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 2640 ÷ 1F1E6 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 2640 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 2640 ÷ 261D ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3] +÷ 2640 × 0308 ÷ 261D ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3] +÷ 2640 ÷ 1F3FB ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] +÷ 2640 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] +÷ 2640 × 200D ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 2640 × 0308 × 200D ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 2640 ÷ 2640 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ 2640 × 0308 ÷ 2640 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ 2640 ÷ 1F466 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] BOY (EBG) ÷ [0.3] +÷ 2640 × 0308 ÷ 1F466 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] +÷ 2640 ÷ 0378 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 2640 × 0308 ÷ 0378 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 2640 ÷ D800 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 2640 × 0308 ÷ D800 ÷ # ÷ [0.2] FEMALE SIGN (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ 1F466 ÷ 0020 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 1F466 × 0308 ÷ 0020 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 1F466 ÷ 000D ÷ # ÷ [0.2] BOY (EBG) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -733,8 +734,8 @@ ÷ 1F466 × 0308 × 1F3FB ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) × [10.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ 1F466 × 200D ÷ # ÷ [0.2] BOY (EBG) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ 1F466 × 0308 × 200D ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ 1F466 ÷ 2764 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ 1F466 × 0308 ÷ 2764 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ 1F466 ÷ 2640 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ 1F466 × 0308 ÷ 2640 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ 1F466 ÷ 1F466 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 1F466 × 0308 ÷ 1F466 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 1F466 ÷ 0378 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] @@ -773,8 +774,8 @@ ÷ 0378 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ 0378 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ 0378 × 0308 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ 0378 ÷ 2764 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ 0378 × 0308 ÷ 2764 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ 0378 ÷ 2640 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ 0378 × 0308 ÷ 2640 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ 0378 ÷ 1F466 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 0378 × 0308 ÷ 1F466 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ 0378 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] @@ -813,8 +814,8 @@ ÷ D800 ÷ 0308 ÷ 1F3FB ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ D800 ÷ 200D ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] ÷ D800 ÷ 0308 × 200D ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] -÷ D800 ÷ 2764 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] -÷ D800 ÷ 0308 ÷ 2764 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ D800 ÷ 2640 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] +÷ D800 ÷ 0308 ÷ 2640 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ D800 ÷ 1F466 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] BOY (EBG) ÷ [0.3] ÷ D800 ÷ 0308 ÷ 1F466 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3] ÷ D800 ÷ 0378 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] @@ -840,7 +841,7 @@ ÷ 261D × 1F3FB ÷ 261D ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [10.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3] ÷ 1F466 × 1F3FB ÷ # ÷ [0.2] BOY (EBG) × [10.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] ÷ 200D × 1F466 × 1F3FB ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [11.0] BOY (EBG) × [10.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3] -÷ 200D × 2764 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [11.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3] +÷ 200D × 2640 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [11.0] FEMALE SIGN (Glue_After_Zwj) ÷ [0.3] ÷ 200D × 1F466 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [11.0] BOY (EBG) ÷ [0.3] ÷ 1F466 ÷ 1F466 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] BOY (EBG) ÷ [0.3] # diff --git a/lib/stdlib/test/unicode_util_SUITE_data/LineBreakTest.txt b/lib/stdlib/test/unicode_util_SUITE_data/LineBreakTest.txt index 05efcf5a44..6715446aba 100644 --- a/lib/stdlib/test/unicode_util_SUITE_data/LineBreakTest.txt +++ b/lib/stdlib/test/unicode_util_SUITE_data/LineBreakTest.txt @@ -1,25 +1,28 @@ -# LineBreakTest-9.0.0.txt -# Date: 2016-06-18, 00:42:06 GMT -# © 2016 Unicode®, Inc. +# LineBreakTest-10.0.0.txt +# Date: 2017-04-14, 05:40:30 GMT +# © 2017 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see http://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see http://www.unicode.org/reports/tr44/ # -# Default Line Break Test +# Default Line_Break Test # # Format: -# <string> (# <comment>)? -# <string> contains hex Unicode code points, with -# ÷ wherever there is a break opportunity, and +# <string> (# <comment>)? +# <string> contains hex Unicode code points, with +# ÷ wherever there is a break opportunity, and # × wherever there is not. # <comment> the format can change, but currently it shows: # - the sample character name # - (x) the Line_Break property value for the sample character -# - [x] the rule that determines whether there is a break or not -# Note: The Line Break tests use tailoring of numbers described in Example 7 of Section 8.2 Examples of Customization. -# They also differ from the results produced by a pair table implementation in sequences like: ZW SP CL. +# - [x] the rule that determines whether there is a break or not, +# as listed in the Rules section of LineBreakTest.html +# +# Note: +# The Line_Break tests use tailoring of numbers described in +# Example 7 of Section 8.2, "Examples of Customization" of UAX #14. # # These samples may be extended or changed in the future. # diff --git a/lib/stdlib/test/unicode_util_SUITE_data/NormalizationTest.txt b/lib/stdlib/test/unicode_util_SUITE_data/NormalizationTest.txt index e133fa8a78..71f2371c5e 100644 --- a/lib/stdlib/test/unicode_util_SUITE_data/NormalizationTest.txt +++ b/lib/stdlib/test/unicode_util_SUITE_data/NormalizationTest.txt @@ -1,6 +1,6 @@ -# NormalizationTest-9.0.0.txt -# Date: 2016-04-04, 11:41:55 GMT -# © 2016 Unicode®, Inc. +# NormalizationTest-10.0.0.txt +# Date: 2017-03-08, 08:41:55 GMT +# © 2017 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see http://www.unicode.org/terms_of_use.html # @@ -17653,6 +17653,10 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 0CBC 3099 093C 0334 0062;0061 0334 0CBC 093C 3099 0062;0061 0334 0CBC 093C 3099 0062;0061 0334 0CBC 093C 3099 0062;0061 0334 0CBC 093C 3099 0062; # (a◌಼◌゙◌़◌̴b; a◌̴◌಼◌़◌゙b; a◌̴◌಼◌़◌゙b; a◌̴◌಼◌़◌゙b; a◌̴◌಼◌़◌゙b; ) LATIN SMALL LETTER A, KANNADA SIGN NUKTA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, LATIN SMALL LETTER B 0061 05B0 094D 3099 0CCD 0062;0061 3099 094D 0CCD 05B0 0062;0061 3099 094D 0CCD 05B0 0062;0061 3099 094D 0CCD 05B0 0062;0061 3099 094D 0CCD 05B0 0062; # (a◌ְ◌्◌゙◌್b; a◌゙◌्◌್◌ְb; a◌゙◌्◌್◌ְb; a◌゙◌्◌್◌ְb; a◌゙◌्◌್◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, KANNADA SIGN VIRAMA, LATIN SMALL LETTER B 0061 0CCD 05B0 094D 3099 0062;0061 3099 0CCD 094D 05B0 0062;0061 3099 0CCD 094D 05B0 0062;0061 3099 0CCD 094D 05B0 0062;0061 3099 0CCD 094D 05B0 0062; # (a◌್◌ְ◌्◌゙b; a◌゙◌್◌्◌ְb; a◌゙◌್◌्◌ְb; a◌゙◌್◌्◌ְb; a◌゙◌್◌्◌ְb; ) LATIN SMALL LETTER A, KANNADA SIGN VIRAMA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B +0061 05B0 094D 3099 0D3B 0062;0061 3099 094D 0D3B 05B0 0062;0061 3099 094D 0D3B 05B0 0062;0061 3099 094D 0D3B 05B0 0062;0061 3099 094D 0D3B 05B0 0062; # (a◌ְ◌्◌゙◌഻b; a◌゙◌्◌഻◌ְb; a◌゙◌्◌഻◌ְb; a◌゙◌्◌഻◌ְb; a◌゙◌्◌഻◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, MALAYALAM SIGN VERTICAL BAR VIRAMA, LATIN SMALL LETTER B +0061 0D3B 05B0 094D 3099 0062;0061 3099 0D3B 094D 05B0 0062;0061 3099 0D3B 094D 05B0 0062;0061 3099 0D3B 094D 05B0 0062;0061 3099 0D3B 094D 05B0 0062; # (a◌഻◌ְ◌्◌゙b; a◌゙◌഻◌्◌ְb; a◌゙◌഻◌्◌ְb; a◌゙◌഻◌्◌ְb; a◌゙◌഻◌्◌ְb; ) LATIN SMALL LETTER A, MALAYALAM SIGN VERTICAL BAR VIRAMA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B +0061 05B0 094D 3099 0D3C 0062;0061 3099 094D 0D3C 05B0 0062;0061 3099 094D 0D3C 05B0 0062;0061 3099 094D 0D3C 05B0 0062;0061 3099 094D 0D3C 05B0 0062; # (a◌ְ◌्◌゙◌഼b; a◌゙◌्◌഼◌ְb; a◌゙◌्◌഼◌ְb; a◌゙◌्◌഼◌ְb; a◌゙◌्◌഼◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, MALAYALAM SIGN CIRCULAR VIRAMA, LATIN SMALL LETTER B +0061 0D3C 05B0 094D 3099 0062;0061 3099 0D3C 094D 05B0 0062;0061 3099 0D3C 094D 05B0 0062;0061 3099 0D3C 094D 05B0 0062;0061 3099 0D3C 094D 05B0 0062; # (a◌഼◌ְ◌्◌゙b; a◌゙◌഼◌्◌ְb; a◌゙◌഼◌्◌ְb; a◌゙◌഼◌्◌ְb; a◌゙◌഼◌्◌ְb; ) LATIN SMALL LETTER A, MALAYALAM SIGN CIRCULAR VIRAMA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B 0061 05B0 094D 3099 0D4D 0062;0061 3099 094D 0D4D 05B0 0062;0061 3099 094D 0D4D 05B0 0062;0061 3099 094D 0D4D 05B0 0062;0061 3099 094D 0D4D 05B0 0062; # (a◌ְ◌्◌゙◌്b; a◌゙◌्◌്◌ְb; a◌゙◌्◌്◌ְb; a◌゙◌्◌്◌ְb; a◌゙◌्◌്◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, MALAYALAM SIGN VIRAMA, LATIN SMALL LETTER B 0061 0D4D 05B0 094D 3099 0062;0061 3099 0D4D 094D 05B0 0062;0061 3099 0D4D 094D 05B0 0062;0061 3099 0D4D 094D 05B0 0062;0061 3099 0D4D 094D 05B0 0062; # (a◌്◌ְ◌्◌゙b; a◌゙◌്◌्◌ְb; a◌゙◌്◌्◌ְb; a◌゙◌്◌्◌ְb; a◌゙◌്◌्◌ְb; ) LATIN SMALL LETTER A, MALAYALAM SIGN VIRAMA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B 0061 05B0 094D 3099 0DCA 0062;0061 3099 094D 0DCA 05B0 0062;0061 3099 094D 0DCA 05B0 0062;0061 3099 094D 0DCA 05B0 0062;0061 3099 094D 0DCA 05B0 0062; # (a◌ְ◌्◌゙◌්b; a◌゙◌्◌්◌ְb; a◌゙◌्◌්◌ְb; a◌゙◌्◌්◌ְb; a◌゙◌्◌්◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, SINHALA SIGN AL-LAKUNA, LATIN SMALL LETTER B @@ -17999,6 +18003,14 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 1DF4 0315 0300 05AE 0062;0061 05AE 1DF4 0300 0315 0062;0061 05AE 1DF4 0300 0315 0062;0061 05AE 1DF4 0300 0315 0062;0061 05AE 1DF4 0300 0315 0062; # (a◌ᷴ◌̕◌̀◌֮b; a◌֮◌ᷴ◌̀◌̕b; a◌֮◌ᷴ◌̀◌̕b; a◌֮◌ᷴ◌̀◌̕b; a◌֮◌ᷴ◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING LATIN SMALL LETTER U WITH DIAERESIS, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 1DF5 0062;00E0 05AE 1DF5 0315 0062;0061 05AE 0300 1DF5 0315 0062;00E0 05AE 1DF5 0315 0062;0061 05AE 0300 1DF5 0315 0062; # (a◌̕◌̀◌֮◌᷵b; à◌֮◌᷵◌̕b; a◌֮◌̀◌᷵◌̕b; à◌֮◌᷵◌̕b; a◌֮◌̀◌᷵◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING UP TACK ABOVE, LATIN SMALL LETTER B 0061 1DF5 0315 0300 05AE 0062;0061 05AE 1DF5 0300 0315 0062;0061 05AE 1DF5 0300 0315 0062;0061 05AE 1DF5 0300 0315 0062;0061 05AE 1DF5 0300 0315 0062; # (a◌᷵◌̕◌̀◌֮b; a◌֮◌᷵◌̀◌̕b; a◌֮◌᷵◌̀◌̕b; a◌֮◌᷵◌̀◌̕b; a◌֮◌᷵◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING UP TACK ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 035C 0315 0300 1DF6 0062;00E0 0315 1DF6 035C 0062;0061 0300 0315 1DF6 035C 0062;00E0 0315 1DF6 035C 0062;0061 0300 0315 1DF6 035C 0062; # (a◌͜◌̕◌̀◌᷶b; à◌̕◌᷶◌͜b; a◌̀◌̕◌᷶◌͜b; à◌̕◌᷶◌͜b; a◌̀◌̕◌᷶◌͜b; ) LATIN SMALL LETTER A, COMBINING DOUBLE BREVE BELOW, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, COMBINING KAVYKA ABOVE RIGHT, LATIN SMALL LETTER B +0061 1DF6 035C 0315 0300 0062;00E0 1DF6 0315 035C 0062;0061 0300 1DF6 0315 035C 0062;00E0 1DF6 0315 035C 0062;0061 0300 1DF6 0315 035C 0062; # (a◌᷶◌͜◌̕◌̀b; à◌᷶◌̕◌͜b; a◌̀◌᷶◌̕◌͜b; à◌᷶◌̕◌͜b; a◌̀◌᷶◌̕◌͜b; ) LATIN SMALL LETTER A, COMBINING KAVYKA ABOVE RIGHT, COMBINING DOUBLE BREVE BELOW, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, LATIN SMALL LETTER B +0061 0300 05AE 1D16D 1DF7 0062;00E0 1D16D 05AE 1DF7 0062;0061 1D16D 05AE 1DF7 0300 0062;00E0 1D16D 05AE 1DF7 0062;0061 1D16D 05AE 1DF7 0300 0062; # (a◌̀◌𝅭֮◌᷷b; à𝅭◌֮◌᷷b; a𝅭◌֮◌᷷◌̀b; à𝅭◌֮◌᷷b; a𝅭◌֮◌᷷◌̀b; ) LATIN SMALL LETTER A, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, MUSICAL SYMBOL COMBINING AUGMENTATION DOT, COMBINING KAVYKA ABOVE LEFT, LATIN SMALL LETTER B +0061 1DF7 0300 05AE 1D16D 0062;00E0 1D16D 1DF7 05AE 0062;0061 1D16D 1DF7 05AE 0300 0062;00E0 1D16D 1DF7 05AE 0062;0061 1D16D 1DF7 05AE 0300 0062; # (a◌᷷◌̀◌𝅭֮b; à𝅭◌᷷◌֮b; a𝅭◌᷷◌֮◌̀b; à𝅭◌᷷◌֮b; a𝅭◌᷷◌֮◌̀b; ) LATIN SMALL LETTER A, COMBINING KAVYKA ABOVE LEFT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, MUSICAL SYMBOL COMBINING AUGMENTATION DOT, LATIN SMALL LETTER B +0061 0300 05AE 1D16D 1DF8 0062;00E0 1D16D 05AE 1DF8 0062;0061 1D16D 05AE 1DF8 0300 0062;00E0 1D16D 05AE 1DF8 0062;0061 1D16D 05AE 1DF8 0300 0062; # (a◌̀◌𝅭֮◌᷸b; à𝅭◌֮◌᷸b; a𝅭◌֮◌᷸◌̀b; à𝅭◌֮◌᷸b; a𝅭◌֮◌᷸◌̀b; ) LATIN SMALL LETTER A, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, MUSICAL SYMBOL COMBINING AUGMENTATION DOT, COMBINING DOT ABOVE LEFT, LATIN SMALL LETTER B +0061 1DF8 0300 05AE 1D16D 0062;00E0 1D16D 1DF8 05AE 0062;0061 1D16D 1DF8 05AE 0300 0062;00E0 1D16D 1DF8 05AE 0062;0061 1D16D 1DF8 05AE 0300 0062; # (a◌᷸◌̀◌𝅭֮b; à𝅭◌᷸◌֮b; a𝅭◌᷸◌֮◌̀b; à𝅭◌᷸◌֮b; a𝅭◌᷸◌֮◌̀b; ) LATIN SMALL LETTER A, COMBINING DOT ABOVE LEFT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, MUSICAL SYMBOL COMBINING AUGMENTATION DOT, LATIN SMALL LETTER B +0061 059A 0316 302A 1DF9 0062;0061 302A 0316 1DF9 059A 0062;0061 302A 0316 1DF9 059A 0062;0061 302A 0316 1DF9 059A 0062;0061 302A 0316 1DF9 059A 0062; # (a◌֚◌̖◌〪◌᷹b; a◌〪◌̖◌᷹◌֚b; a◌〪◌̖◌᷹◌֚b; a◌〪◌̖◌᷹◌֚b; a◌〪◌̖◌᷹◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, IDEOGRAPHIC LEVEL TONE MARK, COMBINING WIDE INVERTED BRIDGE BELOW, LATIN SMALL LETTER B +0061 1DF9 059A 0316 302A 0062;0061 302A 1DF9 0316 059A 0062;0061 302A 1DF9 0316 059A 0062;0061 302A 1DF9 0316 059A 0062;0061 302A 1DF9 0316 059A 0062; # (a◌᷹◌֚◌̖◌〪b; a◌〪◌᷹◌̖◌֚b; a◌〪◌᷹◌̖◌֚b; a◌〪◌᷹◌̖◌֚b; a◌〪◌᷹◌̖◌֚b; ) LATIN SMALL LETTER A, COMBINING WIDE INVERTED BRIDGE BELOW, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, IDEOGRAPHIC LEVEL TONE MARK, LATIN SMALL LETTER B 0061 0315 0300 05AE 1DFB 0062;00E0 05AE 1DFB 0315 0062;0061 05AE 0300 1DFB 0315 0062;00E0 05AE 1DFB 0315 0062;0061 05AE 0300 1DFB 0315 0062; # (a◌̕◌̀◌֮◌᷻b; à◌֮◌᷻◌̕b; a◌֮◌̀◌᷻◌̕b; à◌֮◌᷻◌̕b; a◌֮◌̀◌᷻◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING DELETION MARK, LATIN SMALL LETTER B 0061 1DFB 0315 0300 05AE 0062;0061 05AE 1DFB 0300 0315 0062;0061 05AE 1DFB 0300 0315 0062;0061 05AE 1DFB 0300 0315 0062;0061 05AE 1DFB 0300 0315 0062; # (a◌᷻◌̕◌̀◌֮b; a◌֮◌᷻◌̀◌̕b; a◌֮◌᷻◌̀◌̕b; a◌֮◌᷻◌̀◌̕b; a◌֮◌᷻◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING DELETION MARK, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 035D 035C 0315 1DFC 0062;0061 0315 035C 1DFC 035D 0062;0061 0315 035C 1DFC 035D 0062;0061 0315 035C 1DFC 035D 0062;0061 0315 035C 1DFC 035D 0062; # (a◌͝◌͜◌̕◌᷼b; a◌̕◌͜◌᷼◌͝b; a◌̕◌͜◌᷼◌͝b; a◌̕◌͜◌᷼◌͝b; a◌̕◌͜◌᷼◌͝b; ) LATIN SMALL LETTER A, COMBINING DOUBLE BREVE, COMBINING DOUBLE BREVE BELOW, COMBINING COMMA ABOVE RIGHT, COMBINING DOUBLE INVERTED BREVE BELOW, LATIN SMALL LETTER B @@ -18397,8 +18409,20 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 116B7 3099 093C 0334 0062;0061 0334 116B7 093C 3099 0062;0061 0334 116B7 093C 3099 0062;0061 0334 116B7 093C 3099 0062;0061 0334 116B7 093C 3099 0062; # (a◌𑚷◌゙◌़◌̴b; a◌̴◌𑚷◌़◌゙b; a◌̴◌𑚷◌़◌゙b; a◌̴◌𑚷◌़◌゙b; a◌̴◌𑚷◌़◌゙b; ) LATIN SMALL LETTER A, TAKRI SIGN NUKTA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, LATIN SMALL LETTER B 0061 05B0 094D 3099 1172B 0062;0061 3099 094D 1172B 05B0 0062;0061 3099 094D 1172B 05B0 0062;0061 3099 094D 1172B 05B0 0062;0061 3099 094D 1172B 05B0 0062; # (a◌ְ◌्◌゙◌𑜫b; a◌゙◌्◌𑜫◌ְb; a◌゙◌्◌𑜫◌ְb; a◌゙◌्◌𑜫◌ְb; a◌゙◌्◌𑜫◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, AHOM SIGN KILLER, LATIN SMALL LETTER B 0061 1172B 05B0 094D 3099 0062;0061 3099 1172B 094D 05B0 0062;0061 3099 1172B 094D 05B0 0062;0061 3099 1172B 094D 05B0 0062;0061 3099 1172B 094D 05B0 0062; # (a◌𑜫◌ְ◌्◌゙b; a◌゙◌𑜫◌्◌ְb; a◌゙◌𑜫◌्◌ְb; a◌゙◌𑜫◌्◌ְb; a◌゙◌𑜫◌्◌ְb; ) LATIN SMALL LETTER A, AHOM SIGN KILLER, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B +0061 05B0 094D 3099 11A34 0062;0061 3099 094D 11A34 05B0 0062;0061 3099 094D 11A34 05B0 0062;0061 3099 094D 11A34 05B0 0062;0061 3099 094D 11A34 05B0 0062; # (a◌ְ◌्◌゙◌𑨴b; a◌゙◌्◌𑨴◌ְb; a◌゙◌्◌𑨴◌ְb; a◌゙◌्◌𑨴◌ְb; a◌゙◌्◌𑨴◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, ZANABAZAR SQUARE SIGN VIRAMA, LATIN SMALL LETTER B +0061 11A34 05B0 094D 3099 0062;0061 3099 11A34 094D 05B0 0062;0061 3099 11A34 094D 05B0 0062;0061 3099 11A34 094D 05B0 0062;0061 3099 11A34 094D 05B0 0062; # (a◌𑨴◌ְ◌्◌゙b; a◌゙◌𑨴◌्◌ְb; a◌゙◌𑨴◌्◌ְb; a◌゙◌𑨴◌्◌ְb; a◌゙◌𑨴◌्◌ְb; ) LATIN SMALL LETTER A, ZANABAZAR SQUARE SIGN VIRAMA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B +0061 05B0 094D 3099 11A47 0062;0061 3099 094D 11A47 05B0 0062;0061 3099 094D 11A47 05B0 0062;0061 3099 094D 11A47 05B0 0062;0061 3099 094D 11A47 05B0 0062; # (a◌ְ◌्◌゙◌𑩇b; a◌゙◌्◌𑩇◌ְb; a◌゙◌्◌𑩇◌ְb; a◌゙◌्◌𑩇◌ְb; a◌゙◌्◌𑩇◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, ZANABAZAR SQUARE SUBJOINER, LATIN SMALL LETTER B +0061 11A47 05B0 094D 3099 0062;0061 3099 11A47 094D 05B0 0062;0061 3099 11A47 094D 05B0 0062;0061 3099 11A47 094D 05B0 0062;0061 3099 11A47 094D 05B0 0062; # (a◌𑩇◌ְ◌्◌゙b; a◌゙◌𑩇◌्◌ְb; a◌゙◌𑩇◌्◌ְb; a◌゙◌𑩇◌्◌ְb; a◌゙◌𑩇◌्◌ְb; ) LATIN SMALL LETTER A, ZANABAZAR SQUARE SUBJOINER, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B +0061 05B0 094D 3099 11A99 0062;0061 3099 094D 11A99 05B0 0062;0061 3099 094D 11A99 05B0 0062;0061 3099 094D 11A99 05B0 0062;0061 3099 094D 11A99 05B0 0062; # (a◌ְ◌्◌゙◌𑪙b; a◌゙◌्◌𑪙◌ְb; a◌゙◌्◌𑪙◌ְb; a◌゙◌्◌𑪙◌ְb; a◌゙◌्◌𑪙◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, SOYOMBO SUBJOINER, LATIN SMALL LETTER B +0061 11A99 05B0 094D 3099 0062;0061 3099 11A99 094D 05B0 0062;0061 3099 11A99 094D 05B0 0062;0061 3099 11A99 094D 05B0 0062;0061 3099 11A99 094D 05B0 0062; # (a◌𑪙◌ְ◌्◌゙b; a◌゙◌𑪙◌्◌ְb; a◌゙◌𑪙◌्◌ְb; a◌゙◌𑪙◌्◌ְb; a◌゙◌𑪙◌्◌ְb; ) LATIN SMALL LETTER A, SOYOMBO SUBJOINER, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B 0061 05B0 094D 3099 11C3F 0062;0061 3099 094D 11C3F 05B0 0062;0061 3099 094D 11C3F 05B0 0062;0061 3099 094D 11C3F 05B0 0062;0061 3099 094D 11C3F 05B0 0062; # (a◌ְ◌्◌゙◌𑰿b; a◌゙◌्◌𑰿◌ְb; a◌゙◌्◌𑰿◌ְb; a◌゙◌्◌𑰿◌ְb; a◌゙◌्◌𑰿◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, BHAIKSUKI SIGN VIRAMA, LATIN SMALL LETTER B 0061 11C3F 05B0 094D 3099 0062;0061 3099 11C3F 094D 05B0 0062;0061 3099 11C3F 094D 05B0 0062;0061 3099 11C3F 094D 05B0 0062;0061 3099 11C3F 094D 05B0 0062; # (a◌𑰿◌ְ◌्◌゙b; a◌゙◌𑰿◌्◌ְb; a◌゙◌𑰿◌्◌ְb; a◌゙◌𑰿◌्◌ְb; a◌゙◌𑰿◌्◌ְb; ) LATIN SMALL LETTER A, BHAIKSUKI SIGN VIRAMA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B +0061 3099 093C 0334 11D42 0062;0061 0334 093C 11D42 3099 0062;0061 0334 093C 11D42 3099 0062;0061 0334 093C 11D42 3099 0062;0061 0334 093C 11D42 3099 0062; # (a◌゙◌़◌̴◌𑵂b; a◌̴◌़◌𑵂◌゙b; a◌̴◌़◌𑵂◌゙b; a◌̴◌़◌𑵂◌゙b; a◌̴◌़◌𑵂◌゙b; ) LATIN SMALL LETTER A, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, MASARAM GONDI SIGN NUKTA, LATIN SMALL LETTER B +0061 11D42 3099 093C 0334 0062;0061 0334 11D42 093C 3099 0062;0061 0334 11D42 093C 3099 0062;0061 0334 11D42 093C 3099 0062;0061 0334 11D42 093C 3099 0062; # (a◌𑵂◌゙◌़◌̴b; a◌̴◌𑵂◌़◌゙b; a◌̴◌𑵂◌़◌゙b; a◌̴◌𑵂◌़◌゙b; a◌̴◌𑵂◌़◌゙b; ) LATIN SMALL LETTER A, MASARAM GONDI SIGN NUKTA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, LATIN SMALL LETTER B +0061 05B0 094D 3099 11D44 0062;0061 3099 094D 11D44 05B0 0062;0061 3099 094D 11D44 05B0 0062;0061 3099 094D 11D44 05B0 0062;0061 3099 094D 11D44 05B0 0062; # (a◌ְ◌्◌゙◌𑵄b; a◌゙◌्◌𑵄◌ְb; a◌゙◌्◌𑵄◌ְb; a◌゙◌्◌𑵄◌ְb; a◌゙◌्◌𑵄◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, MASARAM GONDI SIGN HALANTA, LATIN SMALL LETTER B +0061 11D44 05B0 094D 3099 0062;0061 3099 11D44 094D 05B0 0062;0061 3099 11D44 094D 05B0 0062;0061 3099 11D44 094D 05B0 0062;0061 3099 11D44 094D 05B0 0062; # (a◌𑵄◌ְ◌्◌゙b; a◌゙◌𑵄◌्◌ְb; a◌゙◌𑵄◌्◌ְb; a◌゙◌𑵄◌्◌ְb; a◌゙◌𑵄◌्◌ְb; ) LATIN SMALL LETTER A, MASARAM GONDI SIGN HALANTA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B +0061 05B0 094D 3099 11D45 0062;0061 3099 094D 11D45 05B0 0062;0061 3099 094D 11D45 05B0 0062;0061 3099 094D 11D45 05B0 0062;0061 3099 094D 11D45 05B0 0062; # (a◌ְ◌्◌゙◌𑵅b; a◌゙◌्◌𑵅◌ְb; a◌゙◌्◌𑵅◌ְb; a◌゙◌्◌𑵅◌ְb; a◌゙◌्◌𑵅◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, MASARAM GONDI VIRAMA, LATIN SMALL LETTER B +0061 11D45 05B0 094D 3099 0062;0061 3099 11D45 094D 05B0 0062;0061 3099 11D45 094D 05B0 0062;0061 3099 11D45 094D 05B0 0062;0061 3099 11D45 094D 05B0 0062; # (a◌𑵅◌ְ◌्◌゙b; a◌゙◌𑵅◌्◌ְb; a◌゙◌𑵅◌्◌ְb; a◌゙◌𑵅◌्◌ְb; a◌゙◌𑵅◌्◌ְb; ) LATIN SMALL LETTER A, MASARAM GONDI VIRAMA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B 0061 093C 0334 16AF0 0062;0061 0334 16AF0 093C 0062;0061 0334 16AF0 093C 0062;0061 0334 16AF0 093C 0062;0061 0334 16AF0 093C 0062; # (a◌़◌̴◌𖫰b; a◌̴◌𖫰◌़b; a◌̴◌𖫰◌़b; a◌̴◌𖫰◌़b; a◌̴◌𖫰◌़b; ) LATIN SMALL LETTER A, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, BASSA VAH COMBINING HIGH TONE, LATIN SMALL LETTER B 0061 16AF0 093C 0334 0062;0061 16AF0 0334 093C 0062;0061 16AF0 0334 093C 0062;0061 16AF0 0334 093C 0062;0061 16AF0 0334 093C 0062; # (a◌𖫰◌़◌̴b; a◌𖫰◌̴◌़b; a◌𖫰◌̴◌़b; a◌𖫰◌̴◌़b; a◌𖫰◌̴◌़b; ) LATIN SMALL LETTER A, BASSA VAH COMBINING HIGH TONE, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, LATIN SMALL LETTER B 0061 093C 0334 16AF1 0062;0061 0334 16AF1 093C 0062;0061 0334 16AF1 093C 0062;0061 0334 16AF1 093C 0062;0061 0334 16AF1 093C 0062; # (a◌़◌̴◌𖫱b; a◌̴◌𖫱◌़b; a◌̴◌𖫱◌़b; a◌̴◌𖫱◌़b; a◌̴◌𖫱◌़b; ) LATIN SMALL LETTER A, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, BASSA VAH COMBINING LOW TONE, LATIN SMALL LETTER B diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl new file mode 100644 index 0000000000..c625da56c6 --- /dev/null +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -0,0 +1,844 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(uri_string_SUITE). + +-include_lib("common_test/include/ct.hrl"). + +-export([all/0, suite/0,groups/0, + normalize/1, + parse_binary_fragment/1, parse_binary_host/1, parse_binary_host_ipv4/1, + parse_binary_host_ipv6/1, + parse_binary_path/1, parse_binary_pct_encoded_fragment/1, parse_binary_pct_encoded_query/1, + parse_binary_pct_encoded_userinfo/1, parse_binary_port/1, + parse_binary_query/1, parse_binary_scheme/1, parse_binary_userinfo/1, + parse_fragment/1, parse_host/1, parse_host_ipv4/1, parse_host_ipv6/1, + parse_path/1, parse_pct_encoded_fragment/1, parse_pct_encoded_query/1, + parse_pct_encoded_userinfo/1, parse_port/1, + parse_query/1, parse_scheme/1, parse_userinfo/1, + parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1, + parse_special/1, parse_special2/1, parse_negative/1, + recompose_fragment/1, recompose_parse_fragment/1, + recompose_query/1, recompose_parse_query/1, + recompose_path/1, recompose_parse_path/1, + recompose_autogen/1, parse_recompose_autogen/1, + transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1 + ]). + + +-define(SCHEME, "foo"). +-define(USERINFO, "åsa"). +-define(USERINFO_ENC, "%C3%A5sa"). +-define(HOST, "älvsjö"). +-define(HOST_ENC, "%C3%A4lvsj%C3%B6"). +-define(IPV6, "::127.0.0.1"). +-define(IPV6_ENC, "[::127.0.0.1]"). +-define(PORT, 8042). +-define(PORT_ENC, ":8042"). +-define(PATH, "/där"). +-define(PATH_ENC, "/d%C3%A4r"). +-define(QUERY, "name=örn"). +-define(QUERY_ENC, "?name=%C3%B6rn"). +-define(FRAGMENT, "näsa"). +-define(FRAGMENT_ENC, "#n%C3%A4sa"). + + +suite() -> + [{timetrap,{minutes,1}}]. + +all() -> + [ + normalize, + parse_binary_scheme, + parse_binary_userinfo, + parse_binary_pct_encoded_userinfo, + parse_binary_host, + parse_binary_host_ipv4, + parse_binary_host_ipv6, + parse_binary_port, + parse_binary_path, + parse_binary_query, + parse_binary_pct_encoded_query, + parse_binary_fragment, + parse_binary_pct_encoded_fragment, + parse_scheme, + parse_userinfo, + parse_pct_encoded_userinfo, + parse_host, + parse_host_ipv4, + parse_host_ipv6, + parse_port, + parse_path, + parse_query, + parse_pct_encoded_query, + parse_fragment, + parse_pct_encoded_fragment, + parse_list, + parse_binary, + parse_mixed, + parse_relative, + parse_special, + parse_special2, + parse_negative, + recompose_fragment, + recompose_parse_fragment, + recompose_query, + recompose_parse_query, + recompose_path, + recompose_parse_path, + recompose_autogen, + parse_recompose_autogen, + transcode_basic, + transcode_options, + transcode_mixed, + transcode_negative + ]. + +groups() -> + []. + + +%%------------------------------------------------------------------------- +%% Helper functions +%%------------------------------------------------------------------------- +uri_combinations() -> + [[Sch,Usr,Hst,Prt,Pat,Qry,Frg] || + Sch <- [fun update_scheme/1, fun update_scheme_binary/1, none], + Usr <- [fun update_userinfo/1, fun update_userinfo_binary/1, none], + Hst <- [fun update_host/1, fun update_host_binary/1, + fun update_ipv6/1, fun update_ipv6_binary/1, none], + Prt <- [fun update_port/1, none], + Pat <- [fun update_path/1, fun update_path_binary/1], + Qry <- [fun update_query/1,fun update_query_binary/1, none], + Frg <- [fun update_fragment/1, fun update_fragment_binary/1, none], + not (Usr =:= none andalso Hst =:= none andalso Prt =/= none), + not (Usr =/= none andalso Hst =:= none andalso Prt =:= none), + not (Usr =/= none andalso Hst =:= none andalso Prt =/= none)]. + + +generate_test_vector(Comb) -> + Fun = fun (F, {Map, URI}) when is_function(F) -> F({Map, URI}); + (_, Map) -> Map + end, + lists:foldl(Fun, {#{}, empty}, Comb). + +generate_test_vectors(L) -> + lists:map(fun generate_test_vector/1, L). + +update_fragment({In, empty}) -> + {In#{fragment => ?FRAGMENT}, ?FRAGMENT_ENC}; +update_fragment({In, Out}) when is_list(Out) -> + {In#{fragment => ?FRAGMENT}, Out ++ ?FRAGMENT_ENC}; +update_fragment({In, Out}) when is_binary(Out) -> + {In#{fragment => ?FRAGMENT}, binary_to_list(Out) ++ ?FRAGMENT_ENC}. + +update_fragment_binary({In, empty}) -> + {In#{fragment => <<?FRAGMENT/utf8>>}, <<?FRAGMENT_ENC>>}; +update_fragment_binary({In, Out}) when is_list(Out) -> + {In#{fragment => <<?FRAGMENT/utf8>>}, Out ++ ?FRAGMENT_ENC}; +update_fragment_binary({In, Out}) when is_binary(Out) -> + {In#{fragment => <<?FRAGMENT/utf8>>}, <<Out/binary,?FRAGMENT_ENC>>}. + + +update_query({In, empty}) -> + {In#{query => ?QUERY}, ?QUERY_ENC}; +update_query({In, Out}) when is_list(Out) -> + {In#{query => ?QUERY}, Out ++ ?QUERY_ENC}; +update_query({In, Out}) when is_binary(Out) -> + {In#{query => ?QUERY}, binary_to_list(Out) ++ ?QUERY_ENC}. + +update_query_binary({In, empty}) -> + {In#{query => <<?QUERY/utf8>>}, <<?QUERY_ENC>>}; +update_query_binary({In, Out}) when is_list(Out) -> + {In#{query => <<?QUERY/utf8>>}, Out ++ ?QUERY_ENC}; +update_query_binary({In, Out}) when is_binary(Out) -> + {In#{query => <<?QUERY/utf8>>}, <<Out/binary,?QUERY_ENC>>}. + +update_path({In, empty}) -> + {In#{path => ?PATH}, ?PATH_ENC}; +update_path({In, Out}) when is_list(Out) -> + {In#{path => ?PATH}, Out ++ ?PATH_ENC}; +update_path({In, Out}) when is_binary(Out) -> + {In#{path => ?PATH}, binary_to_list(Out) ++ ?PATH_ENC}. + +update_path_binary({In, empty}) -> + {In#{path => <<?PATH/utf8>>}, <<?PATH_ENC>>}; +update_path_binary({In, Out}) when is_list(Out) -> + {In#{path => <<?PATH/utf8>>}, Out ++ ?PATH_ENC}; +update_path_binary({In, Out}) when is_binary(Out) -> + {In#{path => <<?PATH/utf8>>}, <<Out/binary,?PATH_ENC>>}. + +update_port({In, Out}) when is_list(Out) -> + {In#{port => ?PORT}, Out ++ ?PORT_ENC}; +update_port({In, Out}) when is_binary(Out) -> + {In#{port => ?PORT}, <<Out/binary,?PORT_ENC>>}. + +update_host({In, empty}) -> + {In#{host => ?HOST}, "//" ++ ?HOST_ENC}; +update_host({In, Out}) when is_list(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => ?HOST}, Out ++ [$@|?HOST_ENC]}; + false -> {In#{host => ?HOST}, Out ++ [$/,$/|?HOST_ENC]} + end; +update_host({In, Out}) when is_binary(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => ?HOST}, binary_to_list(Out) ++ [$@|?HOST_ENC]}; + false -> {In#{host => ?HOST}, binary_to_list(Out) ++ [$/,$/|?HOST_ENC]} + end. + +update_host_binary({In, empty}) -> + {In#{host => <<?HOST/utf8>>}, <<"//",?HOST_ENC>>}; +update_host_binary({In, Out}) when is_list(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => <<?HOST/utf8>>}, Out ++ [$@|?HOST_ENC]}; + false -> {In#{host => <<?HOST/utf8>>}, Out ++ [$/,$/|?HOST_ENC]} + end; +update_host_binary({In, Out}) when is_binary(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => <<?HOST/utf8>>}, <<Out/binary,$@,?HOST_ENC>>}; + false-> {In#{host => <<?HOST/utf8>>}, <<Out/binary,"//",?HOST_ENC>>} + end. + +update_ipv6({In, empty}) -> + {In#{host => ?IPV6}, "//" ++ ?IPV6_ENC}; +update_ipv6({In, Out}) when is_list(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => ?IPV6}, Out ++ [$@|?IPV6_ENC]}; + false -> {In#{host => ?IPV6}, Out ++ [$/,$/|?IPV6_ENC]} + end; +update_ipv6({In, Out}) when is_binary(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => ?IPV6}, binary_to_list(Out) ++ [$@|?IPV6_ENC]}; + false -> {In#{host => ?IPV6}, binary_to_list(Out) ++ [$/,$/|?IPV6_ENC]} + end. + +update_ipv6_binary({In, empty}) -> + {In#{host => <<?IPV6/utf8>>}, <<"//",?IPV6_ENC>>}; +update_ipv6_binary({In, Out}) when is_list(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => <<?IPV6/utf8>>}, Out ++ [$@|?IPV6_ENC]}; + false -> {In#{host => <<?IPV6/utf8>>}, Out ++ [$/,$/|?IPV6_ENC]} + end; +update_ipv6_binary({In, Out}) when is_binary(Out) -> + case maps:is_key(userinfo, In) of + true -> {In#{host => <<?IPV6/utf8>>}, <<Out/binary,$@,?IPV6_ENC>>}; + false-> {In#{host => <<?IPV6/utf8>>}, <<Out/binary,"//",?IPV6_ENC>>} + end. + +update_userinfo({In, empty}) -> + {In#{userinfo => ?USERINFO}, "//" ++ ?USERINFO_ENC}; +update_userinfo({In, Out}) when is_list(Out) -> + {In#{userinfo => ?USERINFO}, Out ++ "//" ++ ?USERINFO_ENC}; +update_userinfo({In, Out}) when is_binary(Out) -> + {In#{userinfo => ?USERINFO}, binary_to_list(Out) ++ "//" ++ ?USERINFO_ENC}. + +update_userinfo_binary({In, empty}) -> + {In#{userinfo => <<?USERINFO/utf8>>}, <<"//",?USERINFO_ENC>>}; +update_userinfo_binary({In, Out}) when is_list(Out) -> + {In#{userinfo => <<?USERINFO/utf8>>}, Out ++ "//" ++ ?USERINFO_ENC}; +update_userinfo_binary({In, Out}) when is_binary(Out) -> + {In#{userinfo => <<?USERINFO/utf8>>}, <<Out/binary,"//",?USERINFO_ENC>>}. + +update_scheme({In, empty}) -> + {In#{scheme => ?SCHEME}, ?SCHEME ++ ":"}. + +update_scheme_binary({In, empty}) -> + {In#{scheme => <<?SCHEME/utf8>>}, <<?SCHEME,$:>>}. + + +%% Test recompose on a generated test vector +run_test_recompose({#{}, empty}) -> + try "" = uri_string:recompose(#{}) of + _ -> ok + catch + _:_ -> error({test_failed, #{}, ""}) + end; +run_test_recompose({Map, URI}) -> + try URI = uri_string:recompose(Map) of + URI -> ok + catch + _:_ -> error({test_failed, Map, URI}) + end. + +%% Test parse - recompose on a generated test vector +run_test_parse_recompose({#{}, empty}) -> + try "" = uri_string:recompose(uri_string:parse("")) of + _ -> ok + catch + _:_ -> error({test_failed, #{}, ""}) + end; +run_test_parse_recompose({Map, URI}) -> + try URI = uri_string:recompose(uri_string:parse(URI)) of + URI -> ok + catch + _:_ -> error({test_failed, Map, URI}) + end. + + +%%------------------------------------------------------------------------- +%% Parse tests +%%------------------------------------------------------------------------- + +parse_binary_scheme(_Config) -> + #{} = uri_string:parse(<<>>), + #{path := <<"foo">>} = uri_string:parse(<<"foo">>), + #{scheme := <<"foo">>} = uri_string:parse(<<"foo:">>), + #{scheme := <<"foo">>, path := <<"bar:nisse">>} = uri_string:parse(<<"foo:bar:nisse">>), + #{scheme := <<"foo">>, host := <<"">>} = uri_string:parse(<<"foo://">>), + #{scheme := <<"foo">>, host := <<"">>, path := <<"/">>} = uri_string:parse(<<"foo:///">>), + #{scheme := <<"foo">>, host := <<"">>, path := <<"//">>} = uri_string:parse(<<"foo:////">>), + + #{path := <<"/">>} = uri_string:parse(<<"/">>), + #{host := <<>>} = uri_string:parse(<<"//">>), + #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>). + +parse_binary_userinfo(_Config) -> + #{scheme := <<"user">>, path := <<"password@localhost">>} = + uri_string:parse(<<"user:password@localhost">>), + #{path := <<"user@">>} = uri_string:parse(<<"user@">>), + #{path := <<"/user@">>} = uri_string:parse(<<"/user@">>), + #{path := <<"user@localhost">>} = uri_string:parse(<<"user@localhost">>), + #{userinfo := <<"user">>, host := <<"localhost">>} = uri_string:parse(<<"//user@localhost">>), + #{userinfo := <<"user:password">>, host := <<"localhost">>} = + uri_string:parse(<<"//user:password@localhost">>), + #{scheme := <<"foo">>, path := <<"/user@">>} = + uri_string:parse(<<"foo:/user@">>), + #{scheme := <<"foo">>, userinfo := <<"user">>, host := <<"localhost">>} = + uri_string:parse(<<"foo://user@localhost">>), + #{scheme := <<"foo">>, userinfo := <<"user:password">>, host := <<"localhost">>} = + uri_string:parse(<<"foo://user:password@localhost">>). + +parse_binary_pct_encoded_userinfo(_Config) -> + #{scheme := <<"user">>, path := <<"合@気道"/utf8>>} = + uri_string:parse(<<"user:%E5%90%88@%E6%B0%97%E9%81%93">>), + #{path := <<"合気道@"/utf8>>} = uri_string:parse(<<"%E5%90%88%E6%B0%97%E9%81%93@">>), + #{path := <<"/合気道@"/utf8>>} = uri_string:parse(<<"/%E5%90%88%E6%B0%97%E9%81%93@">>), + #{path := <<"合@気道"/utf8>>} = uri_string:parse(<<"%E5%90%88@%E6%B0%97%E9%81%93">>), + #{userinfo := <<"合"/utf8>>, host := <<"気道"/utf8>>} = + uri_string:parse(<<"//%E5%90%88@%E6%B0%97%E9%81%93">>), + #{userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} = + uri_string:parse(<<"//%E5%90%88:%E6%B0%97@%E9%81%93">>), + #{scheme := <<"foo">>, path := <<"/合気道@"/utf8>>} = + uri_string:parse(<<"foo:/%E5%90%88%E6%B0%97%E9%81%93@">>), + #{scheme := <<"foo">>, userinfo := <<"合"/utf8>>, host := <<"気道"/utf8>>} = + uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93">>), + #{scheme := <<"foo">>, userinfo := <<"合:気"/utf8>>, host := <<"道"/utf8>>} = + uri_string:parse(<<"foo://%E5%90%88:%E6%B0%97@%E9%81%93">>), + {error,invalid_uri,"@"} = uri_string:parse(<<"//%E5%90%88@%E6%B0%97%E9%81%93@">>), + {error,invalid_uri,":"} = uri_string:parse(<<"foo://%E5%90%88@%E6%B0%97%E9%81%93@">>). + +parse_binary_host(_Config) -> + #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>), + #{host := <<"hostname">>,scheme := <<"foo">>} = uri_string:parse(<<"foo://hostname">>), + #{host := <<"hostname">>,scheme := <<"foo">>, userinfo := <<"user">>} = + uri_string:parse(<<"foo://user@hostname">>). + +parse_binary_host_ipv4(_Config) -> + #{host := <<"127.0.0.1">>} = uri_string:parse(<<"//127.0.0.1">>), + #{host := <<"127.0.0.1">>, path := <<"/over/there">>} = + uri_string:parse(<<"//127.0.0.1/over/there">>), + #{host := <<"127.0.0.1">>, query := <<"name=ferret">>} = + uri_string:parse(<<"//127.0.0.1?name=ferret">>), + #{host := <<"127.0.0.1">>, fragment := <<"nose">>} = uri_string:parse(<<"//127.0.0.1#nose">>), + {error,invalid_uri,"x"} = uri_string:parse(<<"//127.0.0.x">>), + {error,invalid_uri,"1227.0.0.1"} = uri_string:parse(<<"//1227.0.0.1">>). + +parse_binary_host_ipv6(_Config) -> + #{host := <<"::127.0.0.1">>} = uri_string:parse(<<"//[::127.0.0.1]">>), + #{host := <<"2001:0db8:0000:0000:0000:0000:1428:07ab">>} = + uri_string:parse(<<"//[2001:0db8:0000:0000:0000:0000:1428:07ab]">>), + #{host := <<"::127.0.0.1">>, path := <<"/over/there">>} = + uri_string:parse(<<"//[::127.0.0.1]/over/there">>), + #{host := <<"::127.0.0.1">>, query := <<"name=ferret">>} = + uri_string:parse(<<"//[::127.0.0.1]?name=ferret">>), + #{host := <<"::127.0.0.1">>, fragment := <<"nose">>} = + uri_string:parse(<<"//[::127.0.0.1]#nose">>), + {error,invalid_uri,"x"} = uri_string:parse(<<"//[::127.0.0.x]">>), + {error,invalid_uri,"::1227.0.0.1"} = uri_string:parse(<<"//[::1227.0.0.1]">>), + {error,invalid_uri,"G"} = uri_string:parse(<<"//[2001:0db8:0000:0000:0000:0000:1428:G7ab]">>). + +parse_binary_port(_Config) -> + #{path:= <<"/:8042">>} = + uri_string:parse(<<"/:8042">>), + #{host:= <<>>, port := 8042} = + uri_string:parse(<<"//:8042">>), + #{host := <<"example.com">>, port:= 8042} = + uri_string:parse(<<"//example.com:8042">>), + #{scheme := <<"foo">>, path := <<"/:8042">>} = + uri_string:parse(<<"foo:/:8042">>), + #{scheme := <<"foo">>, host := <<>>, port := 8042} = + uri_string:parse(<<"foo://:8042">>), + #{scheme := <<"foo">>, host := <<"example.com">>, port := 8042} = + uri_string:parse(<<"foo://example.com:8042">>), + {error,invalid_uri,":"} = uri_string:parse(":600"), + {error,invalid_uri,"x"} = uri_string:parse("//:8042x"). + +parse_binary_path(_Config) -> + #{path := <<"over/there">>} = uri_string:parse(<<"over/there">>), + #{path := <<"/over/there">>} = uri_string:parse(<<"/over/there">>), + #{scheme := <<"foo">>, path := <<"/over/there">>} = + uri_string:parse(<<"foo:/over/there">>), + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/over/there">>} = + uri_string:parse(<<"foo://example.com/over/there">>), + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/over/there">>, port := 8042} = + uri_string:parse(<<"foo://example.com:8042/over/there">>). + +parse_binary_query(_Config) -> + #{scheme := <<"foo">>, query := <<"name=ferret">>} = + uri_string:parse(<<"foo:?name=ferret">>), + #{scheme := <<"foo">>, path:= <<"over/there">>, query := <<"name=ferret">>} = + uri_string:parse(<<"foo:over/there?name=ferret">>), + #{scheme := <<"foo">>, path:= <<"/over/there">>, query := <<"name=ferret">>} = + uri_string:parse(<<"foo:/over/there?name=ferret">>), + #{scheme := <<"foo">>, host := <<"example.com">>, query := <<"name=ferret">>} = + uri_string:parse(<<"foo://example.com?name=ferret">>), + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>, query := <<"name=ferret">>} = + uri_string:parse(<<"foo://example.com/?name=ferret">>), + + #{path := <<>>, query := <<"name=ferret">>} = + uri_string:parse(<<"?name=ferret">>), + #{path := <<"over/there">>, query := <<"name=ferret">>} = + uri_string:parse(<<"over/there?name=ferret">>), + #{path := <<"/">>, query := <<"name=ferret">>} = + uri_string:parse(<<"/?name=ferret">>), + #{path := <<"/over/there">>, query := <<"name=ferret">>} = + uri_string:parse(<<"/over/there?name=ferret">>), + #{host := <<"example.com">>, query := <<"name=ferret">>} = + uri_string:parse(<<"//example.com?name=ferret">>), + #{host := <<"example.com">>, path := <<"/">>, query := <<"name=ferret">>} = + uri_string:parse(<<"//example.com/?name=ferret">>). + +parse_binary_pct_encoded_query(_Config) -> + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>, + query := <<"name=合気道"/utf8>>} = + uri_string:parse(<<"foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>), + #{host := <<"example.com">>, path := <<"/">>, query := <<"name=合気道"/utf8>>} = + uri_string:parse(<<"//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93">>). + +parse_binary_fragment(_Config) -> + #{scheme := <<"foo">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo:#nose">>), + #{scheme := <<"foo">>, path:= <<"over/there">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo:over/there#nose">>), + #{scheme := <<"foo">>, path:= <<"/over/there">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo:/over/there#nose">>), + #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://example.com#nose">>), + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://example.com/#nose">>), + #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://example.com#nose">>), + + #{fragment := <<"nose">>} = + uri_string:parse(<<"#nose">>), + #{path := <<"over/there">>, fragment := <<"nose">>} = + uri_string:parse(<<"over/there#nose">>), + #{path := <<"/">>, fragment := <<"nose">>} = + uri_string:parse(<<"/#nose">>), + #{path := <<"/over/there">>, fragment := <<"nose">>} = + uri_string:parse(<<"/over/there#nose">>), + #{host := <<"example.com">>, fragment := <<"nose">>} = + uri_string:parse(<<"//example.com#nose">>), + #{host := <<"example.com">>, path := <<"/">>, fragment := <<"nose">>} = + uri_string:parse(<<"//example.com/#nose">>). + +parse_binary_pct_encoded_fragment(_Config) -> + #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"合気道"/utf8>>} = + uri_string:parse(<<"foo://example.com#%E5%90%88%E6%B0%97%E9%81%93">>), + #{host := <<"example.com">>, path := <<"/">>, fragment := <<"合気道"/utf8>>} = + uri_string:parse(<<"//example.com/#%E5%90%88%E6%B0%97%E9%81%93">>). + +parse_scheme(_Config) -> + #{} = uri_string:parse(""), + #{path := "foo"} = uri_string:parse("foo"), + #{scheme := "foo"} = uri_string:parse("foo:"), + #{scheme := "foo", path := "bar:nisse"} = uri_string:parse("foo:bar:nisse"), + #{scheme := "foo", host := ""} = uri_string:parse("foo://"), + #{scheme := "foo", host := "", path := "/"} = uri_string:parse("foo:///"), + #{scheme := "foo", host := "", path := "//"} = uri_string:parse("foo:////"), + + #{path := "/"} = uri_string:parse("/"), + #{host := ""} = uri_string:parse("//"), + #{host := "", path := "/"} = uri_string:parse("///"). + +parse_userinfo(_Config) -> + #{scheme := "user", path := "password@localhost"} = uri_string:parse("user:password@localhost"), + #{path := "user@"} = uri_string:parse("user@"), + #{path := "/user@"} = uri_string:parse("/user@"), + #{path := "user@localhost"} = uri_string:parse("user@localhost"), + #{userinfo := "user", host := "localhost"} = uri_string:parse("//user@localhost"), + #{userinfo := "user:password", host := "localhost"} = + uri_string:parse("//user:password@localhost"), + #{scheme := "foo", path := "/user@"} = + uri_string:parse("foo:/user@"), + #{scheme := "foo", userinfo := "user", host := "localhost"} = + uri_string:parse("foo://user@localhost"), + #{scheme := "foo", userinfo := "user:password", host := "localhost"} = + uri_string:parse("foo://user:password@localhost"). + +parse_pct_encoded_userinfo(_Config) -> + #{scheme := "user", path := "合@気道"} = + uri_string:parse("user:%E5%90%88@%E6%B0%97%E9%81%93"), + #{path := "合気道@"} = uri_string:parse("%E5%90%88%E6%B0%97%E9%81%93@"), + #{path := "/合気道@"} = uri_string:parse("/%E5%90%88%E6%B0%97%E9%81%93@"), + #{path := "合@気道"} = uri_string:parse("%E5%90%88@%E6%B0%97%E9%81%93"), + #{userinfo := "合", host := "気道"} = + uri_string:parse("//%E5%90%88@%E6%B0%97%E9%81%93"), + #{userinfo := "合:気", host := "道"} = + uri_string:parse("//%E5%90%88:%E6%B0%97@%E9%81%93"), + #{scheme := "foo", path := "/合気道@"} = + uri_string:parse("foo:/%E5%90%88%E6%B0%97%E9%81%93@"), + #{scheme := "foo", userinfo := "合", host := "気道"} = + uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93"), + #{scheme := "foo", userinfo := "合:気", host := "道"} = + uri_string:parse("foo://%E5%90%88:%E6%B0%97@%E9%81%93"), + {error,invalid_uri,"@"} = uri_string:parse("//%E5%90%88@%E6%B0%97%E9%81%93@"), + {error,invalid_uri,":"} = uri_string:parse("foo://%E5%90%88@%E6%B0%97%E9%81%93@"). + + +parse_host(_Config) -> + #{host := "hostname"} = uri_string:parse("//hostname"), + #{host := "hostname",scheme := "foo"} = uri_string:parse("foo://hostname"), + #{host := "hostname",scheme := "foo", userinfo := "user"} = + uri_string:parse("foo://user@hostname"). + +parse_host_ipv4(_Config) -> + #{host := "127.0.0.1"} = uri_string:parse("//127.0.0.1"), + #{host := "2001:0db8:0000:0000:0000:0000:1428:07ab"} = + uri_string:parse("//[2001:0db8:0000:0000:0000:0000:1428:07ab]"), + #{host := "127.0.0.1", path := "/over/there"} = uri_string:parse("//127.0.0.1/over/there"), + #{host := "127.0.0.1", query := "name=ferret"} = uri_string:parse("//127.0.0.1?name=ferret"), + #{host := "127.0.0.1", fragment := "nose"} = uri_string:parse("//127.0.0.1#nose"), + {error,invalid_uri,"x"} = uri_string:parse("//127.0.0.x"), + {error,invalid_uri,"1227.0.0.1"} = uri_string:parse("//1227.0.0.1"). + +parse_host_ipv6(_Config) -> + #{host := "::127.0.0.1"} = uri_string:parse("//[::127.0.0.1]"), + #{host := "::127.0.0.1", path := "/over/there"} = uri_string:parse("//[::127.0.0.1]/over/there"), + #{host := "::127.0.0.1", query := "name=ferret"} = + uri_string:parse("//[::127.0.0.1]?name=ferret"), + #{host := "::127.0.0.1", fragment := "nose"} = uri_string:parse("//[::127.0.0.1]#nose"), + {error,invalid_uri,"x"} = uri_string:parse("//[::127.0.0.x]"), + {error,invalid_uri,"::1227.0.0.1"} = uri_string:parse("//[::1227.0.0.1]"), + {error,invalid_uri,"G"} = uri_string:parse("//[2001:0db8:0000:0000:0000:0000:1428:G7ab]"). + +parse_port(_Config) -> + #{path:= "/:8042"} = + uri_string:parse("/:8042"), + #{host:= "", port := 8042} = + uri_string:parse("//:8042"), + #{host := "example.com", port:= 8042} = + uri_string:parse("//example.com:8042"), + #{scheme := "foo", path := "/:8042"} = + uri_string:parse("foo:/:8042"), + #{scheme := "foo", host := "", port := 8042} = + uri_string:parse("foo://:8042"), + #{scheme := "foo", host := "example.com", port := 8042} = + uri_string:parse("foo://example.com:8042"). + +parse_path(_Config) -> + #{path := "over/there"} = uri_string:parse("over/there"), + #{path := "/over/there"} = uri_string:parse("/over/there"), + #{scheme := "foo", path := "/over/there"} = + uri_string:parse("foo:/over/there"), + #{scheme := "foo", host := "example.com", path := "/over/there"} = + uri_string:parse("foo://example.com/over/there"), + #{scheme := "foo", host := "example.com", path := "/over/there", port := 8042} = + uri_string:parse("foo://example.com:8042/over/there"). + +parse_query(_Config) -> + #{scheme := "foo", query := "name=ferret"} = + uri_string:parse("foo:?name=ferret"), + #{scheme := "foo", path:= "over/there", query := "name=ferret"} = + uri_string:parse("foo:over/there?name=ferret"), + #{scheme := "foo", path:= "/over/there", query := "name=ferret"} = + uri_string:parse("foo:/over/there?name=ferret"), + #{scheme := "foo", host := "example.com", query := "name=ferret"} = + uri_string:parse("foo://example.com?name=ferret"), + #{scheme := "foo", host := "example.com", path := "/", query := "name=ferret"} = + uri_string:parse("foo://example.com/?name=ferret"), + + #{path := "", query := "name=ferret"} = + uri_string:parse("?name=ferret"), + #{path := "over/there", query := "name=ferret"} = + uri_string:parse("over/there?name=ferret"), + #{path := "/", query := "name=ferret"} = + uri_string:parse("/?name=ferret"), + #{path := "/over/there", query := "name=ferret"} = + uri_string:parse("/over/there?name=ferret"), + #{host := "example.com", query := "name=ferret"} = + uri_string:parse("//example.com?name=ferret"), + #{host := "example.com", path := "/", query := "name=ferret"} = + uri_string:parse("//example.com/?name=ferret"). + +parse_pct_encoded_query(_Config) -> + #{scheme := "foo", host := "example.com", path := "/", + query := "name=合気道"} = + uri_string:parse("foo://example.com/?name=%E5%90%88%E6%B0%97%E9%81%93"), + #{host := "example.com", path := "/", query := "name=合気道"} = + uri_string:parse("//example.com/?name=%E5%90%88%E6%B0%97%E9%81%93"). + +parse_fragment(_Config) -> + #{scheme := "foo", fragment := "nose"} = + uri_string:parse("foo:#nose"), + #{scheme := "foo", path:= "over/there", fragment := "nose"} = + uri_string:parse("foo:over/there#nose"), + #{scheme := "foo", path:= "/over/there", fragment := "nose"} = + uri_string:parse("foo:/over/there#nose"), + #{scheme := "foo", host := "example.com", fragment := "nose"} = + uri_string:parse("foo://example.com#nose"), + #{scheme := "foo", host := "example.com", path := "/", fragment := "nose"} = + uri_string:parse("foo://example.com/#nose"), + #{scheme := "foo", host := "example.com", fragment := "nose"} = + uri_string:parse("foo://example.com#nose"), + + #{fragment := "nose"} = + uri_string:parse("#nose"), + #{path := "over/there", fragment := "nose"} = + uri_string:parse("over/there#nose"), + #{path := "/", fragment := "nose"} = + uri_string:parse("/#nose"), + #{path := "/over/there", fragment := "nose"} = + uri_string:parse("/over/there#nose"), + #{host := "example.com", fragment := "nose"} = + uri_string:parse("//example.com#nose"), + #{host := "example.com", path := "/", fragment := "nose"} = + uri_string:parse("//example.com/#nose"). + +parse_pct_encoded_fragment(_Config) -> + #{scheme := "foo", host := "example.com", fragment := "合気道"} = + uri_string:parse("foo://example.com#%E5%90%88%E6%B0%97%E9%81%93"), + #{host := "example.com", path := "/", fragment := "合気道"} = + uri_string:parse("//example.com/#%E5%90%88%E6%B0%97%E9%81%93"). + +parse_list(_Config) -> + #{scheme := "foo", path := "bar:nisse"} = uri_string:parse("foo:bar:nisse"), + #{scheme := "foo", host := "example.com", port := 8042, + path := "/over/there", query := "name=ferret", fragment := "nose"} = + uri_string:parse("foo://example.com:8042/over/there?name=ferret#nose"), + #{scheme := "foo", userinfo := "admin:admin", host := "example.com", port := 8042, + path := "/over/there", query := "name=ferret", fragment := "nose"} = + uri_string:parse("foo://admin:[email protected]:8042/over/there?name=ferret#nose"). + +parse_binary(_Config) -> + #{scheme := <<"foo">>, path := <<"bar:nisse">>} = uri_string:parse(<<"foo:bar:nisse">>), + #{scheme := <<"foo">>, host := <<"example.com">>, port := 8042, + path := <<"/over/there">>, query := <<"name=ferret">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://example.com:8042/over/there?name=ferret#nose">>), + #{scheme := <<"foo">>, userinfo := <<"admin:admin">>, host := <<"example.com">>, port := 8042, + path := <<"/over/there">>, query := <<"name=ferret">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://admin:[email protected]:8042/over/there?name=ferret#nose">>). + + +parse_mixed(_Config) -> + #{scheme := "foo", path := "bar"} = + uri_string:parse(lists:append("fo",<<"o:bar">>)), + #{scheme := "foo", path := "bar"} = + uri_string:parse(lists:append("foo:b",<<"ar">>)), + #{scheme := "foo", path := "bar:bar"} = + uri_string:parse([[102],[111,111],<<":bar">>,58,98,97,114]). + +parse_relative(_Config) -> + #{path := "/path"} = + uri_string:parse(lists:append("/pa",<<"th">>)), + #{path := "foo"} = + uri_string:parse(lists:append("fo",<<"o">>)). + +parse_special(_Config) -> + #{host := [],query := []} = uri_string:parse("//?"), + #{fragment := [],host := []} = uri_string:parse("//#"), + #{host := [],query := [],scheme := "foo"} = uri_string:parse("foo://?"), + #{fragment := [],host := [],scheme := "foo"} = uri_string:parse("foo://#"), + #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>), + #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>), + #{host := <<>>, path := <<"/hostname">>} = uri_string:parse(<<"///hostname">>), + #{host := [],path := "/",query := []} = uri_string:parse("///?"), + #{fragment := [],host := [],path := "/"} = uri_string:parse("///#"), + #{host := "foo",query := []} = uri_string:parse("//foo?"), + #{fragment := [],host := "foo"} = uri_string:parse("//foo#"), + #{host := "foo",path := "/"} = uri_string:parse("//foo/"), + #{host := "foo",query := [],scheme := "http"} = uri_string:parse("http://foo?"), + #{fragment := [],host := "foo",scheme := "http"} = uri_string:parse("http://foo#"), + #{host := "foo",path := "/",scheme := "http"} = uri_string:parse("http://foo/"), + #{fragment := [],host := "host",port := 80,scheme := "http"} = uri_string:parse("http://host:80#"), + #{host := "host",port := 80,query := [],scheme := "http"} = uri_string:parse("http://host:80?"), + #{path := [],query := []} = uri_string:parse("?"), + #{path := [],query := "?"} = uri_string:parse("??"), + #{path := [],query := "??"} = uri_string:parse("???"). + +parse_special2(_Config) -> + #{host := [],path := "/",port := 1,scheme := "a"} = uri_string:parse("a://:1/"), + #{path := "/a/",scheme := "a"} = uri_string:parse("a:/a/"), + #{host := [],path := [],userinfo := []} = uri_string:parse("//@"), + #{host := [],path := [],scheme := "foo",userinfo := []} = uri_string:parse("foo://@"), + #{host := [],path := "/",userinfo := []} = uri_string:parse("//@/"), + #{host := [],path := "/",scheme := "foo",userinfo := []} = uri_string:parse("foo://@/"), + #{host := "localhost",path := "/",port := undefined} = uri_string:parse("//localhost:/"), + #{host := [],path := [],port := undefined} = uri_string:parse("//:"). + +parse_negative(_Config) -> + {error,invalid_uri,"å"} = uri_string:parse("å"), + {error,invalid_uri,"å"} = uri_string:parse("aå:/foo"), + {error,invalid_uri,":"} = uri_string:parse("foo://usär@host"), + {error,invalid_uri,"ö"} = uri_string:parse("//host/path?foö=bar"), + {error,invalid_uri,"ö"} = uri_string:parse("//host/path#foö"), + {error,invalid_uri,"127.256.0.1"} = uri_string:parse("//127.256.0.1"), + {error,invalid_uri,":::127.0.0.1"} = uri_string:parse("//[:::127.0.0.1]"), + {error,invalid_utf8,<<0,0,0,246>>} = uri_string:parse("//%00%00%00%F6"), + {error,invalid_uri,"A"} = uri_string:parse("//localhost:A8"). + + +%%------------------------------------------------------------------------- +%% Recompose tests +%%------------------------------------------------------------------------- +recompose_fragment(_Config) -> + <<?FRAGMENT_ENC>> = uri_string:recompose(#{fragment => <<?FRAGMENT/utf8>>, path => <<>>}), + ?FRAGMENT_ENC = uri_string:recompose(#{fragment => ?FRAGMENT, path => ""}). + +recompose_parse_fragment(_Config) -> + <<?FRAGMENT_ENC>> = uri_string:recompose(uri_string:parse(<<?FRAGMENT_ENC>>)), + ?FRAGMENT_ENC = uri_string:recompose(uri_string:parse(?FRAGMENT_ENC)). + +recompose_query(_Config) -> + <<?QUERY_ENC>> = + uri_string:recompose(#{query => <<?QUERY/utf8>>, path => <<>>}), + <<?QUERY_ENC?FRAGMENT_ENC>> = + uri_string:recompose(#{query => <<?QUERY/utf8>>, + fragment => <<?FRAGMENT/utf8>>, + path => <<>>}), + "?name=%C3%B6rn" = + uri_string:recompose(#{query => "name=örn", path => ""}), + "?name=%C3%B6rn#n%C3%A4sa" = + uri_string:recompose(#{query => "name=örn", + fragment => "näsa", + path => ""}). + +recompose_parse_query(_Config) -> + <<"?name=%C3%B6rn">> = uri_string:recompose(uri_string:parse(<<"?name=%C3%B6rn">>)), + <<"?name=%C3%B6rn#n%C3%A4sa">> = + uri_string:recompose(uri_string:parse(<<"?name=%C3%B6rn#n%C3%A4sa">>)), + "?name=%C3%B6rn" = uri_string:recompose(uri_string:parse("?name=%C3%B6rn")), + "?name=%C3%B6rn#n%C3%A4sa" = uri_string:recompose(uri_string:parse("?name=%C3%B6rn#n%C3%A4sa")). + +recompose_path(_Config) -> + <<"/d%C3%A4r">> = + uri_string:recompose(#{path => <<"/där"/utf8>>}), + <<"/d%C3%A4r#n%C3%A4sa">> = + uri_string:recompose(#{path => <<"/där"/utf8>>, + fragment => <<"näsa"/utf8>>}), + <<"/d%C3%A4r?name=%C3%B6rn">> = + uri_string:recompose(#{path => <<"/där"/utf8>>, + query => <<"name=örn"/utf8>>}), + <<"/d%C3%A4r?name=%C3%B6rn#n%C3%A4sa">> = + uri_string:recompose(#{path => <<"/där"/utf8>>, + query => <<"name=örn"/utf8>>, + fragment => <<"näsa"/utf8>>}), + + + "/d%C3%A4r" = + uri_string:recompose(#{path => "/där"}), + "/d%C3%A4r#n%C3%A4sa" = + uri_string:recompose(#{path => "/där", + fragment => "näsa"}), + "/d%C3%A4r?name=%C3%B6rn" = + uri_string:recompose(#{path => "/där", + query => "name=örn"}), + "/d%C3%A4r?name=%C3%B6rn#n%C3%A4sa" = + uri_string:recompose(#{path => "/där", + query => "name=örn", + fragment => "näsa"}). + + +recompose_parse_path(_Config) -> + <<"/d%C3%A4r">> = + uri_string:recompose(uri_string:parse(<<"/d%C3%A4r">>)), + <<"/d%C3%A4r#n%C3%A4sa">> = + uri_string:recompose(uri_string:parse(<<"/d%C3%A4r#n%C3%A4sa">>)), + <<"/d%C3%A4r?name=%C3%B6rn">> = + uri_string:recompose(uri_string:parse(<<"/d%C3%A4r?name=%C3%B6rn">>)), + + "/d%C3%A4r" = + uri_string:recompose(uri_string:parse("/d%C3%A4r")), + "/d%C3%A4r#n%C3%A4sa" = + uri_string:recompose(uri_string:parse("/d%C3%A4r#n%C3%A4sa")), + "/d%C3%A4r?name=%C3%B6rn" = + uri_string:recompose(uri_string:parse("/d%C3%A4r?name=%C3%B6rn")). + + +recompose_autogen(_Config) -> + Tests = generate_test_vectors(uri_combinations()), + lists:map(fun run_test_recompose/1, Tests). + +parse_recompose_autogen(_Config) -> + Tests = generate_test_vectors(uri_combinations()), + lists:map(fun run_test_parse_recompose/1, Tests). + +transcode_basic(_Config) -> + <<"foo%C3%B6bar"/utf8>> = + uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>, [{in_encoding, utf32},{out_encoding, utf8}]), + "foo%C3%B6bar" = + uri_string:transcode("foo%00%00%00%F6bar", [{in_encoding, utf32},{out_encoding, utf8}]), + <<"foo%00%00%00%F6bar"/utf32>> = + uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]), + "foo%00%00%00%F6bar" = + uri_string:transcode("foo%C3%B6bar", [{in_encoding, utf8},{out_encoding, utf32}]), + "foo%C3%B6bar" = + uri_string:transcode("foo%F6bar", [{in_encoding, latin1},{out_encoding, utf8}]). + +transcode_options(_Config) -> + <<"foo%C3%B6bar"/utf8>> = + uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, []), + <<"foo%C3%B6bar"/utf8>> = + uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>, [{in_encoding, utf32}]), + <<"foo%00%00%00%F6bar"/utf32>> = + uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, [{out_encoding, utf32}]). + +transcode_mixed(_Config) -> + "foo%00%00%00%F6bar" = + uri_string:transcode(["foo",<<"%C3%B6"/utf8>>,<<"ba"/utf8>>,"r"], [{out_encoding, utf32}]), + "foo%00%00%00%F6bar" = + uri_string:transcode(["foo",<<"%C3%"/utf8>>,<<"B6ba"/utf8>>,"r"], [{out_encoding, utf32}]), + "foo%C3%B6bar" = + uri_string:transcode(["foo%00", <<"%00%0"/utf32>>,<<"0%F"/utf32>>,"6bar"], [{in_encoding, utf32},{out_encoding, utf8}]). + +transcode_negative(_Config) -> + {error,invalid_percent_encoding,"%BXbar"} = + uri_string:transcode(<<"foo%C3%BXbar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]), + {error,invalid_input,<<"ö">>} = + uri_string:transcode("foo%F6bar", [{in_encoding, utf8},{out_encoding, utf8}]). + +normalize(_Config) -> + "/a/g" = uri_string:normalize("/a/b/c/./../../g"), + <<"mid/6">> = uri_string:normalize(<<"mid/content=5/../6">>), + "http://localhost-%C3%B6rebro/a/g" = + uri_string:normalize("http://localhos%74-%c3%b6rebro:80/a/b/c/./../../g"), + <<"http://localhost-%C3%B6rebro/a/g">> = + uri_string:normalize(<<"http://localhos%74-%c3%b6rebro:80/a/b/c/./../../g">>), + <<"https://localhost/">> = + uri_string:normalize(<<"https://localhost:443">>), + <<"https://localhost:445/">> = + uri_string:normalize(<<"https://localhost:445">>), + <<"ftp://localhost">> = + uri_string:normalize(<<"ftp://localhost:21">>), + <<"ssh://localhost">> = + uri_string:normalize(<<"ssh://localhost:22">>), + <<"sftp://localhost">> = + uri_string:normalize(<<"sftp://localhost:22">>), + <<"tftp://localhost">> = + uri_string:normalize(<<"tftp://localhost:69">>). diff --git a/lib/stdlib/test/uri_string_property_test_SUITE.erl b/lib/stdlib/test/uri_string_property_test_SUITE.erl new file mode 100644 index 0000000000..ae2c61c7aa --- /dev/null +++ b/lib/stdlib/test/uri_string_property_test_SUITE.erl @@ -0,0 +1,39 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(uri_string_property_test_SUITE). + +-include_lib("common_test/include/ct.hrl"). +-compile(export_all). + +all() -> [recompose]. + +init_per_suite(Config) -> + ct_property_test:init_per_suite(Config). + +end_per_suite(Config) -> + Config. + +%%%======================================================================== +%%% Test suites +%%%======================================================================== +recompose(Config) -> + ct_property_test:quickcheck( + uri_string_recompose:prop_recompose(), + Config). diff --git a/lib/stdlib/uc_spec/CaseFolding.txt b/lib/stdlib/uc_spec/CaseFolding.txt index 372ee68bd8..efdf18e441 100644 --- a/lib/stdlib/uc_spec/CaseFolding.txt +++ b/lib/stdlib/uc_spec/CaseFolding.txt @@ -1,6 +1,6 @@ -# CaseFolding-9.0.0.txt -# Date: 2016-03-02, 18:54:54 GMT -# © 2016 Unicode®, Inc. +# CaseFolding-10.0.0.txt +# Date: 2017-04-14, 05:40:18 GMT +# © 2017 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see http://www.unicode.org/terms_of_use.html # @@ -24,7 +24,7 @@ # # NOTE: case folding does not preserve normalization formats! # -# For information on case folding, including how to have case folding +# For information on case folding, including how to have case folding # preserve normalization formats, see Section 3.13 Default Case Algorithms in # The Unicode Standard. # diff --git a/lib/stdlib/uc_spec/CompositionExclusions.txt b/lib/stdlib/uc_spec/CompositionExclusions.txt index 1999ed1328..ff42508686 100644 --- a/lib/stdlib/uc_spec/CompositionExclusions.txt +++ b/lib/stdlib/uc_spec/CompositionExclusions.txt @@ -1,6 +1,6 @@ -# CompositionExclusions-9.0.0.txt -# Date: 2016-01-21, 22:00:00 GMT [KW, LI] -# © 2016 Unicode®, Inc. +# CompositionExclusions-10.0.0.txt +# Date: 2017-02-15, 00:00:00 GMT [KW, LI] +# © 2017 Unicode®, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # Unicode Character Database diff --git a/lib/stdlib/uc_spec/GraphemeBreakProperty.txt b/lib/stdlib/uc_spec/GraphemeBreakProperty.txt index c5e94a3762..32bb12e47e 100644 --- a/lib/stdlib/uc_spec/GraphemeBreakProperty.txt +++ b/lib/stdlib/uc_spec/GraphemeBreakProperty.txt @@ -1,6 +1,6 @@ -# GraphemeBreakProperty-9.0.0.txt -# Date: 2016-06-03, 22:23:55 GMT -# © 2016 Unicode®, Inc. +# GraphemeBreakProperty-10.0.0.txt +# Date: 2017-03-12, 07:03:41 GMT +# © 2017 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see http://www.unicode.org/terms_of_use.html # @@ -25,8 +25,11 @@ 0D4E ; Prepend # Lo MALAYALAM LETTER DOT REPH 110BD ; Prepend # Cf KAITHI NUMBER SIGN 111C2..111C3 ; Prepend # Lo [2] SHARADA SIGN JIHVAMULIYA..SHARADA SIGN UPADHMANIYA +11A3A ; Prepend # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A86..11A89 ; Prepend # Lo [4] SOYOMBO CLUSTER-INITIAL LETTER RA..SOYOMBO CLUSTER-INITIAL LETTER SA +11D46 ; Prepend # Lo MASARAM GONDI REPHA -# Total code points: 13 +# Total code points: 19 # ================================================ @@ -126,6 +129,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> 0AC7..0AC8 ; Extend # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI 0ACD ; Extend # Mn GUJARATI SIGN VIRAMA 0AE2..0AE3 ; Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFF ; Extend # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE 0B01 ; Extend # Mn ORIYA SIGN CANDRABINDU 0B3C ; Extend # Mn ORIYA SIGN NUKTA 0B3E ; Extend # Mc ORIYA VOWEL SIGN AA @@ -154,7 +158,8 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> 0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK 0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL -0D01 ; Extend # Mn MALAYALAM SIGN CANDRABINDU +0D00..0D01 ; Extend # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D3B..0D3C ; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA 0D3E ; Extend # Mc MALAYALAM VOWEL SIGN AA 0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR 0D4D ; Extend # Mn MALAYALAM SIGN VIRAMA @@ -243,7 +248,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> 1CED ; Extend # Mn VEDIC SIGN TIRYAK 1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE 1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE -1DC0..1DF5 ; Extend # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE +1DC0..1DF9 ; Extend # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW 1DFB..1DFF ; Extend # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C ; Extend # Cf ZERO WIDTH NON-JOINER 20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE @@ -353,6 +358,15 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1171D..1171F ; Extend # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA 11722..11725 ; Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11727..1172B ; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +11A01..11A06 ; Extend # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O +11A09..11A0A ; Extend # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A33..11A38 ; Extend # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A3B..11A3E ; Extend # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; Extend # Mn ZANABAZAR SQUARE SUBJOINER +11A51..11A56 ; Extend # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A59..11A5B ; Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER 11C30..11C36 ; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA 11C3F ; Extend # Mn BHAIKSUKI SIGN VIRAMA @@ -360,6 +374,11 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11CAA..11CB0 ; Extend # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA 11CB2..11CB3 ; Extend # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E 11CB5..11CB6 ; Extend # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; Extend # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Extend # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Extend # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; Extend # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D47 ; Extend # Mn MASARAM GONDI RA-KARA 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW @@ -387,7 +406,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1828 +# Total code points: 1901 # ================================================ @@ -472,6 +491,7 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 1C34..1C35 ; SpacingMark # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG 1CE1 ; SpacingMark # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CF2..1CF3 ; SpacingMark # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF7 ; SpacingMark # Mc VEDIC SIGN ATIKRAMA A823..A824 ; SpacingMark # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I A827 ; SpacingMark # Mc SYLOTI NAGRI VOWEL SIGN OO A880..A881 ; SpacingMark # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA @@ -529,6 +549,10 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 116B6 ; SpacingMark # Mc TAKRI SIGN VIRAMA 11720..11721 ; SpacingMark # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11726 ; SpacingMark # Mc AHOM VOWEL SIGN E +11A07..11A08 ; SpacingMark # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU +11A39 ; SpacingMark # Mc ZANABAZAR SQUARE SIGN VISARGA +11A57..11A58 ; SpacingMark # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A97 ; SpacingMark # Mc SOYOMBO SIGN VISARGA 11C2F ; SpacingMark # Mc BHAIKSUKI VOWEL SIGN AA 11C3E ; SpacingMark # Mc BHAIKSUKI SIGN VISARGA 11CA9 ; SpacingMark # Mc MARCHEN SUBJOINED LETTER YA @@ -538,7 +562,7 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT -# Total code points: 341 +# Total code points: 348 # ================================================ @@ -1375,8 +1399,9 @@ D789..D7A3 ; LVT # Lo [27] HANGUL SYLLABLE HIG..HANGUL SYLLABLE HIH 26F9 ; E_Base # So PERSON WITH BALL 270A..270D ; E_Base # So [4] RAISED FIST..WRITING HAND 1F385 ; E_Base # So FATHER CHRISTMAS -1F3C3..1F3C4 ; E_Base # So [2] RUNNER..SURFER -1F3CA..1F3CB ; E_Base # So [2] SWIMMER..WEIGHT LIFTER +1F3C2..1F3C4 ; E_Base # So [3] SNOWBOARDER..SURFER +1F3C7 ; E_Base # So HORSE RACING +1F3CA..1F3CC ; E_Base # So [3] SWIMMER..GOLFER 1F442..1F443 ; E_Base # So [2] EAR..NOSE 1F446..1F450 ; E_Base # So [11] WHITE UP POINTING BACKHAND INDEX..OPEN HANDS SIGN 1F46E ; E_Base # So POLICE OFFICER @@ -1385,7 +1410,7 @@ D789..D7A3 ; LVT # Lo [27] HANGUL SYLLABLE HIG..HANGUL SYLLABLE HIH 1F481..1F483 ; E_Base # So [3] INFORMATION DESK PERSON..DANCER 1F485..1F487 ; E_Base # So [3] NAIL POLISH..HAIRCUT 1F4AA ; E_Base # So FLEXED BICEPS -1F575 ; E_Base # So SLEUTH OR SPY +1F574..1F575 ; E_Base # So [2] MAN IN BUSINESS SUIT LEVITATING..SLEUTH OR SPY 1F57A ; E_Base # So MAN DANCING 1F590 ; E_Base # So RAISED HAND WITH FINGERS SPLAYED 1F595..1F596 ; E_Base # So [2] REVERSED HAND WITH MIDDLE FINGER EXTENDED..RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS @@ -1394,13 +1419,15 @@ D789..D7A3 ; LVT # Lo [27] HANGUL SYLLABLE HIG..HANGUL SYLLABLE HIH 1F6A3 ; E_Base # So ROWBOAT 1F6B4..1F6B6 ; E_Base # So [3] BICYCLIST..PEDESTRIAN 1F6C0 ; E_Base # So BATH -1F918..1F91E ; E_Base # So [7] SIGN OF THE HORNS..HAND WITH INDEX AND MIDDLE FINGERS CROSSED +1F6CC ; E_Base # So SLEEPING ACCOMMODATION +1F918..1F91C ; E_Base # So [5] SIGN OF THE HORNS..RIGHT-FACING FIST +1F91E..1F91F ; E_Base # So [2] HAND WITH INDEX AND MIDDLE FINGERS CROSSED..I LOVE YOU HAND SIGN 1F926 ; E_Base # So FACE PALM -1F930 ; E_Base # So PREGNANT WOMAN -1F933..1F939 ; E_Base # So [7] SELFIE..JUGGLING -1F93C..1F93E ; E_Base # So [3] WRESTLERS..HANDBALL +1F930..1F939 ; E_Base # So [10] PREGNANT WOMAN..JUGGLING +1F93D..1F93E ; E_Base # So [2] WATER POLO..HANDBALL +1F9D1..1F9DD ; E_Base # So [13] ADULT..ELF -# Total code points: 79 +# Total code points: 98 # ================================================ @@ -1416,11 +1443,28 @@ D789..D7A3 ; LVT # Lo [27] HANGUL SYLLABLE HIG..HANGUL SYLLABLE HIH # ================================================ +2640 ; Glue_After_Zwj # So FEMALE SIGN +2642 ; Glue_After_Zwj # So MALE SIGN +2695..2696 ; Glue_After_Zwj # So [2] STAFF OF AESCULAPIUS..SCALES +2708 ; Glue_After_Zwj # So AIRPLANE 2764 ; Glue_After_Zwj # So HEAVY BLACK HEART +1F308 ; Glue_After_Zwj # So RAINBOW +1F33E ; Glue_After_Zwj # So EAR OF RICE +1F373 ; Glue_After_Zwj # So COOKING +1F393 ; Glue_After_Zwj # So GRADUATION CAP +1F3A4 ; Glue_After_Zwj # So MICROPHONE +1F3A8 ; Glue_After_Zwj # So ARTIST PALETTE +1F3EB ; Glue_After_Zwj # So SCHOOL +1F3ED ; Glue_After_Zwj # So FACTORY 1F48B ; Glue_After_Zwj # So KISS MARK +1F4BB..1F4BC ; Glue_After_Zwj # So [2] PERSONAL COMPUTER..BRIEFCASE +1F527 ; Glue_After_Zwj # So WRENCH +1F52C ; Glue_After_Zwj # So MICROSCOPE 1F5E8 ; Glue_After_Zwj # So LEFT SPEECH BUBBLE +1F680 ; Glue_After_Zwj # So ROCKET +1F692 ; Glue_After_Zwj # So FIRE ENGINE -# Total code points: 3 +# Total code points: 22 # ================================================ diff --git a/lib/stdlib/uc_spec/PropList.txt b/lib/stdlib/uc_spec/PropList.txt index a8c0da7135..9a2d0e4b1c 100644 --- a/lib/stdlib/uc_spec/PropList.txt +++ b/lib/stdlib/uc_spec/PropList.txt @@ -1,6 +1,6 @@ -# PropList-9.0.0.txt -# Date: 2016-06-01, 10:34:30 GMT -# © 2016 Unicode®, Inc. +# PropList-10.0.0.txt +# Date: 2017-03-10, 08:25:30 GMT +# © 2017 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see http://www.unicode.org/terms_of_use.html # @@ -199,6 +199,9 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA 115C9..115D7 ; Terminal_Punctuation # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES 11641..11642 ; Terminal_Punctuation # Po [2] MODI DANDA..MODI DOUBLE DANDA 1173C..1173E ; Terminal_Punctuation # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +11A42..11A43 ; Terminal_Punctuation # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD +11A9B..11A9C ; Terminal_Punctuation # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD +11AA1..11AA2 ; Terminal_Punctuation # Po [2] SOYOMBO TERMINAL MARK-1..SOYOMBO TERMINAL MARK-2 11C41..11C43 ; Terminal_Punctuation # Po [3] BHAIKSUKI DANDA..BHAIKSUKI WORD SEPARATOR 11C71 ; Terminal_Punctuation # Po MARCHEN MARK SHAD 12470..12474 ; Terminal_Punctuation # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON @@ -209,7 +212,7 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA 1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA87..1DA8A ; Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON -# Total code points: 246 +# Total code points: 252 # ================================================ @@ -471,6 +474,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 0AC9 ; Other_Alphabetic # Mc GUJARATI VOWEL SIGN CANDRA O 0ACB..0ACC ; Other_Alphabetic # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU 0AE2..0AE3 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFC ; Other_Alphabetic # Mn [3] GUJARATI SIGN SUKUN..GUJARATI SIGN MADDAH 0B01 ; Other_Alphabetic # Mn ORIYA SIGN CANDRABINDU 0B02..0B03 ; Other_Alphabetic # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA 0B3E ; Other_Alphabetic # Mc ORIYA VOWEL SIGN AA @@ -508,7 +512,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 0CCC ; Other_Alphabetic # Mn KANNADA VOWEL SIGN AU 0CD5..0CD6 ; Other_Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK 0CE2..0CE3 ; Other_Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL -0D01 ; Other_Alphabetic # Mn MALAYALAM SIGN CANDRABINDU +0D00..0D01 ; Other_Alphabetic # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU 0D02..0D03 ; Other_Alphabetic # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA 0D3E..0D40 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II 0D41..0D44 ; Other_Alphabetic # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR @@ -726,6 +730,17 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 11722..11725 ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; Other_Alphabetic # Mc AHOM VOWEL SIGN E 11727..1172A ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN AW..AHOM VOWEL SIGN AM +11A01..11A06 ; Other_Alphabetic # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O +11A07..11A08 ; Other_Alphabetic # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU +11A09..11A0A ; Other_Alphabetic # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A35..11A38 ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE SIGN CANDRABINDU..ZANABAZAR SQUARE SIGN ANUSVARA +11A39 ; Other_Alphabetic # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3B..11A3E ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A51..11A56 ; Other_Alphabetic # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58 ; Other_Alphabetic # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B ; Other_Alphabetic # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; Other_Alphabetic # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A97 ; Other_Alphabetic # Mc SOYOMBO SIGN VISARGA 11C2F ; Other_Alphabetic # Mc BHAIKSUKI VOWEL SIGN AA 11C30..11C36 ; Other_Alphabetic # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; Other_Alphabetic # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA @@ -737,6 +752,12 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 11CB2..11CB3 ; Other_Alphabetic # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E 11CB4 ; Other_Alphabetic # Mc MARCHEN VOWEL SIGN O 11CB5..11CB6 ; Other_Alphabetic # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; Other_Alphabetic # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Other_Alphabetic # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Other_Alphabetic # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D41 ; Other_Alphabetic # Mn [3] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI SIGN VISARGA +11D43 ; Other_Alphabetic # Mn MASARAM GONDI SIGN CANDRA +11D47 ; Other_Alphabetic # Mn MASARAM GONDI RA-KARA 16B30..16B36 ; Other_Alphabetic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F51..16F7E ; Other_Alphabetic # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG 1BC9E ; Other_Alphabetic # Mn DUPLOYAN DOUBLE MARK @@ -750,7 +771,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1238 +# Total code points: 1300 # ================================================ @@ -759,18 +780,20 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE 3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 3400..4DB5 ; Ideographic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FD5 ; Ideographic # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5 +4E00..9FEA ; Ideographic # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 17000..187EC ; Ideographic # Lo [6125] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187EC 18800..18AF2 ; Ideographic # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755 +1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 20000..2A6D6 ; Ideographic # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; Ideographic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; Ideographic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 88284 +# Total code points: 96174 # ================================================ @@ -826,12 +849,14 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 0A4D ; Diacritic # Mn GURMUKHI SIGN VIRAMA 0ABC ; Diacritic # Mn GUJARATI SIGN NUKTA 0ACD ; Diacritic # Mn GUJARATI SIGN VIRAMA +0AFD..0AFF ; Diacritic # Mn [3] GUJARATI SIGN THREE-DOT NUKTA ABOVE..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE 0B3C ; Diacritic # Mn ORIYA SIGN NUKTA 0B4D ; Diacritic # Mn ORIYA SIGN VIRAMA 0BCD ; Diacritic # Mn TAMIL SIGN VIRAMA 0C4D ; Diacritic # Mn TELUGU SIGN VIRAMA 0CBC ; Diacritic # Mn KANNADA SIGN NUKTA 0CCD ; Diacritic # Mn KANNADA SIGN VIRAMA +0D3B..0D3C ; Diacritic # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA 0D4D ; Diacritic # Mn MALAYALAM SIGN VIRAMA 0DCA ; Diacritic # Mn SINHALA SIGN AL-LAKUNA 0E47..0E4C ; Diacritic # Mn [6] THAI CHARACTER MAITAIKHU..THAI CHARACTER THANTHAKHAT @@ -871,10 +896,11 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 1CE2..1CE8 ; Diacritic # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Diacritic # Mn VEDIC SIGN TIRYAK 1CF4 ; Diacritic # Mn VEDIC TONE CANDRA ABOVE +1CF7 ; Diacritic # Mc VEDIC SIGN ATIKRAMA 1CF8..1CF9 ; Diacritic # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE 1D2C..1D6A ; Diacritic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW -1DF5 ; Diacritic # Mn COMBINING UP TACK ABOVE +1DF5..1DF9 ; Diacritic # Mn [5] COMBINING UP TACK ABOVE..COMBINING WIDE INVERTED BRIDGE BELOW 1DFD..1DFF ; Diacritic # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 1FBD ; Diacritic # Sk GREEK KORONIS 1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI @@ -947,7 +973,12 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 116B6 ; Diacritic # Mc TAKRI SIGN VIRAMA 116B7 ; Diacritic # Mn TAKRI SIGN NUKTA 1172B ; Diacritic # Mn AHOM SIGN KILLER +11A34 ; Diacritic # Mn ZANABAZAR SQUARE SIGN VIRAMA +11A47 ; Diacritic # Mn ZANABAZAR SQUARE SUBJOINER +11A99 ; Diacritic # Mn SOYOMBO SUBJOINER 11C3F ; Diacritic # Mn BHAIKSUKI SIGN VIRAMA +11D42 ; Diacritic # Mn MASARAM GONDI SIGN NUKTA +11D44..11D45 ; Diacritic # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA 16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 @@ -960,7 +991,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 782 +# Total code points: 798 # ================================================ @@ -989,11 +1020,12 @@ AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETE FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK 1135D ; Extender # Lo GRANTHA SIGN PLUTA 115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3 +11A98 ; Extender # Mn SOYOMBO GEMINATION MARK 16B42..16B43 ; Extender # Lm [2] PAHAWH HMONG SIGN VOS NRUA..PAHAWH HMONG SIGN IB YAM -16FE0 ; Extender # Lm TANGUT ITERATION MARK +16FE0..16FE1 ; Extender # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK -# Total code points: 42 +# Total code points: 44 # ================================================ @@ -1105,7 +1137,7 @@ E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG # ================================================ 3400..4DB5 ; Unified_Ideograph # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FD5 ; Unified_Ideograph # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5 +4E00..9FEA ; Unified_Ideograph # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11 FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14 @@ -1117,8 +1149,9 @@ FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..C 2A700..2B734 ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; Unified_Ideograph # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 -# Total code points: 80388 +# Total code points: 87882 # ================================================ @@ -1277,6 +1310,8 @@ FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP 115C9..115D7 ; Sentence_Terminal # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES 11641..11642 ; Sentence_Terminal # Po [2] MODI DANDA..MODI DOUBLE DANDA 1173C..1173E ; Sentence_Terminal # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +11A42..11A43 ; Sentence_Terminal # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD +11A9B..11A9C ; Sentence_Terminal # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD 11C41..11C42 ; Sentence_Terminal # Po [2] BHAIKSUKI DANDA..BHAIKSUKI DOUBLE DANDA 16A6E..16A6F ; Sentence_Terminal # Po [2] MRO DANDA..MRO DOUBLE DANDA 16AF5 ; Sentence_Terminal # Po BASSA VAH FULL STOP @@ -1285,7 +1320,7 @@ FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP 1BC9F ; Sentence_Terminal # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA88 ; Sentence_Terminal # Po SIGNWRITING FULL STOP -# Total code points: 124 +# Total code points: 128 # ================================================ @@ -1402,9 +1437,7 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S 239B..23B3 ; Pattern_Syntax # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM 23B4..23DB ; Pattern_Syntax # So [40] TOP SQUARE BRACKET..FUSE 23DC..23E1 ; Pattern_Syntax # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET -23E2..23FE ; Pattern_Syntax # So [29] WHITE TRAPEZIUM..POWER SLEEP SYMBOL -23FF ; Pattern_Syntax # Cn <reserved-23FF> -2400..2426 ; Pattern_Syntax # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO +23E2..2426 ; Pattern_Syntax # So [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO 2427..243F ; Pattern_Syntax # Cn [25] <reserved-2427>..<reserved-243F> 2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH 244B..245F ; Pattern_Syntax # Cn [21] <reserved-244B>..<reserved-245F> @@ -1492,8 +1525,8 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S 2BBA..2BBC ; Pattern_Syntax # Cn [3] <reserved-2BBA>..<reserved-2BBC> 2BBD..2BC8 ; Pattern_Syntax # So [12] BALLOT BOX WITH LIGHT X..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED 2BC9 ; Pattern_Syntax # Cn <reserved-2BC9> -2BCA..2BD1 ; Pattern_Syntax # So [8] TOP HALF BLACK CIRCLE..UNCERTAINTY SIGN -2BD2..2BEB ; Pattern_Syntax # Cn [26] <reserved-2BD2>..<reserved-2BEB> +2BCA..2BD2 ; Pattern_Syntax # So [9] TOP HALF BLACK CIRCLE..GROUP MARK +2BD3..2BEB ; Pattern_Syntax # Cn [25] <reserved-2BD3>..<reserved-2BEB> 2BEC..2BEF ; Pattern_Syntax # So [4] LEFTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS..DOWNWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS 2BF0..2BFF ; Pattern_Syntax # Cn [16] <reserved-2BF0>..<reserved-2BFF> 2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER @@ -1533,8 +1566,8 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S 2E40 ; Pattern_Syntax # Pd DOUBLE HYPHEN 2E41 ; Pattern_Syntax # Po REVERSED COMMA 2E42 ; Pattern_Syntax # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK -2E43..2E44 ; Pattern_Syntax # Po [2] DASH WITH LEFT UPTURN..DOUBLE SUSPENSION MARK -2E45..2E7F ; Pattern_Syntax # Cn [59] <reserved-2E45>..<reserved-2E7F> +2E43..2E49 ; Pattern_Syntax # Po [7] DASH WITH LEFT UPTURN..DOUBLE STACKED COMMA +2E4A..2E7F ; Pattern_Syntax # Cn [54] <reserved-2E4A>..<reserved-2E7F> 3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK 3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET 3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET @@ -1576,4 +1609,10 @@ FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT # Total code points: 10 +# ================================================ + +1F1E6..1F1FF ; Regional_Indicator # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z + +# Total code points: 26 + # EOF diff --git a/lib/stdlib/uc_spec/SpecialCasing.txt b/lib/stdlib/uc_spec/SpecialCasing.txt index b23fa7f768..b9ba0d81c1 100644 --- a/lib/stdlib/uc_spec/SpecialCasing.txt +++ b/lib/stdlib/uc_spec/SpecialCasing.txt @@ -1,6 +1,6 @@ -# SpecialCasing-9.0.0.txt -# Date: 2016-03-02, 18:55:13 GMT -# © 2016 Unicode®, Inc. +# SpecialCasing-10.0.0.txt +# Date: 2017-04-14, 05:40:43 GMT +# © 2017 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see http://www.unicode.org/terms_of_use.html # @@ -197,7 +197,7 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH # ================================================================================ # Conditional Mappings -# The remainder of this file provides conditional casing data used to produce +# The remainder of this file provides conditional casing data used to produce # full case mappings. # ================================================================================ # Language-Insensitive Mappings diff --git a/lib/stdlib/uc_spec/UnicodeData.txt b/lib/stdlib/uc_spec/UnicodeData.txt index a756976461..d89c64f526 100644 --- a/lib/stdlib/uc_spec/UnicodeData.txt +++ b/lib/stdlib/uc_spec/UnicodeData.txt @@ -2072,6 +2072,17 @@ 085A;MANDAIC VOCALIZATION MARK;Mn;220;NSM;;;;;N;;;;; 085B;MANDAIC GEMINATION MARK;Mn;220;NSM;;;;;N;;;;; 085E;MANDAIC PUNCTUATION;Po;0;R;;;;;N;;;;; +0860;SYRIAC LETTER MALAYALAM NGA;Lo;0;AL;;;;;N;;;;; +0861;SYRIAC LETTER MALAYALAM JA;Lo;0;AL;;;;;N;;;;; +0862;SYRIAC LETTER MALAYALAM NYA;Lo;0;AL;;;;;N;;;;; +0863;SYRIAC LETTER MALAYALAM TTA;Lo;0;AL;;;;;N;;;;; +0864;SYRIAC LETTER MALAYALAM NNA;Lo;0;AL;;;;;N;;;;; +0865;SYRIAC LETTER MALAYALAM NNNA;Lo;0;AL;;;;;N;;;;; +0866;SYRIAC LETTER MALAYALAM BHA;Lo;0;AL;;;;;N;;;;; +0867;SYRIAC LETTER MALAYALAM RA;Lo;0;AL;;;;;N;;;;; +0868;SYRIAC LETTER MALAYALAM LLA;Lo;0;AL;;;;;N;;;;; +0869;SYRIAC LETTER MALAYALAM LLLA;Lo;0;AL;;;;;N;;;;; +086A;SYRIAC LETTER MALAYALAM SSA;Lo;0;AL;;;;;N;;;;; 08A0;ARABIC LETTER BEH WITH SMALL V BELOW;Lo;0;AL;;;;;N;;;;; 08A1;ARABIC LETTER BEH WITH HAMZA ABOVE;Lo;0;AL;;;;;N;;;;; 08A2;ARABIC LETTER JEEM WITH TWO DOTS ABOVE;Lo;0;AL;;;;;N;;;;; @@ -2366,6 +2377,8 @@ 09F9;BENGALI CURRENCY DENOMINATOR SIXTEEN;No;0;L;;;;16;N;;;;; 09FA;BENGALI ISSHAR;So;0;L;;;;;N;;;;; 09FB;BENGALI GANDA MARK;Sc;0;ET;;;;;N;;;;; +09FC;BENGALI LETTER VEDIC ANUSVARA;Lo;0;L;;;;;N;;;;; +09FD;BENGALI ABBREVIATION SIGN;Po;0;L;;;;;N;;;;; 0A01;GURMUKHI SIGN ADAK BINDI;Mn;0;NSM;;;;;N;;;;; 0A02;GURMUKHI SIGN BINDI;Mn;0;NSM;;;;;N;;;;; 0A03;GURMUKHI SIGN VISARGA;Mc;0;L;;;;;N;;;;; @@ -2530,6 +2543,12 @@ 0AF0;GUJARATI ABBREVIATION SIGN;Po;0;L;;;;;N;;;;; 0AF1;GUJARATI RUPEE SIGN;Sc;0;ET;;;;;N;;;;; 0AF9;GUJARATI LETTER ZHA;Lo;0;L;;;;;N;;;;; +0AFA;GUJARATI SIGN SUKUN;Mn;0;NSM;;;;;N;;;;; +0AFB;GUJARATI SIGN SHADDA;Mn;0;NSM;;;;;N;;;;; +0AFC;GUJARATI SIGN MADDAH;Mn;0;NSM;;;;;N;;;;; +0AFD;GUJARATI SIGN THREE-DOT NUKTA ABOVE;Mn;0;NSM;;;;;N;;;;; +0AFE;GUJARATI SIGN CIRCLE NUKTA ABOVE;Mn;0;NSM;;;;;N;;;;; +0AFF;GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE;Mn;0;NSM;;;;;N;;;;; 0B01;ORIYA SIGN CANDRABINDU;Mn;0;NSM;;;;;N;;;;; 0B02;ORIYA SIGN ANUSVARA;Mc;0;L;;;;;N;;;;; 0B03;ORIYA SIGN VISARGA;Mc;0;L;;;;;N;;;;; @@ -2876,6 +2895,7 @@ 0CEF;KANNADA DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; 0CF1;KANNADA SIGN JIHVAMULIYA;Lo;0;L;;;;;N;;;;; 0CF2;KANNADA SIGN UPADHMANIYA;Lo;0;L;;;;;N;;;;; +0D00;MALAYALAM SIGN COMBINING ANUSVARA ABOVE;Mn;0;NSM;;;;;N;;;;; 0D01;MALAYALAM SIGN CANDRABINDU;Mn;0;NSM;;;;;N;;;;; 0D02;MALAYALAM SIGN ANUSVARA;Mc;0;L;;;;;N;;;;; 0D03;MALAYALAM SIGN VISARGA;Mc;0;L;;;;;N;;;;; @@ -2931,6 +2951,8 @@ 0D38;MALAYALAM LETTER SA;Lo;0;L;;;;;N;;;;; 0D39;MALAYALAM LETTER HA;Lo;0;L;;;;;N;;;;; 0D3A;MALAYALAM LETTER TTTA;Lo;0;L;;;;;N;;;;; +0D3B;MALAYALAM SIGN VERTICAL BAR VIRAMA;Mn;9;NSM;;;;;N;;;;; +0D3C;MALAYALAM SIGN CIRCULAR VIRAMA;Mn;9;NSM;;;;;N;;;;; 0D3D;MALAYALAM SIGN AVAGRAHA;Lo;0;L;;;;;N;;;;; 0D3E;MALAYALAM VOWEL SIGN AA;Mc;0;L;;;;;N;;;;; 0D3F;MALAYALAM VOWEL SIGN I;Mc;0;L;;;;;N;;;;; @@ -6413,6 +6435,7 @@ 1CF4;VEDIC TONE CANDRA ABOVE;Mn;230;NSM;;;;;N;;;;; 1CF5;VEDIC SIGN JIHVAMULIYA;Lo;0;L;;;;;N;;;;; 1CF6;VEDIC SIGN UPADHMANIYA;Lo;0;L;;;;;N;;;;; +1CF7;VEDIC SIGN ATIKRAMA;Mc;0;L;;;;;N;;;;; 1CF8;VEDIC TONE RING ABOVE;Mn;230;NSM;;;;;N;;;;; 1CF9;VEDIC TONE DOUBLE RING ABOVE;Mn;230;NSM;;;;;N;;;;; 1D00;LATIN LETTER SMALL CAPITAL A;Ll;0;L;;;;;N;;;;; @@ -6661,6 +6684,10 @@ 1DF3;COMBINING LATIN SMALL LETTER O WITH DIAERESIS;Mn;230;NSM;;;;;N;;;;; 1DF4;COMBINING LATIN SMALL LETTER U WITH DIAERESIS;Mn;230;NSM;;;;;N;;;;; 1DF5;COMBINING UP TACK ABOVE;Mn;230;NSM;;;;;N;;;;; +1DF6;COMBINING KAVYKA ABOVE RIGHT;Mn;232;NSM;;;;;N;;;;; +1DF7;COMBINING KAVYKA ABOVE LEFT;Mn;228;NSM;;;;;N;;;;; +1DF8;COMBINING DOT ABOVE LEFT;Mn;228;NSM;;;;;N;;;;; +1DF9;COMBINING WIDE INVERTED BRIDGE BELOW;Mn;220;NSM;;;;;N;;;;; 1DFB;COMBINING DELETION MARK;Mn;230;NSM;;;;;N;;;;; 1DFC;COMBINING DOUBLE INVERTED BREVE BELOW;Mn;233;NSM;;;;;N;;;;; 1DFD;COMBINING ALMOST EQUAL TO BELOW;Mn;220;NSM;;;;;N;;;;; @@ -7339,6 +7366,7 @@ 20BC;MANAT SIGN;Sc;0;ET;;;;;N;;;;; 20BD;RUBLE SIGN;Sc;0;ET;;;;;N;;;;; 20BE;LARI SIGN;Sc;0;ET;;;;;N;;;;; +20BF;BITCOIN SIGN;Sc;0;ET;;;;;N;;;;; 20D0;COMBINING LEFT HARPOON ABOVE;Mn;230;NSM;;;;;N;NON-SPACING LEFT HARPOON ABOVE;;;; 20D1;COMBINING RIGHT HARPOON ABOVE;Mn;230;NSM;;;;;N;NON-SPACING RIGHT HARPOON ABOVE;;;; 20D2;COMBINING LONG VERTICAL LINE OVERLAY;Mn;1;NSM;;;;;N;NON-SPACING LONG VERTICAL BAR OVERLAY;;;; @@ -8135,6 +8163,7 @@ 23FC;POWER ON-OFF SYMBOL;So;0;ON;;;;;N;;;;; 23FD;POWER ON SYMBOL;So;0;ON;;;;;N;;;;; 23FE;POWER SLEEP SYMBOL;So;0;ON;;;;;N;;;;; +23FF;OBSERVER EYE SYMBOL;So;0;ON;;;;;N;;;;; 2400;SYMBOL FOR NULL;So;0;ON;;;;;N;GRAPHIC FOR NULL;;;; 2401;SYMBOL FOR START OF HEADING;So;0;ON;;;;;N;GRAPHIC FOR START OF HEADING;;;; 2402;SYMBOL FOR START OF TEXT;So;0;ON;;;;;N;GRAPHIC FOR START OF TEXT;;;; @@ -10083,6 +10112,7 @@ 2BCF;ROTATED WHITE FOUR POINTED CUSP;So;0;ON;;;;;N;;;;; 2BD0;SQUARE POSITION INDICATOR;So;0;ON;;;;;N;;;;; 2BD1;UNCERTAINTY SIGN;So;0;ON;;;;;N;;;;; +2BD2;GROUP MARK;So;0;ON;;;;;N;;;;; 2BEC;LEFTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS;So;0;ON;;;;;N;;;;; 2BED;UPWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS;So;0;ON;;;;;N;;;;; 2BEE;RIGHTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS;So;0;ON;;;;;N;;;;; @@ -10615,6 +10645,11 @@ 2E42;DOUBLE LOW-REVERSED-9 QUOTATION MARK;Ps;0;ON;;;;;N;;;;; 2E43;DASH WITH LEFT UPTURN;Po;0;ON;;;;;N;;;;; 2E44;DOUBLE SUSPENSION MARK;Po;0;ON;;;;;N;;;;; +2E45;INVERTED LOW KAVYKA;Po;0;ON;;;;;N;;;;; +2E46;INVERTED LOW KAVYKA WITH KAVYKA ABOVE;Po;0;ON;;;;;N;;;;; +2E47;LOW KAVYKA;Po;0;ON;;;;;N;;;;; +2E48;LOW KAVYKA WITH DOT;Po;0;ON;;;;;N;;;;; +2E49;DOUBLE STACKED COMMA;Po;0;ON;;;;;N;;;;; 2E80;CJK RADICAL REPEAT;So;0;ON;;;;;N;;;;; 2E81;CJK RADICAL CLIFF;So;0;ON;;;;;N;;;;; 2E82;CJK RADICAL SECOND ONE;So;0;ON;;;;;N;;;;; @@ -11250,6 +11285,7 @@ 312B;BOPOMOFO LETTER NG;Lo;0;L;;;;;N;;;;; 312C;BOPOMOFO LETTER GN;Lo;0;L;;;;;N;;;;; 312D;BOPOMOFO LETTER IH;Lo;0;L;;;;;N;;;;; +312E;BOPOMOFO LETTER O WITH DOT ABOVE;Lo;0;L;;;;;N;;;;; 3131;HANGUL LETTER KIYEOK;Lo;0;L;<compat> 1100;;;;N;HANGUL LETTER GIYEOG;;;; 3132;HANGUL LETTER SSANGKIYEOK;Lo;0;L;<compat> 1101;;;;N;HANGUL LETTER SSANG GIYEOG;;;; 3133;HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<compat> 11AA;;;;N;HANGUL LETTER GIYEOG SIOS;;;; @@ -12016,7 +12052,7 @@ 4DFE;HEXAGRAM FOR AFTER COMPLETION;So;0;ON;;;;;N;;;;; 4DFF;HEXAGRAM FOR BEFORE COMPLETION;So;0;ON;;;;;N;;;;; 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;; -9FD5;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;; +9FEA;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;; A000;YI SYLLABLE IT;Lo;0;L;;;;;N;;;;; A001;YI SYLLABLE IX;Lo;0;L;;;;;N;;;;; A002;YI SYLLABLE I;Lo;0;L;;;;;N;;;;; @@ -17093,6 +17129,9 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10321;OLD ITALIC NUMERAL FIVE;No;0;L;;;;5;N;;;;; 10322;OLD ITALIC NUMERAL TEN;No;0;L;;;;10;N;;;;; 10323;OLD ITALIC NUMERAL FIFTY;No;0;L;;;;50;N;;;;; +1032D;OLD ITALIC LETTER YE;Lo;0;L;;;;;N;;;;; +1032E;OLD ITALIC LETTER NORTHERN TSE;Lo;0;L;;;;;N;;;;; +1032F;OLD ITALIC LETTER SOUTHERN TSE;Lo;0;L;;;;;N;;;;; 10330;GOTHIC LETTER AHSA;Lo;0;L;;;;;N;;;;; 10331;GOTHIC LETTER BAIRKAN;Lo;0;L;;;;;N;;;;; 10332;GOTHIC LETTER GIBA;Lo;0;L;;;;;N;;;;; @@ -20068,6 +20107,158 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 118F1;WARANG CITI NUMBER EIGHTY;No;0;L;;;;80;N;;;;; 118F2;WARANG CITI NUMBER NINETY;No;0;L;;;;90;N;;;;; 118FF;WARANG CITI OM;Lo;0;L;;;;;N;;;;; +11A00;ZANABAZAR SQUARE LETTER A;Lo;0;L;;;;;N;;;;; +11A01;ZANABAZAR SQUARE VOWEL SIGN I;Mn;0;NSM;;;;;N;;;;; +11A02;ZANABAZAR SQUARE VOWEL SIGN UE;Mn;0;NSM;;;;;N;;;;; +11A03;ZANABAZAR SQUARE VOWEL SIGN U;Mn;0;NSM;;;;;N;;;;; +11A04;ZANABAZAR SQUARE VOWEL SIGN E;Mn;0;NSM;;;;;N;;;;; +11A05;ZANABAZAR SQUARE VOWEL SIGN OE;Mn;0;NSM;;;;;N;;;;; +11A06;ZANABAZAR SQUARE VOWEL SIGN O;Mn;0;NSM;;;;;N;;;;; +11A07;ZANABAZAR SQUARE VOWEL SIGN AI;Mc;0;L;;;;;N;;;;; +11A08;ZANABAZAR SQUARE VOWEL SIGN AU;Mc;0;L;;;;;N;;;;; +11A09;ZANABAZAR SQUARE VOWEL SIGN REVERSED I;Mn;0;NSM;;;;;N;;;;; +11A0A;ZANABAZAR SQUARE VOWEL LENGTH MARK;Mn;0;NSM;;;;;N;;;;; +11A0B;ZANABAZAR SQUARE LETTER KA;Lo;0;L;;;;;N;;;;; +11A0C;ZANABAZAR SQUARE LETTER KHA;Lo;0;L;;;;;N;;;;; +11A0D;ZANABAZAR SQUARE LETTER GA;Lo;0;L;;;;;N;;;;; +11A0E;ZANABAZAR SQUARE LETTER GHA;Lo;0;L;;;;;N;;;;; +11A0F;ZANABAZAR SQUARE LETTER NGA;Lo;0;L;;;;;N;;;;; +11A10;ZANABAZAR SQUARE LETTER CA;Lo;0;L;;;;;N;;;;; +11A11;ZANABAZAR SQUARE LETTER CHA;Lo;0;L;;;;;N;;;;; +11A12;ZANABAZAR SQUARE LETTER JA;Lo;0;L;;;;;N;;;;; +11A13;ZANABAZAR SQUARE LETTER NYA;Lo;0;L;;;;;N;;;;; +11A14;ZANABAZAR SQUARE LETTER TTA;Lo;0;L;;;;;N;;;;; +11A15;ZANABAZAR SQUARE LETTER TTHA;Lo;0;L;;;;;N;;;;; +11A16;ZANABAZAR SQUARE LETTER DDA;Lo;0;L;;;;;N;;;;; +11A17;ZANABAZAR SQUARE LETTER DDHA;Lo;0;L;;;;;N;;;;; +11A18;ZANABAZAR SQUARE LETTER NNA;Lo;0;L;;;;;N;;;;; +11A19;ZANABAZAR SQUARE LETTER TA;Lo;0;L;;;;;N;;;;; +11A1A;ZANABAZAR SQUARE LETTER THA;Lo;0;L;;;;;N;;;;; +11A1B;ZANABAZAR SQUARE LETTER DA;Lo;0;L;;;;;N;;;;; +11A1C;ZANABAZAR SQUARE LETTER DHA;Lo;0;L;;;;;N;;;;; +11A1D;ZANABAZAR SQUARE LETTER NA;Lo;0;L;;;;;N;;;;; +11A1E;ZANABAZAR SQUARE LETTER PA;Lo;0;L;;;;;N;;;;; +11A1F;ZANABAZAR SQUARE LETTER PHA;Lo;0;L;;;;;N;;;;; +11A20;ZANABAZAR SQUARE LETTER BA;Lo;0;L;;;;;N;;;;; +11A21;ZANABAZAR SQUARE LETTER BHA;Lo;0;L;;;;;N;;;;; +11A22;ZANABAZAR SQUARE LETTER MA;Lo;0;L;;;;;N;;;;; +11A23;ZANABAZAR SQUARE LETTER TSA;Lo;0;L;;;;;N;;;;; +11A24;ZANABAZAR SQUARE LETTER TSHA;Lo;0;L;;;;;N;;;;; +11A25;ZANABAZAR SQUARE LETTER DZA;Lo;0;L;;;;;N;;;;; +11A26;ZANABAZAR SQUARE LETTER DZHA;Lo;0;L;;;;;N;;;;; +11A27;ZANABAZAR SQUARE LETTER ZHA;Lo;0;L;;;;;N;;;;; +11A28;ZANABAZAR SQUARE LETTER ZA;Lo;0;L;;;;;N;;;;; +11A29;ZANABAZAR SQUARE LETTER -A;Lo;0;L;;;;;N;;;;; +11A2A;ZANABAZAR SQUARE LETTER YA;Lo;0;L;;;;;N;;;;; +11A2B;ZANABAZAR SQUARE LETTER RA;Lo;0;L;;;;;N;;;;; +11A2C;ZANABAZAR SQUARE LETTER LA;Lo;0;L;;;;;N;;;;; +11A2D;ZANABAZAR SQUARE LETTER VA;Lo;0;L;;;;;N;;;;; +11A2E;ZANABAZAR SQUARE LETTER SHA;Lo;0;L;;;;;N;;;;; +11A2F;ZANABAZAR SQUARE LETTER SSA;Lo;0;L;;;;;N;;;;; +11A30;ZANABAZAR SQUARE LETTER SA;Lo;0;L;;;;;N;;;;; +11A31;ZANABAZAR SQUARE LETTER HA;Lo;0;L;;;;;N;;;;; +11A32;ZANABAZAR SQUARE LETTER KSSA;Lo;0;L;;;;;N;;;;; +11A33;ZANABAZAR SQUARE FINAL CONSONANT MARK;Mn;0;NSM;;;;;N;;;;; +11A34;ZANABAZAR SQUARE SIGN VIRAMA;Mn;9;NSM;;;;;N;;;;; +11A35;ZANABAZAR SQUARE SIGN CANDRABINDU;Mn;0;NSM;;;;;N;;;;; +11A36;ZANABAZAR SQUARE SIGN CANDRABINDU WITH ORNAMENT;Mn;0;NSM;;;;;N;;;;; +11A37;ZANABAZAR SQUARE SIGN CANDRA WITH ORNAMENT;Mn;0;NSM;;;;;N;;;;; +11A38;ZANABAZAR SQUARE SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;; +11A39;ZANABAZAR SQUARE SIGN VISARGA;Mc;0;L;;;;;N;;;;; +11A3A;ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA;Lo;0;L;;;;;N;;;;; +11A3B;ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA;Mn;0;NSM;;;;;N;;;;; +11A3C;ZANABAZAR SQUARE CLUSTER-FINAL LETTER RA;Mn;0;NSM;;;;;N;;;;; +11A3D;ZANABAZAR SQUARE CLUSTER-FINAL LETTER LA;Mn;0;NSM;;;;;N;;;;; +11A3E;ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA;Mn;0;NSM;;;;;N;;;;; +11A3F;ZANABAZAR SQUARE INITIAL HEAD MARK;Po;0;L;;;;;N;;;;; +11A40;ZANABAZAR SQUARE CLOSING HEAD MARK;Po;0;L;;;;;N;;;;; +11A41;ZANABAZAR SQUARE MARK TSHEG;Po;0;L;;;;;N;;;;; +11A42;ZANABAZAR SQUARE MARK SHAD;Po;0;L;;;;;N;;;;; +11A43;ZANABAZAR SQUARE MARK DOUBLE SHAD;Po;0;L;;;;;N;;;;; +11A44;ZANABAZAR SQUARE MARK LONG TSHEG;Po;0;L;;;;;N;;;;; +11A45;ZANABAZAR SQUARE INITIAL DOUBLE-LINED HEAD MARK;Po;0;L;;;;;N;;;;; +11A46;ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK;Po;0;L;;;;;N;;;;; +11A47;ZANABAZAR SQUARE SUBJOINER;Mn;9;NSM;;;;;N;;;;; +11A50;SOYOMBO LETTER A;Lo;0;L;;;;;N;;;;; +11A51;SOYOMBO VOWEL SIGN I;Mn;0;NSM;;;;;N;;;;; +11A52;SOYOMBO VOWEL SIGN UE;Mn;0;NSM;;;;;N;;;;; +11A53;SOYOMBO VOWEL SIGN U;Mn;0;NSM;;;;;N;;;;; +11A54;SOYOMBO VOWEL SIGN E;Mn;0;NSM;;;;;N;;;;; +11A55;SOYOMBO VOWEL SIGN O;Mn;0;NSM;;;;;N;;;;; +11A56;SOYOMBO VOWEL SIGN OE;Mn;0;NSM;;;;;N;;;;; +11A57;SOYOMBO VOWEL SIGN AI;Mc;0;L;;;;;N;;;;; +11A58;SOYOMBO VOWEL SIGN AU;Mc;0;L;;;;;N;;;;; +11A59;SOYOMBO VOWEL SIGN VOCALIC R;Mn;0;NSM;;;;;N;;;;; +11A5A;SOYOMBO VOWEL SIGN VOCALIC L;Mn;0;NSM;;;;;N;;;;; +11A5B;SOYOMBO VOWEL LENGTH MARK;Mn;0;NSM;;;;;N;;;;; +11A5C;SOYOMBO LETTER KA;Lo;0;L;;;;;N;;;;; +11A5D;SOYOMBO LETTER KHA;Lo;0;L;;;;;N;;;;; +11A5E;SOYOMBO LETTER GA;Lo;0;L;;;;;N;;;;; +11A5F;SOYOMBO LETTER GHA;Lo;0;L;;;;;N;;;;; +11A60;SOYOMBO LETTER NGA;Lo;0;L;;;;;N;;;;; +11A61;SOYOMBO LETTER CA;Lo;0;L;;;;;N;;;;; +11A62;SOYOMBO LETTER CHA;Lo;0;L;;;;;N;;;;; +11A63;SOYOMBO LETTER JA;Lo;0;L;;;;;N;;;;; +11A64;SOYOMBO LETTER JHA;Lo;0;L;;;;;N;;;;; +11A65;SOYOMBO LETTER NYA;Lo;0;L;;;;;N;;;;; +11A66;SOYOMBO LETTER TTA;Lo;0;L;;;;;N;;;;; +11A67;SOYOMBO LETTER TTHA;Lo;0;L;;;;;N;;;;; +11A68;SOYOMBO LETTER DDA;Lo;0;L;;;;;N;;;;; +11A69;SOYOMBO LETTER DDHA;Lo;0;L;;;;;N;;;;; +11A6A;SOYOMBO LETTER NNA;Lo;0;L;;;;;N;;;;; +11A6B;SOYOMBO LETTER TA;Lo;0;L;;;;;N;;;;; +11A6C;SOYOMBO LETTER THA;Lo;0;L;;;;;N;;;;; +11A6D;SOYOMBO LETTER DA;Lo;0;L;;;;;N;;;;; +11A6E;SOYOMBO LETTER DHA;Lo;0;L;;;;;N;;;;; +11A6F;SOYOMBO LETTER NA;Lo;0;L;;;;;N;;;;; +11A70;SOYOMBO LETTER PA;Lo;0;L;;;;;N;;;;; +11A71;SOYOMBO LETTER PHA;Lo;0;L;;;;;N;;;;; +11A72;SOYOMBO LETTER BA;Lo;0;L;;;;;N;;;;; +11A73;SOYOMBO LETTER BHA;Lo;0;L;;;;;N;;;;; +11A74;SOYOMBO LETTER MA;Lo;0;L;;;;;N;;;;; +11A75;SOYOMBO LETTER TSA;Lo;0;L;;;;;N;;;;; +11A76;SOYOMBO LETTER TSHA;Lo;0;L;;;;;N;;;;; +11A77;SOYOMBO LETTER DZA;Lo;0;L;;;;;N;;;;; +11A78;SOYOMBO LETTER ZHA;Lo;0;L;;;;;N;;;;; +11A79;SOYOMBO LETTER ZA;Lo;0;L;;;;;N;;;;; +11A7A;SOYOMBO LETTER -A;Lo;0;L;;;;;N;;;;; +11A7B;SOYOMBO LETTER YA;Lo;0;L;;;;;N;;;;; +11A7C;SOYOMBO LETTER RA;Lo;0;L;;;;;N;;;;; +11A7D;SOYOMBO LETTER LA;Lo;0;L;;;;;N;;;;; +11A7E;SOYOMBO LETTER VA;Lo;0;L;;;;;N;;;;; +11A7F;SOYOMBO LETTER SHA;Lo;0;L;;;;;N;;;;; +11A80;SOYOMBO LETTER SSA;Lo;0;L;;;;;N;;;;; +11A81;SOYOMBO LETTER SA;Lo;0;L;;;;;N;;;;; +11A82;SOYOMBO LETTER HA;Lo;0;L;;;;;N;;;;; +11A83;SOYOMBO LETTER KSSA;Lo;0;L;;;;;N;;;;; +11A86;SOYOMBO CLUSTER-INITIAL LETTER RA;Lo;0;L;;;;;N;;;;; +11A87;SOYOMBO CLUSTER-INITIAL LETTER LA;Lo;0;L;;;;;N;;;;; +11A88;SOYOMBO CLUSTER-INITIAL LETTER SHA;Lo;0;L;;;;;N;;;;; +11A89;SOYOMBO CLUSTER-INITIAL LETTER SA;Lo;0;L;;;;;N;;;;; +11A8A;SOYOMBO FINAL CONSONANT SIGN G;Mn;0;NSM;;;;;N;;;;; +11A8B;SOYOMBO FINAL CONSONANT SIGN K;Mn;0;NSM;;;;;N;;;;; +11A8C;SOYOMBO FINAL CONSONANT SIGN NG;Mn;0;NSM;;;;;N;;;;; +11A8D;SOYOMBO FINAL CONSONANT SIGN D;Mn;0;NSM;;;;;N;;;;; +11A8E;SOYOMBO FINAL CONSONANT SIGN N;Mn;0;NSM;;;;;N;;;;; +11A8F;SOYOMBO FINAL CONSONANT SIGN B;Mn;0;NSM;;;;;N;;;;; +11A90;SOYOMBO FINAL CONSONANT SIGN M;Mn;0;NSM;;;;;N;;;;; +11A91;SOYOMBO FINAL CONSONANT SIGN R;Mn;0;NSM;;;;;N;;;;; +11A92;SOYOMBO FINAL CONSONANT SIGN L;Mn;0;NSM;;;;;N;;;;; +11A93;SOYOMBO FINAL CONSONANT SIGN SH;Mn;0;NSM;;;;;N;;;;; +11A94;SOYOMBO FINAL CONSONANT SIGN S;Mn;0;NSM;;;;;N;;;;; +11A95;SOYOMBO FINAL CONSONANT SIGN -A;Mn;0;NSM;;;;;N;;;;; +11A96;SOYOMBO SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;; +11A97;SOYOMBO SIGN VISARGA;Mc;0;L;;;;;N;;;;; +11A98;SOYOMBO GEMINATION MARK;Mn;0;NSM;;;;;N;;;;; +11A99;SOYOMBO SUBJOINER;Mn;9;NSM;;;;;N;;;;; +11A9A;SOYOMBO MARK TSHEG;Po;0;L;;;;;N;;;;; +11A9B;SOYOMBO MARK SHAD;Po;0;L;;;;;N;;;;; +11A9C;SOYOMBO MARK DOUBLE SHAD;Po;0;L;;;;;N;;;;; +11A9E;SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME;Po;0;L;;;;;N;;;;; +11A9F;SOYOMBO HEAD MARK WITH MOON AND SUN AND FLAME;Po;0;L;;;;;N;;;;; +11AA0;SOYOMBO HEAD MARK WITH MOON AND SUN;Po;0;L;;;;;N;;;;; +11AA1;SOYOMBO TERMINAL MARK-1;Po;0;L;;;;;N;;;;; +11AA2;SOYOMBO TERMINAL MARK-2;Po;0;L;;;;;N;;;;; 11AC0;PAU CIN HAU LETTER PA;Lo;0;L;;;;;N;;;;; 11AC1;PAU CIN HAU LETTER KA;Lo;0;L;;;;;N;;;;; 11AC2;PAU CIN HAU LETTER LA;Lo;0;L;;;;;N;;;;; @@ -20290,6 +20481,81 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 11CB4;MARCHEN VOWEL SIGN O;Mc;0;L;;;;;N;;;;; 11CB5;MARCHEN SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;; 11CB6;MARCHEN SIGN CANDRABINDU;Mn;0;NSM;;;;;N;;;;; +11D00;MASARAM GONDI LETTER A;Lo;0;L;;;;;N;;;;; +11D01;MASARAM GONDI LETTER AA;Lo;0;L;;;;;N;;;;; +11D02;MASARAM GONDI LETTER I;Lo;0;L;;;;;N;;;;; +11D03;MASARAM GONDI LETTER II;Lo;0;L;;;;;N;;;;; +11D04;MASARAM GONDI LETTER U;Lo;0;L;;;;;N;;;;; +11D05;MASARAM GONDI LETTER UU;Lo;0;L;;;;;N;;;;; +11D06;MASARAM GONDI LETTER E;Lo;0;L;;;;;N;;;;; +11D08;MASARAM GONDI LETTER AI;Lo;0;L;;;;;N;;;;; +11D09;MASARAM GONDI LETTER O;Lo;0;L;;;;;N;;;;; +11D0B;MASARAM GONDI LETTER AU;Lo;0;L;;;;;N;;;;; +11D0C;MASARAM GONDI LETTER KA;Lo;0;L;;;;;N;;;;; +11D0D;MASARAM GONDI LETTER KHA;Lo;0;L;;;;;N;;;;; +11D0E;MASARAM GONDI LETTER GA;Lo;0;L;;;;;N;;;;; +11D0F;MASARAM GONDI LETTER GHA;Lo;0;L;;;;;N;;;;; +11D10;MASARAM GONDI LETTER NGA;Lo;0;L;;;;;N;;;;; +11D11;MASARAM GONDI LETTER CA;Lo;0;L;;;;;N;;;;; +11D12;MASARAM GONDI LETTER CHA;Lo;0;L;;;;;N;;;;; +11D13;MASARAM GONDI LETTER JA;Lo;0;L;;;;;N;;;;; +11D14;MASARAM GONDI LETTER JHA;Lo;0;L;;;;;N;;;;; +11D15;MASARAM GONDI LETTER NYA;Lo;0;L;;;;;N;;;;; +11D16;MASARAM GONDI LETTER TTA;Lo;0;L;;;;;N;;;;; +11D17;MASARAM GONDI LETTER TTHA;Lo;0;L;;;;;N;;;;; +11D18;MASARAM GONDI LETTER DDA;Lo;0;L;;;;;N;;;;; +11D19;MASARAM GONDI LETTER DDHA;Lo;0;L;;;;;N;;;;; +11D1A;MASARAM GONDI LETTER NNA;Lo;0;L;;;;;N;;;;; +11D1B;MASARAM GONDI LETTER TA;Lo;0;L;;;;;N;;;;; +11D1C;MASARAM GONDI LETTER THA;Lo;0;L;;;;;N;;;;; +11D1D;MASARAM GONDI LETTER DA;Lo;0;L;;;;;N;;;;; +11D1E;MASARAM GONDI LETTER DHA;Lo;0;L;;;;;N;;;;; +11D1F;MASARAM GONDI LETTER NA;Lo;0;L;;;;;N;;;;; +11D20;MASARAM GONDI LETTER PA;Lo;0;L;;;;;N;;;;; +11D21;MASARAM GONDI LETTER PHA;Lo;0;L;;;;;N;;;;; +11D22;MASARAM GONDI LETTER BA;Lo;0;L;;;;;N;;;;; +11D23;MASARAM GONDI LETTER BHA;Lo;0;L;;;;;N;;;;; +11D24;MASARAM GONDI LETTER MA;Lo;0;L;;;;;N;;;;; +11D25;MASARAM GONDI LETTER YA;Lo;0;L;;;;;N;;;;; +11D26;MASARAM GONDI LETTER RA;Lo;0;L;;;;;N;;;;; +11D27;MASARAM GONDI LETTER LA;Lo;0;L;;;;;N;;;;; +11D28;MASARAM GONDI LETTER VA;Lo;0;L;;;;;N;;;;; +11D29;MASARAM GONDI LETTER SHA;Lo;0;L;;;;;N;;;;; +11D2A;MASARAM GONDI LETTER SSA;Lo;0;L;;;;;N;;;;; +11D2B;MASARAM GONDI LETTER SA;Lo;0;L;;;;;N;;;;; +11D2C;MASARAM GONDI LETTER HA;Lo;0;L;;;;;N;;;;; +11D2D;MASARAM GONDI LETTER LLA;Lo;0;L;;;;;N;;;;; +11D2E;MASARAM GONDI LETTER KSSA;Lo;0;L;;;;;N;;;;; +11D2F;MASARAM GONDI LETTER JNYA;Lo;0;L;;;;;N;;;;; +11D30;MASARAM GONDI LETTER TRA;Lo;0;L;;;;;N;;;;; +11D31;MASARAM GONDI VOWEL SIGN AA;Mn;0;NSM;;;;;N;;;;; +11D32;MASARAM GONDI VOWEL SIGN I;Mn;0;NSM;;;;;N;;;;; +11D33;MASARAM GONDI VOWEL SIGN II;Mn;0;NSM;;;;;N;;;;; +11D34;MASARAM GONDI VOWEL SIGN U;Mn;0;NSM;;;;;N;;;;; +11D35;MASARAM GONDI VOWEL SIGN UU;Mn;0;NSM;;;;;N;;;;; +11D36;MASARAM GONDI VOWEL SIGN VOCALIC R;Mn;0;NSM;;;;;N;;;;; +11D3A;MASARAM GONDI VOWEL SIGN E;Mn;0;NSM;;;;;N;;;;; +11D3C;MASARAM GONDI VOWEL SIGN AI;Mn;0;NSM;;;;;N;;;;; +11D3D;MASARAM GONDI VOWEL SIGN O;Mn;0;NSM;;;;;N;;;;; +11D3F;MASARAM GONDI VOWEL SIGN AU;Mn;0;NSM;;;;;N;;;;; +11D40;MASARAM GONDI SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;; +11D41;MASARAM GONDI SIGN VISARGA;Mn;0;NSM;;;;;N;;;;; +11D42;MASARAM GONDI SIGN NUKTA;Mn;7;NSM;;;;;N;;;;; +11D43;MASARAM GONDI SIGN CANDRA;Mn;0;NSM;;;;;N;;;;; +11D44;MASARAM GONDI SIGN HALANTA;Mn;9;NSM;;;;;N;;;;; +11D45;MASARAM GONDI VIRAMA;Mn;9;NSM;;;;;N;;;;; +11D46;MASARAM GONDI REPHA;Lo;0;L;;;;;N;;;;; +11D47;MASARAM GONDI RA-KARA;Mn;0;NSM;;;;;N;;;;; +11D50;MASARAM GONDI DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; +11D51;MASARAM GONDI DIGIT ONE;Nd;0;L;;1;1;1;N;;;;; +11D52;MASARAM GONDI DIGIT TWO;Nd;0;L;;2;2;2;N;;;;; +11D53;MASARAM GONDI DIGIT THREE;Nd;0;L;;3;3;3;N;;;;; +11D54;MASARAM GONDI DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;; +11D55;MASARAM GONDI DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;; +11D56;MASARAM GONDI DIGIT SIX;Nd;0;L;;6;6;6;N;;;;; +11D57;MASARAM GONDI DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; +11D58;MASARAM GONDI DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; +11D59;MASARAM GONDI DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; 12000;CUNEIFORM SIGN A;Lo;0;L;;;;;N;;;;; 12001;CUNEIFORM SIGN A TIMES A;Lo;0;L;;;;;N;;;;; 12002;CUNEIFORM SIGN A TIMES BAD;Lo;0;L;;;;;N;;;;; @@ -24087,6 +24353,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16F9E;MIAO LETTER REFORMED TONE-6;Lm;0;L;;;;;N;;;;; 16F9F;MIAO LETTER REFORMED TONE-8;Lm;0;L;;;;;N;;;;; 16FE0;TANGUT ITERATION MARK;Lm;0;L;;;;;N;;;;; +16FE1;NUSHU ITERATION MARK;Lm;0;L;;;;;N;;;;; 17000;<Tangut Ideograph, First>;Lo;0;L;;;;;N;;;;; 187EC;<Tangut Ideograph, Last>;Lo;0;L;;;;;N;;;;; 18800;TANGUT COMPONENT-001;Lo;0;L;;;;;N;;;;; @@ -24846,6 +25113,687 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 18AF2;TANGUT COMPONENT-755;Lo;0;L;;;;;N;;;;; 1B000;KATAKANA LETTER ARCHAIC E;Lo;0;L;;;;;N;;;;; 1B001;HIRAGANA LETTER ARCHAIC YE;Lo;0;L;;;;;N;;;;; +1B002;HENTAIGANA LETTER A-1;Lo;0;L;;;;;N;;;;; +1B003;HENTAIGANA LETTER A-2;Lo;0;L;;;;;N;;;;; +1B004;HENTAIGANA LETTER A-3;Lo;0;L;;;;;N;;;;; +1B005;HENTAIGANA LETTER A-WO;Lo;0;L;;;;;N;;;;; +1B006;HENTAIGANA LETTER I-1;Lo;0;L;;;;;N;;;;; +1B007;HENTAIGANA LETTER I-2;Lo;0;L;;;;;N;;;;; +1B008;HENTAIGANA LETTER I-3;Lo;0;L;;;;;N;;;;; +1B009;HENTAIGANA LETTER I-4;Lo;0;L;;;;;N;;;;; +1B00A;HENTAIGANA LETTER U-1;Lo;0;L;;;;;N;;;;; +1B00B;HENTAIGANA LETTER U-2;Lo;0;L;;;;;N;;;;; +1B00C;HENTAIGANA LETTER U-3;Lo;0;L;;;;;N;;;;; +1B00D;HENTAIGANA LETTER U-4;Lo;0;L;;;;;N;;;;; +1B00E;HENTAIGANA LETTER U-5;Lo;0;L;;;;;N;;;;; +1B00F;HENTAIGANA LETTER E-2;Lo;0;L;;;;;N;;;;; +1B010;HENTAIGANA LETTER E-3;Lo;0;L;;;;;N;;;;; +1B011;HENTAIGANA LETTER E-4;Lo;0;L;;;;;N;;;;; +1B012;HENTAIGANA LETTER E-5;Lo;0;L;;;;;N;;;;; +1B013;HENTAIGANA LETTER E-6;Lo;0;L;;;;;N;;;;; +1B014;HENTAIGANA LETTER O-1;Lo;0;L;;;;;N;;;;; +1B015;HENTAIGANA LETTER O-2;Lo;0;L;;;;;N;;;;; +1B016;HENTAIGANA LETTER O-3;Lo;0;L;;;;;N;;;;; +1B017;HENTAIGANA LETTER KA-1;Lo;0;L;;;;;N;;;;; +1B018;HENTAIGANA LETTER KA-2;Lo;0;L;;;;;N;;;;; +1B019;HENTAIGANA LETTER KA-3;Lo;0;L;;;;;N;;;;; +1B01A;HENTAIGANA LETTER KA-4;Lo;0;L;;;;;N;;;;; +1B01B;HENTAIGANA LETTER KA-5;Lo;0;L;;;;;N;;;;; +1B01C;HENTAIGANA LETTER KA-6;Lo;0;L;;;;;N;;;;; +1B01D;HENTAIGANA LETTER KA-7;Lo;0;L;;;;;N;;;;; +1B01E;HENTAIGANA LETTER KA-8;Lo;0;L;;;;;N;;;;; +1B01F;HENTAIGANA LETTER KA-9;Lo;0;L;;;;;N;;;;; +1B020;HENTAIGANA LETTER KA-10;Lo;0;L;;;;;N;;;;; +1B021;HENTAIGANA LETTER KA-11;Lo;0;L;;;;;N;;;;; +1B022;HENTAIGANA LETTER KA-KE;Lo;0;L;;;;;N;;;;; +1B023;HENTAIGANA LETTER KI-1;Lo;0;L;;;;;N;;;;; +1B024;HENTAIGANA LETTER KI-2;Lo;0;L;;;;;N;;;;; +1B025;HENTAIGANA LETTER KI-3;Lo;0;L;;;;;N;;;;; +1B026;HENTAIGANA LETTER KI-4;Lo;0;L;;;;;N;;;;; +1B027;HENTAIGANA LETTER KI-5;Lo;0;L;;;;;N;;;;; +1B028;HENTAIGANA LETTER KI-6;Lo;0;L;;;;;N;;;;; +1B029;HENTAIGANA LETTER KI-7;Lo;0;L;;;;;N;;;;; +1B02A;HENTAIGANA LETTER KI-8;Lo;0;L;;;;;N;;;;; +1B02B;HENTAIGANA LETTER KU-1;Lo;0;L;;;;;N;;;;; +1B02C;HENTAIGANA LETTER KU-2;Lo;0;L;;;;;N;;;;; +1B02D;HENTAIGANA LETTER KU-3;Lo;0;L;;;;;N;;;;; +1B02E;HENTAIGANA LETTER KU-4;Lo;0;L;;;;;N;;;;; +1B02F;HENTAIGANA LETTER KU-5;Lo;0;L;;;;;N;;;;; +1B030;HENTAIGANA LETTER KU-6;Lo;0;L;;;;;N;;;;; +1B031;HENTAIGANA LETTER KU-7;Lo;0;L;;;;;N;;;;; +1B032;HENTAIGANA LETTER KE-1;Lo;0;L;;;;;N;;;;; +1B033;HENTAIGANA LETTER KE-2;Lo;0;L;;;;;N;;;;; +1B034;HENTAIGANA LETTER KE-3;Lo;0;L;;;;;N;;;;; +1B035;HENTAIGANA LETTER KE-4;Lo;0;L;;;;;N;;;;; +1B036;HENTAIGANA LETTER KE-5;Lo;0;L;;;;;N;;;;; +1B037;HENTAIGANA LETTER KE-6;Lo;0;L;;;;;N;;;;; +1B038;HENTAIGANA LETTER KO-1;Lo;0;L;;;;;N;;;;; +1B039;HENTAIGANA LETTER KO-2;Lo;0;L;;;;;N;;;;; +1B03A;HENTAIGANA LETTER KO-3;Lo;0;L;;;;;N;;;;; +1B03B;HENTAIGANA LETTER KO-KI;Lo;0;L;;;;;N;;;;; +1B03C;HENTAIGANA LETTER SA-1;Lo;0;L;;;;;N;;;;; +1B03D;HENTAIGANA LETTER SA-2;Lo;0;L;;;;;N;;;;; +1B03E;HENTAIGANA LETTER SA-3;Lo;0;L;;;;;N;;;;; +1B03F;HENTAIGANA LETTER SA-4;Lo;0;L;;;;;N;;;;; +1B040;HENTAIGANA LETTER SA-5;Lo;0;L;;;;;N;;;;; +1B041;HENTAIGANA LETTER SA-6;Lo;0;L;;;;;N;;;;; +1B042;HENTAIGANA LETTER SA-7;Lo;0;L;;;;;N;;;;; +1B043;HENTAIGANA LETTER SA-8;Lo;0;L;;;;;N;;;;; +1B044;HENTAIGANA LETTER SI-1;Lo;0;L;;;;;N;;;;; +1B045;HENTAIGANA LETTER SI-2;Lo;0;L;;;;;N;;;;; +1B046;HENTAIGANA LETTER SI-3;Lo;0;L;;;;;N;;;;; +1B047;HENTAIGANA LETTER SI-4;Lo;0;L;;;;;N;;;;; +1B048;HENTAIGANA LETTER SI-5;Lo;0;L;;;;;N;;;;; +1B049;HENTAIGANA LETTER SI-6;Lo;0;L;;;;;N;;;;; +1B04A;HENTAIGANA LETTER SU-1;Lo;0;L;;;;;N;;;;; +1B04B;HENTAIGANA LETTER SU-2;Lo;0;L;;;;;N;;;;; +1B04C;HENTAIGANA LETTER SU-3;Lo;0;L;;;;;N;;;;; +1B04D;HENTAIGANA LETTER SU-4;Lo;0;L;;;;;N;;;;; +1B04E;HENTAIGANA LETTER SU-5;Lo;0;L;;;;;N;;;;; +1B04F;HENTAIGANA LETTER SU-6;Lo;0;L;;;;;N;;;;; +1B050;HENTAIGANA LETTER SU-7;Lo;0;L;;;;;N;;;;; +1B051;HENTAIGANA LETTER SU-8;Lo;0;L;;;;;N;;;;; +1B052;HENTAIGANA LETTER SE-1;Lo;0;L;;;;;N;;;;; +1B053;HENTAIGANA LETTER SE-2;Lo;0;L;;;;;N;;;;; +1B054;HENTAIGANA LETTER SE-3;Lo;0;L;;;;;N;;;;; +1B055;HENTAIGANA LETTER SE-4;Lo;0;L;;;;;N;;;;; +1B056;HENTAIGANA LETTER SE-5;Lo;0;L;;;;;N;;;;; +1B057;HENTAIGANA LETTER SO-1;Lo;0;L;;;;;N;;;;; +1B058;HENTAIGANA LETTER SO-2;Lo;0;L;;;;;N;;;;; +1B059;HENTAIGANA LETTER SO-3;Lo;0;L;;;;;N;;;;; +1B05A;HENTAIGANA LETTER SO-4;Lo;0;L;;;;;N;;;;; +1B05B;HENTAIGANA LETTER SO-5;Lo;0;L;;;;;N;;;;; +1B05C;HENTAIGANA LETTER SO-6;Lo;0;L;;;;;N;;;;; +1B05D;HENTAIGANA LETTER SO-7;Lo;0;L;;;;;N;;;;; +1B05E;HENTAIGANA LETTER TA-1;Lo;0;L;;;;;N;;;;; +1B05F;HENTAIGANA LETTER TA-2;Lo;0;L;;;;;N;;;;; +1B060;HENTAIGANA LETTER TA-3;Lo;0;L;;;;;N;;;;; +1B061;HENTAIGANA LETTER TA-4;Lo;0;L;;;;;N;;;;; +1B062;HENTAIGANA LETTER TI-1;Lo;0;L;;;;;N;;;;; +1B063;HENTAIGANA LETTER TI-2;Lo;0;L;;;;;N;;;;; +1B064;HENTAIGANA LETTER TI-3;Lo;0;L;;;;;N;;;;; +1B065;HENTAIGANA LETTER TI-4;Lo;0;L;;;;;N;;;;; +1B066;HENTAIGANA LETTER TI-5;Lo;0;L;;;;;N;;;;; +1B067;HENTAIGANA LETTER TI-6;Lo;0;L;;;;;N;;;;; +1B068;HENTAIGANA LETTER TI-7;Lo;0;L;;;;;N;;;;; +1B069;HENTAIGANA LETTER TU-1;Lo;0;L;;;;;N;;;;; +1B06A;HENTAIGANA LETTER TU-2;Lo;0;L;;;;;N;;;;; +1B06B;HENTAIGANA LETTER TU-3;Lo;0;L;;;;;N;;;;; +1B06C;HENTAIGANA LETTER TU-4;Lo;0;L;;;;;N;;;;; +1B06D;HENTAIGANA LETTER TU-TO;Lo;0;L;;;;;N;;;;; +1B06E;HENTAIGANA LETTER TE-1;Lo;0;L;;;;;N;;;;; +1B06F;HENTAIGANA LETTER TE-2;Lo;0;L;;;;;N;;;;; +1B070;HENTAIGANA LETTER TE-3;Lo;0;L;;;;;N;;;;; +1B071;HENTAIGANA LETTER TE-4;Lo;0;L;;;;;N;;;;; +1B072;HENTAIGANA LETTER TE-5;Lo;0;L;;;;;N;;;;; +1B073;HENTAIGANA LETTER TE-6;Lo;0;L;;;;;N;;;;; +1B074;HENTAIGANA LETTER TE-7;Lo;0;L;;;;;N;;;;; +1B075;HENTAIGANA LETTER TE-8;Lo;0;L;;;;;N;;;;; +1B076;HENTAIGANA LETTER TE-9;Lo;0;L;;;;;N;;;;; +1B077;HENTAIGANA LETTER TO-1;Lo;0;L;;;;;N;;;;; +1B078;HENTAIGANA LETTER TO-2;Lo;0;L;;;;;N;;;;; +1B079;HENTAIGANA LETTER TO-3;Lo;0;L;;;;;N;;;;; +1B07A;HENTAIGANA LETTER TO-4;Lo;0;L;;;;;N;;;;; +1B07B;HENTAIGANA LETTER TO-5;Lo;0;L;;;;;N;;;;; +1B07C;HENTAIGANA LETTER TO-6;Lo;0;L;;;;;N;;;;; +1B07D;HENTAIGANA LETTER TO-RA;Lo;0;L;;;;;N;;;;; +1B07E;HENTAIGANA LETTER NA-1;Lo;0;L;;;;;N;;;;; +1B07F;HENTAIGANA LETTER NA-2;Lo;0;L;;;;;N;;;;; +1B080;HENTAIGANA LETTER NA-3;Lo;0;L;;;;;N;;;;; +1B081;HENTAIGANA LETTER NA-4;Lo;0;L;;;;;N;;;;; +1B082;HENTAIGANA LETTER NA-5;Lo;0;L;;;;;N;;;;; +1B083;HENTAIGANA LETTER NA-6;Lo;0;L;;;;;N;;;;; +1B084;HENTAIGANA LETTER NA-7;Lo;0;L;;;;;N;;;;; +1B085;HENTAIGANA LETTER NA-8;Lo;0;L;;;;;N;;;;; +1B086;HENTAIGANA LETTER NA-9;Lo;0;L;;;;;N;;;;; +1B087;HENTAIGANA LETTER NI-1;Lo;0;L;;;;;N;;;;; +1B088;HENTAIGANA LETTER NI-2;Lo;0;L;;;;;N;;;;; +1B089;HENTAIGANA LETTER NI-3;Lo;0;L;;;;;N;;;;; +1B08A;HENTAIGANA LETTER NI-4;Lo;0;L;;;;;N;;;;; +1B08B;HENTAIGANA LETTER NI-5;Lo;0;L;;;;;N;;;;; +1B08C;HENTAIGANA LETTER NI-6;Lo;0;L;;;;;N;;;;; +1B08D;HENTAIGANA LETTER NI-7;Lo;0;L;;;;;N;;;;; +1B08E;HENTAIGANA LETTER NI-TE;Lo;0;L;;;;;N;;;;; +1B08F;HENTAIGANA LETTER NU-1;Lo;0;L;;;;;N;;;;; +1B090;HENTAIGANA LETTER NU-2;Lo;0;L;;;;;N;;;;; +1B091;HENTAIGANA LETTER NU-3;Lo;0;L;;;;;N;;;;; +1B092;HENTAIGANA LETTER NE-1;Lo;0;L;;;;;N;;;;; +1B093;HENTAIGANA LETTER NE-2;Lo;0;L;;;;;N;;;;; +1B094;HENTAIGANA LETTER NE-3;Lo;0;L;;;;;N;;;;; +1B095;HENTAIGANA LETTER NE-4;Lo;0;L;;;;;N;;;;; +1B096;HENTAIGANA LETTER NE-5;Lo;0;L;;;;;N;;;;; +1B097;HENTAIGANA LETTER NE-6;Lo;0;L;;;;;N;;;;; +1B098;HENTAIGANA LETTER NE-KO;Lo;0;L;;;;;N;;;;; +1B099;HENTAIGANA LETTER NO-1;Lo;0;L;;;;;N;;;;; +1B09A;HENTAIGANA LETTER NO-2;Lo;0;L;;;;;N;;;;; +1B09B;HENTAIGANA LETTER NO-3;Lo;0;L;;;;;N;;;;; +1B09C;HENTAIGANA LETTER NO-4;Lo;0;L;;;;;N;;;;; +1B09D;HENTAIGANA LETTER NO-5;Lo;0;L;;;;;N;;;;; +1B09E;HENTAIGANA LETTER HA-1;Lo;0;L;;;;;N;;;;; +1B09F;HENTAIGANA LETTER HA-2;Lo;0;L;;;;;N;;;;; +1B0A0;HENTAIGANA LETTER HA-3;Lo;0;L;;;;;N;;;;; +1B0A1;HENTAIGANA LETTER HA-4;Lo;0;L;;;;;N;;;;; +1B0A2;HENTAIGANA LETTER HA-5;Lo;0;L;;;;;N;;;;; +1B0A3;HENTAIGANA LETTER HA-6;Lo;0;L;;;;;N;;;;; +1B0A4;HENTAIGANA LETTER HA-7;Lo;0;L;;;;;N;;;;; +1B0A5;HENTAIGANA LETTER HA-8;Lo;0;L;;;;;N;;;;; +1B0A6;HENTAIGANA LETTER HA-9;Lo;0;L;;;;;N;;;;; +1B0A7;HENTAIGANA LETTER HA-10;Lo;0;L;;;;;N;;;;; +1B0A8;HENTAIGANA LETTER HA-11;Lo;0;L;;;;;N;;;;; +1B0A9;HENTAIGANA LETTER HI-1;Lo;0;L;;;;;N;;;;; +1B0AA;HENTAIGANA LETTER HI-2;Lo;0;L;;;;;N;;;;; +1B0AB;HENTAIGANA LETTER HI-3;Lo;0;L;;;;;N;;;;; +1B0AC;HENTAIGANA LETTER HI-4;Lo;0;L;;;;;N;;;;; +1B0AD;HENTAIGANA LETTER HI-5;Lo;0;L;;;;;N;;;;; +1B0AE;HENTAIGANA LETTER HI-6;Lo;0;L;;;;;N;;;;; +1B0AF;HENTAIGANA LETTER HI-7;Lo;0;L;;;;;N;;;;; +1B0B0;HENTAIGANA LETTER HU-1;Lo;0;L;;;;;N;;;;; +1B0B1;HENTAIGANA LETTER HU-2;Lo;0;L;;;;;N;;;;; +1B0B2;HENTAIGANA LETTER HU-3;Lo;0;L;;;;;N;;;;; +1B0B3;HENTAIGANA LETTER HE-1;Lo;0;L;;;;;N;;;;; +1B0B4;HENTAIGANA LETTER HE-2;Lo;0;L;;;;;N;;;;; +1B0B5;HENTAIGANA LETTER HE-3;Lo;0;L;;;;;N;;;;; +1B0B6;HENTAIGANA LETTER HE-4;Lo;0;L;;;;;N;;;;; +1B0B7;HENTAIGANA LETTER HE-5;Lo;0;L;;;;;N;;;;; +1B0B8;HENTAIGANA LETTER HE-6;Lo;0;L;;;;;N;;;;; +1B0B9;HENTAIGANA LETTER HE-7;Lo;0;L;;;;;N;;;;; +1B0BA;HENTAIGANA LETTER HO-1;Lo;0;L;;;;;N;;;;; +1B0BB;HENTAIGANA LETTER HO-2;Lo;0;L;;;;;N;;;;; +1B0BC;HENTAIGANA LETTER HO-3;Lo;0;L;;;;;N;;;;; +1B0BD;HENTAIGANA LETTER HO-4;Lo;0;L;;;;;N;;;;; +1B0BE;HENTAIGANA LETTER HO-5;Lo;0;L;;;;;N;;;;; +1B0BF;HENTAIGANA LETTER HO-6;Lo;0;L;;;;;N;;;;; +1B0C0;HENTAIGANA LETTER HO-7;Lo;0;L;;;;;N;;;;; +1B0C1;HENTAIGANA LETTER HO-8;Lo;0;L;;;;;N;;;;; +1B0C2;HENTAIGANA LETTER MA-1;Lo;0;L;;;;;N;;;;; +1B0C3;HENTAIGANA LETTER MA-2;Lo;0;L;;;;;N;;;;; +1B0C4;HENTAIGANA LETTER MA-3;Lo;0;L;;;;;N;;;;; +1B0C5;HENTAIGANA LETTER MA-4;Lo;0;L;;;;;N;;;;; +1B0C6;HENTAIGANA LETTER MA-5;Lo;0;L;;;;;N;;;;; +1B0C7;HENTAIGANA LETTER MA-6;Lo;0;L;;;;;N;;;;; +1B0C8;HENTAIGANA LETTER MA-7;Lo;0;L;;;;;N;;;;; +1B0C9;HENTAIGANA LETTER MI-1;Lo;0;L;;;;;N;;;;; +1B0CA;HENTAIGANA LETTER MI-2;Lo;0;L;;;;;N;;;;; +1B0CB;HENTAIGANA LETTER MI-3;Lo;0;L;;;;;N;;;;; +1B0CC;HENTAIGANA LETTER MI-4;Lo;0;L;;;;;N;;;;; +1B0CD;HENTAIGANA LETTER MI-5;Lo;0;L;;;;;N;;;;; +1B0CE;HENTAIGANA LETTER MI-6;Lo;0;L;;;;;N;;;;; +1B0CF;HENTAIGANA LETTER MI-7;Lo;0;L;;;;;N;;;;; +1B0D0;HENTAIGANA LETTER MU-1;Lo;0;L;;;;;N;;;;; +1B0D1;HENTAIGANA LETTER MU-2;Lo;0;L;;;;;N;;;;; +1B0D2;HENTAIGANA LETTER MU-3;Lo;0;L;;;;;N;;;;; +1B0D3;HENTAIGANA LETTER MU-4;Lo;0;L;;;;;N;;;;; +1B0D4;HENTAIGANA LETTER ME-1;Lo;0;L;;;;;N;;;;; +1B0D5;HENTAIGANA LETTER ME-2;Lo;0;L;;;;;N;;;;; +1B0D6;HENTAIGANA LETTER ME-MA;Lo;0;L;;;;;N;;;;; +1B0D7;HENTAIGANA LETTER MO-1;Lo;0;L;;;;;N;;;;; +1B0D8;HENTAIGANA LETTER MO-2;Lo;0;L;;;;;N;;;;; +1B0D9;HENTAIGANA LETTER MO-3;Lo;0;L;;;;;N;;;;; +1B0DA;HENTAIGANA LETTER MO-4;Lo;0;L;;;;;N;;;;; +1B0DB;HENTAIGANA LETTER MO-5;Lo;0;L;;;;;N;;;;; +1B0DC;HENTAIGANA LETTER MO-6;Lo;0;L;;;;;N;;;;; +1B0DD;HENTAIGANA LETTER YA-1;Lo;0;L;;;;;N;;;;; +1B0DE;HENTAIGANA LETTER YA-2;Lo;0;L;;;;;N;;;;; +1B0DF;HENTAIGANA LETTER YA-3;Lo;0;L;;;;;N;;;;; +1B0E0;HENTAIGANA LETTER YA-4;Lo;0;L;;;;;N;;;;; +1B0E1;HENTAIGANA LETTER YA-5;Lo;0;L;;;;;N;;;;; +1B0E2;HENTAIGANA LETTER YA-YO;Lo;0;L;;;;;N;;;;; +1B0E3;HENTAIGANA LETTER YU-1;Lo;0;L;;;;;N;;;;; +1B0E4;HENTAIGANA LETTER YU-2;Lo;0;L;;;;;N;;;;; +1B0E5;HENTAIGANA LETTER YU-3;Lo;0;L;;;;;N;;;;; +1B0E6;HENTAIGANA LETTER YU-4;Lo;0;L;;;;;N;;;;; +1B0E7;HENTAIGANA LETTER YO-1;Lo;0;L;;;;;N;;;;; +1B0E8;HENTAIGANA LETTER YO-2;Lo;0;L;;;;;N;;;;; +1B0E9;HENTAIGANA LETTER YO-3;Lo;0;L;;;;;N;;;;; +1B0EA;HENTAIGANA LETTER YO-4;Lo;0;L;;;;;N;;;;; +1B0EB;HENTAIGANA LETTER YO-5;Lo;0;L;;;;;N;;;;; +1B0EC;HENTAIGANA LETTER YO-6;Lo;0;L;;;;;N;;;;; +1B0ED;HENTAIGANA LETTER RA-1;Lo;0;L;;;;;N;;;;; +1B0EE;HENTAIGANA LETTER RA-2;Lo;0;L;;;;;N;;;;; +1B0EF;HENTAIGANA LETTER RA-3;Lo;0;L;;;;;N;;;;; +1B0F0;HENTAIGANA LETTER RA-4;Lo;0;L;;;;;N;;;;; +1B0F1;HENTAIGANA LETTER RI-1;Lo;0;L;;;;;N;;;;; +1B0F2;HENTAIGANA LETTER RI-2;Lo;0;L;;;;;N;;;;; +1B0F3;HENTAIGANA LETTER RI-3;Lo;0;L;;;;;N;;;;; +1B0F4;HENTAIGANA LETTER RI-4;Lo;0;L;;;;;N;;;;; +1B0F5;HENTAIGANA LETTER RI-5;Lo;0;L;;;;;N;;;;; +1B0F6;HENTAIGANA LETTER RI-6;Lo;0;L;;;;;N;;;;; +1B0F7;HENTAIGANA LETTER RI-7;Lo;0;L;;;;;N;;;;; +1B0F8;HENTAIGANA LETTER RU-1;Lo;0;L;;;;;N;;;;; +1B0F9;HENTAIGANA LETTER RU-2;Lo;0;L;;;;;N;;;;; +1B0FA;HENTAIGANA LETTER RU-3;Lo;0;L;;;;;N;;;;; +1B0FB;HENTAIGANA LETTER RU-4;Lo;0;L;;;;;N;;;;; +1B0FC;HENTAIGANA LETTER RU-5;Lo;0;L;;;;;N;;;;; +1B0FD;HENTAIGANA LETTER RU-6;Lo;0;L;;;;;N;;;;; +1B0FE;HENTAIGANA LETTER RE-1;Lo;0;L;;;;;N;;;;; +1B0FF;HENTAIGANA LETTER RE-2;Lo;0;L;;;;;N;;;;; +1B100;HENTAIGANA LETTER RE-3;Lo;0;L;;;;;N;;;;; +1B101;HENTAIGANA LETTER RE-4;Lo;0;L;;;;;N;;;;; +1B102;HENTAIGANA LETTER RO-1;Lo;0;L;;;;;N;;;;; +1B103;HENTAIGANA LETTER RO-2;Lo;0;L;;;;;N;;;;; +1B104;HENTAIGANA LETTER RO-3;Lo;0;L;;;;;N;;;;; +1B105;HENTAIGANA LETTER RO-4;Lo;0;L;;;;;N;;;;; +1B106;HENTAIGANA LETTER RO-5;Lo;0;L;;;;;N;;;;; +1B107;HENTAIGANA LETTER RO-6;Lo;0;L;;;;;N;;;;; +1B108;HENTAIGANA LETTER WA-1;Lo;0;L;;;;;N;;;;; +1B109;HENTAIGANA LETTER WA-2;Lo;0;L;;;;;N;;;;; +1B10A;HENTAIGANA LETTER WA-3;Lo;0;L;;;;;N;;;;; +1B10B;HENTAIGANA LETTER WA-4;Lo;0;L;;;;;N;;;;; +1B10C;HENTAIGANA LETTER WA-5;Lo;0;L;;;;;N;;;;; +1B10D;HENTAIGANA LETTER WI-1;Lo;0;L;;;;;N;;;;; +1B10E;HENTAIGANA LETTER WI-2;Lo;0;L;;;;;N;;;;; +1B10F;HENTAIGANA LETTER WI-3;Lo;0;L;;;;;N;;;;; +1B110;HENTAIGANA LETTER WI-4;Lo;0;L;;;;;N;;;;; +1B111;HENTAIGANA LETTER WI-5;Lo;0;L;;;;;N;;;;; +1B112;HENTAIGANA LETTER WE-1;Lo;0;L;;;;;N;;;;; +1B113;HENTAIGANA LETTER WE-2;Lo;0;L;;;;;N;;;;; +1B114;HENTAIGANA LETTER WE-3;Lo;0;L;;;;;N;;;;; +1B115;HENTAIGANA LETTER WE-4;Lo;0;L;;;;;N;;;;; +1B116;HENTAIGANA LETTER WO-1;Lo;0;L;;;;;N;;;;; +1B117;HENTAIGANA LETTER WO-2;Lo;0;L;;;;;N;;;;; +1B118;HENTAIGANA LETTER WO-3;Lo;0;L;;;;;N;;;;; +1B119;HENTAIGANA LETTER WO-4;Lo;0;L;;;;;N;;;;; +1B11A;HENTAIGANA LETTER WO-5;Lo;0;L;;;;;N;;;;; +1B11B;HENTAIGANA LETTER WO-6;Lo;0;L;;;;;N;;;;; +1B11C;HENTAIGANA LETTER WO-7;Lo;0;L;;;;;N;;;;; +1B11D;HENTAIGANA LETTER N-MU-MO-1;Lo;0;L;;;;;N;;;;; +1B11E;HENTAIGANA LETTER N-MU-MO-2;Lo;0;L;;;;;N;;;;; +1B170;NUSHU CHARACTER-1B170;Lo;0;L;;;;;N;;;;; +1B171;NUSHU CHARACTER-1B171;Lo;0;L;;;;;N;;;;; +1B172;NUSHU CHARACTER-1B172;Lo;0;L;;;;;N;;;;; +1B173;NUSHU CHARACTER-1B173;Lo;0;L;;;;;N;;;;; +1B174;NUSHU CHARACTER-1B174;Lo;0;L;;;;;N;;;;; +1B175;NUSHU CHARACTER-1B175;Lo;0;L;;;;;N;;;;; +1B176;NUSHU CHARACTER-1B176;Lo;0;L;;;;;N;;;;; +1B177;NUSHU CHARACTER-1B177;Lo;0;L;;;;;N;;;;; +1B178;NUSHU CHARACTER-1B178;Lo;0;L;;;;;N;;;;; +1B179;NUSHU CHARACTER-1B179;Lo;0;L;;;;;N;;;;; +1B17A;NUSHU CHARACTER-1B17A;Lo;0;L;;;;;N;;;;; +1B17B;NUSHU CHARACTER-1B17B;Lo;0;L;;;;;N;;;;; +1B17C;NUSHU CHARACTER-1B17C;Lo;0;L;;;;;N;;;;; +1B17D;NUSHU CHARACTER-1B17D;Lo;0;L;;;;;N;;;;; +1B17E;NUSHU CHARACTER-1B17E;Lo;0;L;;;;;N;;;;; +1B17F;NUSHU CHARACTER-1B17F;Lo;0;L;;;;;N;;;;; +1B180;NUSHU CHARACTER-1B180;Lo;0;L;;;;;N;;;;; +1B181;NUSHU CHARACTER-1B181;Lo;0;L;;;;;N;;;;; +1B182;NUSHU CHARACTER-1B182;Lo;0;L;;;;;N;;;;; +1B183;NUSHU CHARACTER-1B183;Lo;0;L;;;;;N;;;;; +1B184;NUSHU CHARACTER-1B184;Lo;0;L;;;;;N;;;;; +1B185;NUSHU CHARACTER-1B185;Lo;0;L;;;;;N;;;;; +1B186;NUSHU CHARACTER-1B186;Lo;0;L;;;;;N;;;;; +1B187;NUSHU CHARACTER-1B187;Lo;0;L;;;;;N;;;;; +1B188;NUSHU CHARACTER-1B188;Lo;0;L;;;;;N;;;;; +1B189;NUSHU CHARACTER-1B189;Lo;0;L;;;;;N;;;;; +1B18A;NUSHU CHARACTER-1B18A;Lo;0;L;;;;;N;;;;; +1B18B;NUSHU CHARACTER-1B18B;Lo;0;L;;;;;N;;;;; +1B18C;NUSHU CHARACTER-1B18C;Lo;0;L;;;;;N;;;;; +1B18D;NUSHU CHARACTER-1B18D;Lo;0;L;;;;;N;;;;; +1B18E;NUSHU CHARACTER-1B18E;Lo;0;L;;;;;N;;;;; +1B18F;NUSHU CHARACTER-1B18F;Lo;0;L;;;;;N;;;;; +1B190;NUSHU CHARACTER-1B190;Lo;0;L;;;;;N;;;;; +1B191;NUSHU CHARACTER-1B191;Lo;0;L;;;;;N;;;;; +1B192;NUSHU CHARACTER-1B192;Lo;0;L;;;;;N;;;;; +1B193;NUSHU CHARACTER-1B193;Lo;0;L;;;;;N;;;;; +1B194;NUSHU CHARACTER-1B194;Lo;0;L;;;;;N;;;;; +1B195;NUSHU CHARACTER-1B195;Lo;0;L;;;;;N;;;;; +1B196;NUSHU CHARACTER-1B196;Lo;0;L;;;;;N;;;;; +1B197;NUSHU CHARACTER-1B197;Lo;0;L;;;;;N;;;;; +1B198;NUSHU CHARACTER-1B198;Lo;0;L;;;;;N;;;;; +1B199;NUSHU CHARACTER-1B199;Lo;0;L;;;;;N;;;;; +1B19A;NUSHU CHARACTER-1B19A;Lo;0;L;;;;;N;;;;; +1B19B;NUSHU CHARACTER-1B19B;Lo;0;L;;;;;N;;;;; +1B19C;NUSHU CHARACTER-1B19C;Lo;0;L;;;;;N;;;;; +1B19D;NUSHU CHARACTER-1B19D;Lo;0;L;;;;;N;;;;; +1B19E;NUSHU CHARACTER-1B19E;Lo;0;L;;;;;N;;;;; +1B19F;NUSHU CHARACTER-1B19F;Lo;0;L;;;;;N;;;;; +1B1A0;NUSHU CHARACTER-1B1A0;Lo;0;L;;;;;N;;;;; +1B1A1;NUSHU CHARACTER-1B1A1;Lo;0;L;;;;;N;;;;; +1B1A2;NUSHU CHARACTER-1B1A2;Lo;0;L;;;;;N;;;;; +1B1A3;NUSHU CHARACTER-1B1A3;Lo;0;L;;;;;N;;;;; +1B1A4;NUSHU CHARACTER-1B1A4;Lo;0;L;;;;;N;;;;; +1B1A5;NUSHU CHARACTER-1B1A5;Lo;0;L;;;;;N;;;;; +1B1A6;NUSHU CHARACTER-1B1A6;Lo;0;L;;;;;N;;;;; +1B1A7;NUSHU CHARACTER-1B1A7;Lo;0;L;;;;;N;;;;; +1B1A8;NUSHU CHARACTER-1B1A8;Lo;0;L;;;;;N;;;;; +1B1A9;NUSHU CHARACTER-1B1A9;Lo;0;L;;;;;N;;;;; +1B1AA;NUSHU CHARACTER-1B1AA;Lo;0;L;;;;;N;;;;; +1B1AB;NUSHU CHARACTER-1B1AB;Lo;0;L;;;;;N;;;;; +1B1AC;NUSHU CHARACTER-1B1AC;Lo;0;L;;;;;N;;;;; +1B1AD;NUSHU CHARACTER-1B1AD;Lo;0;L;;;;;N;;;;; +1B1AE;NUSHU CHARACTER-1B1AE;Lo;0;L;;;;;N;;;;; +1B1AF;NUSHU CHARACTER-1B1AF;Lo;0;L;;;;;N;;;;; +1B1B0;NUSHU CHARACTER-1B1B0;Lo;0;L;;;;;N;;;;; +1B1B1;NUSHU CHARACTER-1B1B1;Lo;0;L;;;;;N;;;;; +1B1B2;NUSHU CHARACTER-1B1B2;Lo;0;L;;;;;N;;;;; +1B1B3;NUSHU CHARACTER-1B1B3;Lo;0;L;;;;;N;;;;; +1B1B4;NUSHU CHARACTER-1B1B4;Lo;0;L;;;;;N;;;;; +1B1B5;NUSHU CHARACTER-1B1B5;Lo;0;L;;;;;N;;;;; +1B1B6;NUSHU CHARACTER-1B1B6;Lo;0;L;;;;;N;;;;; +1B1B7;NUSHU CHARACTER-1B1B7;Lo;0;L;;;;;N;;;;; +1B1B8;NUSHU CHARACTER-1B1B8;Lo;0;L;;;;;N;;;;; +1B1B9;NUSHU CHARACTER-1B1B9;Lo;0;L;;;;;N;;;;; +1B1BA;NUSHU CHARACTER-1B1BA;Lo;0;L;;;;;N;;;;; +1B1BB;NUSHU CHARACTER-1B1BB;Lo;0;L;;;;;N;;;;; +1B1BC;NUSHU CHARACTER-1B1BC;Lo;0;L;;;;;N;;;;; +1B1BD;NUSHU CHARACTER-1B1BD;Lo;0;L;;;;;N;;;;; +1B1BE;NUSHU CHARACTER-1B1BE;Lo;0;L;;;;;N;;;;; +1B1BF;NUSHU CHARACTER-1B1BF;Lo;0;L;;;;;N;;;;; +1B1C0;NUSHU CHARACTER-1B1C0;Lo;0;L;;;;;N;;;;; +1B1C1;NUSHU CHARACTER-1B1C1;Lo;0;L;;;;;N;;;;; +1B1C2;NUSHU CHARACTER-1B1C2;Lo;0;L;;;;;N;;;;; +1B1C3;NUSHU CHARACTER-1B1C3;Lo;0;L;;;;;N;;;;; +1B1C4;NUSHU CHARACTER-1B1C4;Lo;0;L;;;;;N;;;;; +1B1C5;NUSHU CHARACTER-1B1C5;Lo;0;L;;;;;N;;;;; +1B1C6;NUSHU CHARACTER-1B1C6;Lo;0;L;;;;;N;;;;; +1B1C7;NUSHU CHARACTER-1B1C7;Lo;0;L;;;;;N;;;;; +1B1C8;NUSHU CHARACTER-1B1C8;Lo;0;L;;;;;N;;;;; +1B1C9;NUSHU CHARACTER-1B1C9;Lo;0;L;;;;;N;;;;; +1B1CA;NUSHU CHARACTER-1B1CA;Lo;0;L;;;;;N;;;;; +1B1CB;NUSHU CHARACTER-1B1CB;Lo;0;L;;;;;N;;;;; +1B1CC;NUSHU CHARACTER-1B1CC;Lo;0;L;;;;;N;;;;; +1B1CD;NUSHU CHARACTER-1B1CD;Lo;0;L;;;;;N;;;;; +1B1CE;NUSHU CHARACTER-1B1CE;Lo;0;L;;;;;N;;;;; +1B1CF;NUSHU CHARACTER-1B1CF;Lo;0;L;;;;;N;;;;; +1B1D0;NUSHU CHARACTER-1B1D0;Lo;0;L;;;;;N;;;;; +1B1D1;NUSHU CHARACTER-1B1D1;Lo;0;L;;;;;N;;;;; +1B1D2;NUSHU CHARACTER-1B1D2;Lo;0;L;;;;;N;;;;; +1B1D3;NUSHU CHARACTER-1B1D3;Lo;0;L;;;;;N;;;;; +1B1D4;NUSHU CHARACTER-1B1D4;Lo;0;L;;;;;N;;;;; +1B1D5;NUSHU CHARACTER-1B1D5;Lo;0;L;;;;;N;;;;; +1B1D6;NUSHU CHARACTER-1B1D6;Lo;0;L;;;;;N;;;;; +1B1D7;NUSHU CHARACTER-1B1D7;Lo;0;L;;;;;N;;;;; +1B1D8;NUSHU CHARACTER-1B1D8;Lo;0;L;;;;;N;;;;; +1B1D9;NUSHU CHARACTER-1B1D9;Lo;0;L;;;;;N;;;;; +1B1DA;NUSHU CHARACTER-1B1DA;Lo;0;L;;;;;N;;;;; +1B1DB;NUSHU CHARACTER-1B1DB;Lo;0;L;;;;;N;;;;; +1B1DC;NUSHU CHARACTER-1B1DC;Lo;0;L;;;;;N;;;;; +1B1DD;NUSHU CHARACTER-1B1DD;Lo;0;L;;;;;N;;;;; +1B1DE;NUSHU CHARACTER-1B1DE;Lo;0;L;;;;;N;;;;; +1B1DF;NUSHU CHARACTER-1B1DF;Lo;0;L;;;;;N;;;;; +1B1E0;NUSHU CHARACTER-1B1E0;Lo;0;L;;;;;N;;;;; +1B1E1;NUSHU CHARACTER-1B1E1;Lo;0;L;;;;;N;;;;; +1B1E2;NUSHU CHARACTER-1B1E2;Lo;0;L;;;;;N;;;;; +1B1E3;NUSHU CHARACTER-1B1E3;Lo;0;L;;;;;N;;;;; +1B1E4;NUSHU CHARACTER-1B1E4;Lo;0;L;;;;;N;;;;; +1B1E5;NUSHU CHARACTER-1B1E5;Lo;0;L;;;;;N;;;;; +1B1E6;NUSHU CHARACTER-1B1E6;Lo;0;L;;;;;N;;;;; +1B1E7;NUSHU CHARACTER-1B1E7;Lo;0;L;;;;;N;;;;; +1B1E8;NUSHU CHARACTER-1B1E8;Lo;0;L;;;;;N;;;;; +1B1E9;NUSHU CHARACTER-1B1E9;Lo;0;L;;;;;N;;;;; +1B1EA;NUSHU CHARACTER-1B1EA;Lo;0;L;;;;;N;;;;; +1B1EB;NUSHU CHARACTER-1B1EB;Lo;0;L;;;;;N;;;;; +1B1EC;NUSHU CHARACTER-1B1EC;Lo;0;L;;;;;N;;;;; +1B1ED;NUSHU CHARACTER-1B1ED;Lo;0;L;;;;;N;;;;; +1B1EE;NUSHU CHARACTER-1B1EE;Lo;0;L;;;;;N;;;;; +1B1EF;NUSHU CHARACTER-1B1EF;Lo;0;L;;;;;N;;;;; +1B1F0;NUSHU CHARACTER-1B1F0;Lo;0;L;;;;;N;;;;; +1B1F1;NUSHU CHARACTER-1B1F1;Lo;0;L;;;;;N;;;;; +1B1F2;NUSHU CHARACTER-1B1F2;Lo;0;L;;;;;N;;;;; +1B1F3;NUSHU CHARACTER-1B1F3;Lo;0;L;;;;;N;;;;; +1B1F4;NUSHU CHARACTER-1B1F4;Lo;0;L;;;;;N;;;;; +1B1F5;NUSHU CHARACTER-1B1F5;Lo;0;L;;;;;N;;;;; +1B1F6;NUSHU CHARACTER-1B1F6;Lo;0;L;;;;;N;;;;; +1B1F7;NUSHU CHARACTER-1B1F7;Lo;0;L;;;;;N;;;;; +1B1F8;NUSHU CHARACTER-1B1F8;Lo;0;L;;;;;N;;;;; +1B1F9;NUSHU CHARACTER-1B1F9;Lo;0;L;;;;;N;;;;; +1B1FA;NUSHU CHARACTER-1B1FA;Lo;0;L;;;;;N;;;;; +1B1FB;NUSHU CHARACTER-1B1FB;Lo;0;L;;;;;N;;;;; +1B1FC;NUSHU CHARACTER-1B1FC;Lo;0;L;;;;;N;;;;; +1B1FD;NUSHU CHARACTER-1B1FD;Lo;0;L;;;;;N;;;;; +1B1FE;NUSHU CHARACTER-1B1FE;Lo;0;L;;;;;N;;;;; +1B1FF;NUSHU CHARACTER-1B1FF;Lo;0;L;;;;;N;;;;; +1B200;NUSHU CHARACTER-1B200;Lo;0;L;;;;;N;;;;; +1B201;NUSHU CHARACTER-1B201;Lo;0;L;;;;;N;;;;; +1B202;NUSHU CHARACTER-1B202;Lo;0;L;;;;;N;;;;; +1B203;NUSHU CHARACTER-1B203;Lo;0;L;;;;;N;;;;; +1B204;NUSHU CHARACTER-1B204;Lo;0;L;;;;;N;;;;; +1B205;NUSHU CHARACTER-1B205;Lo;0;L;;;;;N;;;;; +1B206;NUSHU CHARACTER-1B206;Lo;0;L;;;;;N;;;;; +1B207;NUSHU CHARACTER-1B207;Lo;0;L;;;;;N;;;;; +1B208;NUSHU CHARACTER-1B208;Lo;0;L;;;;;N;;;;; +1B209;NUSHU CHARACTER-1B209;Lo;0;L;;;;;N;;;;; +1B20A;NUSHU CHARACTER-1B20A;Lo;0;L;;;;;N;;;;; +1B20B;NUSHU CHARACTER-1B20B;Lo;0;L;;;;;N;;;;; +1B20C;NUSHU CHARACTER-1B20C;Lo;0;L;;;;;N;;;;; +1B20D;NUSHU CHARACTER-1B20D;Lo;0;L;;;;;N;;;;; +1B20E;NUSHU CHARACTER-1B20E;Lo;0;L;;;;;N;;;;; +1B20F;NUSHU CHARACTER-1B20F;Lo;0;L;;;;;N;;;;; +1B210;NUSHU CHARACTER-1B210;Lo;0;L;;;;;N;;;;; +1B211;NUSHU CHARACTER-1B211;Lo;0;L;;;;;N;;;;; +1B212;NUSHU CHARACTER-1B212;Lo;0;L;;;;;N;;;;; +1B213;NUSHU CHARACTER-1B213;Lo;0;L;;;;;N;;;;; +1B214;NUSHU CHARACTER-1B214;Lo;0;L;;;;;N;;;;; +1B215;NUSHU CHARACTER-1B215;Lo;0;L;;;;;N;;;;; +1B216;NUSHU CHARACTER-1B216;Lo;0;L;;;;;N;;;;; +1B217;NUSHU CHARACTER-1B217;Lo;0;L;;;;;N;;;;; +1B218;NUSHU CHARACTER-1B218;Lo;0;L;;;;;N;;;;; +1B219;NUSHU CHARACTER-1B219;Lo;0;L;;;;;N;;;;; +1B21A;NUSHU CHARACTER-1B21A;Lo;0;L;;;;;N;;;;; +1B21B;NUSHU CHARACTER-1B21B;Lo;0;L;;;;;N;;;;; +1B21C;NUSHU CHARACTER-1B21C;Lo;0;L;;;;;N;;;;; +1B21D;NUSHU CHARACTER-1B21D;Lo;0;L;;;;;N;;;;; +1B21E;NUSHU CHARACTER-1B21E;Lo;0;L;;;;;N;;;;; +1B21F;NUSHU CHARACTER-1B21F;Lo;0;L;;;;;N;;;;; +1B220;NUSHU CHARACTER-1B220;Lo;0;L;;;;;N;;;;; +1B221;NUSHU CHARACTER-1B221;Lo;0;L;;;;;N;;;;; +1B222;NUSHU CHARACTER-1B222;Lo;0;L;;;;;N;;;;; +1B223;NUSHU CHARACTER-1B223;Lo;0;L;;;;;N;;;;; +1B224;NUSHU CHARACTER-1B224;Lo;0;L;;;;;N;;;;; +1B225;NUSHU CHARACTER-1B225;Lo;0;L;;;;;N;;;;; +1B226;NUSHU CHARACTER-1B226;Lo;0;L;;;;;N;;;;; +1B227;NUSHU CHARACTER-1B227;Lo;0;L;;;;;N;;;;; +1B228;NUSHU CHARACTER-1B228;Lo;0;L;;;;;N;;;;; +1B229;NUSHU CHARACTER-1B229;Lo;0;L;;;;;N;;;;; +1B22A;NUSHU CHARACTER-1B22A;Lo;0;L;;;;;N;;;;; +1B22B;NUSHU CHARACTER-1B22B;Lo;0;L;;;;;N;;;;; +1B22C;NUSHU CHARACTER-1B22C;Lo;0;L;;;;;N;;;;; +1B22D;NUSHU CHARACTER-1B22D;Lo;0;L;;;;;N;;;;; +1B22E;NUSHU CHARACTER-1B22E;Lo;0;L;;;;;N;;;;; +1B22F;NUSHU CHARACTER-1B22F;Lo;0;L;;;;;N;;;;; +1B230;NUSHU CHARACTER-1B230;Lo;0;L;;;;;N;;;;; +1B231;NUSHU CHARACTER-1B231;Lo;0;L;;;;;N;;;;; +1B232;NUSHU CHARACTER-1B232;Lo;0;L;;;;;N;;;;; +1B233;NUSHU CHARACTER-1B233;Lo;0;L;;;;;N;;;;; +1B234;NUSHU CHARACTER-1B234;Lo;0;L;;;;;N;;;;; +1B235;NUSHU CHARACTER-1B235;Lo;0;L;;;;;N;;;;; +1B236;NUSHU CHARACTER-1B236;Lo;0;L;;;;;N;;;;; +1B237;NUSHU CHARACTER-1B237;Lo;0;L;;;;;N;;;;; +1B238;NUSHU CHARACTER-1B238;Lo;0;L;;;;;N;;;;; +1B239;NUSHU CHARACTER-1B239;Lo;0;L;;;;;N;;;;; +1B23A;NUSHU CHARACTER-1B23A;Lo;0;L;;;;;N;;;;; +1B23B;NUSHU CHARACTER-1B23B;Lo;0;L;;;;;N;;;;; +1B23C;NUSHU CHARACTER-1B23C;Lo;0;L;;;;;N;;;;; +1B23D;NUSHU CHARACTER-1B23D;Lo;0;L;;;;;N;;;;; +1B23E;NUSHU CHARACTER-1B23E;Lo;0;L;;;;;N;;;;; +1B23F;NUSHU CHARACTER-1B23F;Lo;0;L;;;;;N;;;;; +1B240;NUSHU CHARACTER-1B240;Lo;0;L;;;;;N;;;;; +1B241;NUSHU CHARACTER-1B241;Lo;0;L;;;;;N;;;;; +1B242;NUSHU CHARACTER-1B242;Lo;0;L;;;;;N;;;;; +1B243;NUSHU CHARACTER-1B243;Lo;0;L;;;;;N;;;;; +1B244;NUSHU CHARACTER-1B244;Lo;0;L;;;;;N;;;;; +1B245;NUSHU CHARACTER-1B245;Lo;0;L;;;;;N;;;;; +1B246;NUSHU CHARACTER-1B246;Lo;0;L;;;;;N;;;;; +1B247;NUSHU CHARACTER-1B247;Lo;0;L;;;;;N;;;;; +1B248;NUSHU CHARACTER-1B248;Lo;0;L;;;;;N;;;;; +1B249;NUSHU CHARACTER-1B249;Lo;0;L;;;;;N;;;;; +1B24A;NUSHU CHARACTER-1B24A;Lo;0;L;;;;;N;;;;; +1B24B;NUSHU CHARACTER-1B24B;Lo;0;L;;;;;N;;;;; +1B24C;NUSHU CHARACTER-1B24C;Lo;0;L;;;;;N;;;;; +1B24D;NUSHU CHARACTER-1B24D;Lo;0;L;;;;;N;;;;; +1B24E;NUSHU CHARACTER-1B24E;Lo;0;L;;;;;N;;;;; +1B24F;NUSHU CHARACTER-1B24F;Lo;0;L;;;;;N;;;;; +1B250;NUSHU CHARACTER-1B250;Lo;0;L;;;;;N;;;;; +1B251;NUSHU CHARACTER-1B251;Lo;0;L;;;;;N;;;;; +1B252;NUSHU CHARACTER-1B252;Lo;0;L;;;;;N;;;;; +1B253;NUSHU CHARACTER-1B253;Lo;0;L;;;;;N;;;;; +1B254;NUSHU CHARACTER-1B254;Lo;0;L;;;;;N;;;;; +1B255;NUSHU CHARACTER-1B255;Lo;0;L;;;;;N;;;;; +1B256;NUSHU CHARACTER-1B256;Lo;0;L;;;;;N;;;;; +1B257;NUSHU CHARACTER-1B257;Lo;0;L;;;;;N;;;;; +1B258;NUSHU CHARACTER-1B258;Lo;0;L;;;;;N;;;;; +1B259;NUSHU CHARACTER-1B259;Lo;0;L;;;;;N;;;;; +1B25A;NUSHU CHARACTER-1B25A;Lo;0;L;;;;;N;;;;; +1B25B;NUSHU CHARACTER-1B25B;Lo;0;L;;;;;N;;;;; +1B25C;NUSHU CHARACTER-1B25C;Lo;0;L;;;;;N;;;;; +1B25D;NUSHU CHARACTER-1B25D;Lo;0;L;;;;;N;;;;; +1B25E;NUSHU CHARACTER-1B25E;Lo;0;L;;;;;N;;;;; +1B25F;NUSHU CHARACTER-1B25F;Lo;0;L;;;;;N;;;;; +1B260;NUSHU CHARACTER-1B260;Lo;0;L;;;;;N;;;;; +1B261;NUSHU CHARACTER-1B261;Lo;0;L;;;;;N;;;;; +1B262;NUSHU CHARACTER-1B262;Lo;0;L;;;;;N;;;;; +1B263;NUSHU CHARACTER-1B263;Lo;0;L;;;;;N;;;;; +1B264;NUSHU CHARACTER-1B264;Lo;0;L;;;;;N;;;;; +1B265;NUSHU CHARACTER-1B265;Lo;0;L;;;;;N;;;;; +1B266;NUSHU CHARACTER-1B266;Lo;0;L;;;;;N;;;;; +1B267;NUSHU CHARACTER-1B267;Lo;0;L;;;;;N;;;;; +1B268;NUSHU CHARACTER-1B268;Lo;0;L;;;;;N;;;;; +1B269;NUSHU CHARACTER-1B269;Lo;0;L;;;;;N;;;;; +1B26A;NUSHU CHARACTER-1B26A;Lo;0;L;;;;;N;;;;; +1B26B;NUSHU CHARACTER-1B26B;Lo;0;L;;;;;N;;;;; +1B26C;NUSHU CHARACTER-1B26C;Lo;0;L;;;;;N;;;;; +1B26D;NUSHU CHARACTER-1B26D;Lo;0;L;;;;;N;;;;; +1B26E;NUSHU CHARACTER-1B26E;Lo;0;L;;;;;N;;;;; +1B26F;NUSHU CHARACTER-1B26F;Lo;0;L;;;;;N;;;;; +1B270;NUSHU CHARACTER-1B270;Lo;0;L;;;;;N;;;;; +1B271;NUSHU CHARACTER-1B271;Lo;0;L;;;;;N;;;;; +1B272;NUSHU CHARACTER-1B272;Lo;0;L;;;;;N;;;;; +1B273;NUSHU CHARACTER-1B273;Lo;0;L;;;;;N;;;;; +1B274;NUSHU CHARACTER-1B274;Lo;0;L;;;;;N;;;;; +1B275;NUSHU CHARACTER-1B275;Lo;0;L;;;;;N;;;;; +1B276;NUSHU CHARACTER-1B276;Lo;0;L;;;;;N;;;;; +1B277;NUSHU CHARACTER-1B277;Lo;0;L;;;;;N;;;;; +1B278;NUSHU CHARACTER-1B278;Lo;0;L;;;;;N;;;;; +1B279;NUSHU CHARACTER-1B279;Lo;0;L;;;;;N;;;;; +1B27A;NUSHU CHARACTER-1B27A;Lo;0;L;;;;;N;;;;; +1B27B;NUSHU CHARACTER-1B27B;Lo;0;L;;;;;N;;;;; +1B27C;NUSHU CHARACTER-1B27C;Lo;0;L;;;;;N;;;;; +1B27D;NUSHU CHARACTER-1B27D;Lo;0;L;;;;;N;;;;; +1B27E;NUSHU CHARACTER-1B27E;Lo;0;L;;;;;N;;;;; +1B27F;NUSHU CHARACTER-1B27F;Lo;0;L;;;;;N;;;;; +1B280;NUSHU CHARACTER-1B280;Lo;0;L;;;;;N;;;;; +1B281;NUSHU CHARACTER-1B281;Lo;0;L;;;;;N;;;;; +1B282;NUSHU CHARACTER-1B282;Lo;0;L;;;;;N;;;;; +1B283;NUSHU CHARACTER-1B283;Lo;0;L;;;;;N;;;;; +1B284;NUSHU CHARACTER-1B284;Lo;0;L;;;;;N;;;;; +1B285;NUSHU CHARACTER-1B285;Lo;0;L;;;;;N;;;;; +1B286;NUSHU CHARACTER-1B286;Lo;0;L;;;;;N;;;;; +1B287;NUSHU CHARACTER-1B287;Lo;0;L;;;;;N;;;;; +1B288;NUSHU CHARACTER-1B288;Lo;0;L;;;;;N;;;;; +1B289;NUSHU CHARACTER-1B289;Lo;0;L;;;;;N;;;;; +1B28A;NUSHU CHARACTER-1B28A;Lo;0;L;;;;;N;;;;; +1B28B;NUSHU CHARACTER-1B28B;Lo;0;L;;;;;N;;;;; +1B28C;NUSHU CHARACTER-1B28C;Lo;0;L;;;;;N;;;;; +1B28D;NUSHU CHARACTER-1B28D;Lo;0;L;;;;;N;;;;; +1B28E;NUSHU CHARACTER-1B28E;Lo;0;L;;;;;N;;;;; +1B28F;NUSHU CHARACTER-1B28F;Lo;0;L;;;;;N;;;;; +1B290;NUSHU CHARACTER-1B290;Lo;0;L;;;;;N;;;;; +1B291;NUSHU CHARACTER-1B291;Lo;0;L;;;;;N;;;;; +1B292;NUSHU CHARACTER-1B292;Lo;0;L;;;;;N;;;;; +1B293;NUSHU CHARACTER-1B293;Lo;0;L;;;;;N;;;;; +1B294;NUSHU CHARACTER-1B294;Lo;0;L;;;;;N;;;;; +1B295;NUSHU CHARACTER-1B295;Lo;0;L;;;;;N;;;;; +1B296;NUSHU CHARACTER-1B296;Lo;0;L;;;;;N;;;;; +1B297;NUSHU CHARACTER-1B297;Lo;0;L;;;;;N;;;;; +1B298;NUSHU CHARACTER-1B298;Lo;0;L;;;;;N;;;;; +1B299;NUSHU CHARACTER-1B299;Lo;0;L;;;;;N;;;;; +1B29A;NUSHU CHARACTER-1B29A;Lo;0;L;;;;;N;;;;; +1B29B;NUSHU CHARACTER-1B29B;Lo;0;L;;;;;N;;;;; +1B29C;NUSHU CHARACTER-1B29C;Lo;0;L;;;;;N;;;;; +1B29D;NUSHU CHARACTER-1B29D;Lo;0;L;;;;;N;;;;; +1B29E;NUSHU CHARACTER-1B29E;Lo;0;L;;;;;N;;;;; +1B29F;NUSHU CHARACTER-1B29F;Lo;0;L;;;;;N;;;;; +1B2A0;NUSHU CHARACTER-1B2A0;Lo;0;L;;;;;N;;;;; +1B2A1;NUSHU CHARACTER-1B2A1;Lo;0;L;;;;;N;;;;; +1B2A2;NUSHU CHARACTER-1B2A2;Lo;0;L;;;;;N;;;;; +1B2A3;NUSHU CHARACTER-1B2A3;Lo;0;L;;;;;N;;;;; +1B2A4;NUSHU CHARACTER-1B2A4;Lo;0;L;;;;;N;;;;; +1B2A5;NUSHU CHARACTER-1B2A5;Lo;0;L;;;;;N;;;;; +1B2A6;NUSHU CHARACTER-1B2A6;Lo;0;L;;;;;N;;;;; +1B2A7;NUSHU CHARACTER-1B2A7;Lo;0;L;;;;;N;;;;; +1B2A8;NUSHU CHARACTER-1B2A8;Lo;0;L;;;;;N;;;;; +1B2A9;NUSHU CHARACTER-1B2A9;Lo;0;L;;;;;N;;;;; +1B2AA;NUSHU CHARACTER-1B2AA;Lo;0;L;;;;;N;;;;; +1B2AB;NUSHU CHARACTER-1B2AB;Lo;0;L;;;;;N;;;;; +1B2AC;NUSHU CHARACTER-1B2AC;Lo;0;L;;;;;N;;;;; +1B2AD;NUSHU CHARACTER-1B2AD;Lo;0;L;;;;;N;;;;; +1B2AE;NUSHU CHARACTER-1B2AE;Lo;0;L;;;;;N;;;;; +1B2AF;NUSHU CHARACTER-1B2AF;Lo;0;L;;;;;N;;;;; +1B2B0;NUSHU CHARACTER-1B2B0;Lo;0;L;;;;;N;;;;; +1B2B1;NUSHU CHARACTER-1B2B1;Lo;0;L;;;;;N;;;;; +1B2B2;NUSHU CHARACTER-1B2B2;Lo;0;L;;;;;N;;;;; +1B2B3;NUSHU CHARACTER-1B2B3;Lo;0;L;;;;;N;;;;; +1B2B4;NUSHU CHARACTER-1B2B4;Lo;0;L;;;;;N;;;;; +1B2B5;NUSHU CHARACTER-1B2B5;Lo;0;L;;;;;N;;;;; +1B2B6;NUSHU CHARACTER-1B2B6;Lo;0;L;;;;;N;;;;; +1B2B7;NUSHU CHARACTER-1B2B7;Lo;0;L;;;;;N;;;;; +1B2B8;NUSHU CHARACTER-1B2B8;Lo;0;L;;;;;N;;;;; +1B2B9;NUSHU CHARACTER-1B2B9;Lo;0;L;;;;;N;;;;; +1B2BA;NUSHU CHARACTER-1B2BA;Lo;0;L;;;;;N;;;;; +1B2BB;NUSHU CHARACTER-1B2BB;Lo;0;L;;;;;N;;;;; +1B2BC;NUSHU CHARACTER-1B2BC;Lo;0;L;;;;;N;;;;; +1B2BD;NUSHU CHARACTER-1B2BD;Lo;0;L;;;;;N;;;;; +1B2BE;NUSHU CHARACTER-1B2BE;Lo;0;L;;;;;N;;;;; +1B2BF;NUSHU CHARACTER-1B2BF;Lo;0;L;;;;;N;;;;; +1B2C0;NUSHU CHARACTER-1B2C0;Lo;0;L;;;;;N;;;;; +1B2C1;NUSHU CHARACTER-1B2C1;Lo;0;L;;;;;N;;;;; +1B2C2;NUSHU CHARACTER-1B2C2;Lo;0;L;;;;;N;;;;; +1B2C3;NUSHU CHARACTER-1B2C3;Lo;0;L;;;;;N;;;;; +1B2C4;NUSHU CHARACTER-1B2C4;Lo;0;L;;;;;N;;;;; +1B2C5;NUSHU CHARACTER-1B2C5;Lo;0;L;;;;;N;;;;; +1B2C6;NUSHU CHARACTER-1B2C6;Lo;0;L;;;;;N;;;;; +1B2C7;NUSHU CHARACTER-1B2C7;Lo;0;L;;;;;N;;;;; +1B2C8;NUSHU CHARACTER-1B2C8;Lo;0;L;;;;;N;;;;; +1B2C9;NUSHU CHARACTER-1B2C9;Lo;0;L;;;;;N;;;;; +1B2CA;NUSHU CHARACTER-1B2CA;Lo;0;L;;;;;N;;;;; +1B2CB;NUSHU CHARACTER-1B2CB;Lo;0;L;;;;;N;;;;; +1B2CC;NUSHU CHARACTER-1B2CC;Lo;0;L;;;;;N;;;;; +1B2CD;NUSHU CHARACTER-1B2CD;Lo;0;L;;;;;N;;;;; +1B2CE;NUSHU CHARACTER-1B2CE;Lo;0;L;;;;;N;;;;; +1B2CF;NUSHU CHARACTER-1B2CF;Lo;0;L;;;;;N;;;;; +1B2D0;NUSHU CHARACTER-1B2D0;Lo;0;L;;;;;N;;;;; +1B2D1;NUSHU CHARACTER-1B2D1;Lo;0;L;;;;;N;;;;; +1B2D2;NUSHU CHARACTER-1B2D2;Lo;0;L;;;;;N;;;;; +1B2D3;NUSHU CHARACTER-1B2D3;Lo;0;L;;;;;N;;;;; +1B2D4;NUSHU CHARACTER-1B2D4;Lo;0;L;;;;;N;;;;; +1B2D5;NUSHU CHARACTER-1B2D5;Lo;0;L;;;;;N;;;;; +1B2D6;NUSHU CHARACTER-1B2D6;Lo;0;L;;;;;N;;;;; +1B2D7;NUSHU CHARACTER-1B2D7;Lo;0;L;;;;;N;;;;; +1B2D8;NUSHU CHARACTER-1B2D8;Lo;0;L;;;;;N;;;;; +1B2D9;NUSHU CHARACTER-1B2D9;Lo;0;L;;;;;N;;;;; +1B2DA;NUSHU CHARACTER-1B2DA;Lo;0;L;;;;;N;;;;; +1B2DB;NUSHU CHARACTER-1B2DB;Lo;0;L;;;;;N;;;;; +1B2DC;NUSHU CHARACTER-1B2DC;Lo;0;L;;;;;N;;;;; +1B2DD;NUSHU CHARACTER-1B2DD;Lo;0;L;;;;;N;;;;; +1B2DE;NUSHU CHARACTER-1B2DE;Lo;0;L;;;;;N;;;;; +1B2DF;NUSHU CHARACTER-1B2DF;Lo;0;L;;;;;N;;;;; +1B2E0;NUSHU CHARACTER-1B2E0;Lo;0;L;;;;;N;;;;; +1B2E1;NUSHU CHARACTER-1B2E1;Lo;0;L;;;;;N;;;;; +1B2E2;NUSHU CHARACTER-1B2E2;Lo;0;L;;;;;N;;;;; +1B2E3;NUSHU CHARACTER-1B2E3;Lo;0;L;;;;;N;;;;; +1B2E4;NUSHU CHARACTER-1B2E4;Lo;0;L;;;;;N;;;;; +1B2E5;NUSHU CHARACTER-1B2E5;Lo;0;L;;;;;N;;;;; +1B2E6;NUSHU CHARACTER-1B2E6;Lo;0;L;;;;;N;;;;; +1B2E7;NUSHU CHARACTER-1B2E7;Lo;0;L;;;;;N;;;;; +1B2E8;NUSHU CHARACTER-1B2E8;Lo;0;L;;;;;N;;;;; +1B2E9;NUSHU CHARACTER-1B2E9;Lo;0;L;;;;;N;;;;; +1B2EA;NUSHU CHARACTER-1B2EA;Lo;0;L;;;;;N;;;;; +1B2EB;NUSHU CHARACTER-1B2EB;Lo;0;L;;;;;N;;;;; +1B2EC;NUSHU CHARACTER-1B2EC;Lo;0;L;;;;;N;;;;; +1B2ED;NUSHU CHARACTER-1B2ED;Lo;0;L;;;;;N;;;;; +1B2EE;NUSHU CHARACTER-1B2EE;Lo;0;L;;;;;N;;;;; +1B2EF;NUSHU CHARACTER-1B2EF;Lo;0;L;;;;;N;;;;; +1B2F0;NUSHU CHARACTER-1B2F0;Lo;0;L;;;;;N;;;;; +1B2F1;NUSHU CHARACTER-1B2F1;Lo;0;L;;;;;N;;;;; +1B2F2;NUSHU CHARACTER-1B2F2;Lo;0;L;;;;;N;;;;; +1B2F3;NUSHU CHARACTER-1B2F3;Lo;0;L;;;;;N;;;;; +1B2F4;NUSHU CHARACTER-1B2F4;Lo;0;L;;;;;N;;;;; +1B2F5;NUSHU CHARACTER-1B2F5;Lo;0;L;;;;;N;;;;; +1B2F6;NUSHU CHARACTER-1B2F6;Lo;0;L;;;;;N;;;;; +1B2F7;NUSHU CHARACTER-1B2F7;Lo;0;L;;;;;N;;;;; +1B2F8;NUSHU CHARACTER-1B2F8;Lo;0;L;;;;;N;;;;; +1B2F9;NUSHU CHARACTER-1B2F9;Lo;0;L;;;;;N;;;;; +1B2FA;NUSHU CHARACTER-1B2FA;Lo;0;L;;;;;N;;;;; +1B2FB;NUSHU CHARACTER-1B2FB;Lo;0;L;;;;;N;;;;; 1BC00;DUPLOYAN LETTER H;Lo;0;L;;;;;N;;;;; 1BC01;DUPLOYAN LETTER X;Lo;0;L;;;;;N;;;;; 1BC02;DUPLOYAN LETTER P;Lo;0;L;;;;;N;;;;; @@ -28269,6 +29217,12 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F248;TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557;So;0;L;<compat> 3014 6557 3015;;;;N;;;;; 1F250;CIRCLED IDEOGRAPH ADVANTAGE;So;0;L;<circle> 5F97;;;;N;;;;; 1F251;CIRCLED IDEOGRAPH ACCEPT;So;0;L;<circle> 53EF;;;;N;;;;; +1F260;ROUNDED SYMBOL FOR FU;So;0;ON;;;;;N;;;;; +1F261;ROUNDED SYMBOL FOR LU;So;0;ON;;;;;N;;;;; +1F262;ROUNDED SYMBOL FOR SHOU;So;0;ON;;;;;N;;;;; +1F263;ROUNDED SYMBOL FOR XI;So;0;ON;;;;;N;;;;; +1F264;ROUNDED SYMBOL FOR SHUANGXI;So;0;ON;;;;;N;;;;; +1F265;ROUNDED SYMBOL FOR CAI;So;0;ON;;;;;N;;;;; 1F300;CYCLONE;So;0;ON;;;;;N;;;;; 1F301;FOGGY;So;0;ON;;;;;N;;;;; 1F302;CLOSED UMBRELLA;So;0;ON;;;;;N;;;;; @@ -29248,6 +30202,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F6D0;PLACE OF WORSHIP;So;0;ON;;;;;N;;;;; 1F6D1;OCTAGONAL SIGN;So;0;ON;;;;;N;;;;; 1F6D2;SHOPPING TROLLEY;So;0;ON;;;;;N;;;;; +1F6D3;STUPA;So;0;ON;;;;;N;;;;; +1F6D4;PAGODA;So;0;ON;;;;;N;;;;; 1F6E0;HAMMER AND WRENCH;So;0;ON;;;;;N;;;;; 1F6E1;SHIELD;So;0;ON;;;;;N;;;;; 1F6E2;OIL DRUM;So;0;ON;;;;;N;;;;; @@ -29268,6 +30224,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F6F4;SCOOTER;So;0;ON;;;;;N;;;;; 1F6F5;MOTOR SCOOTER;So;0;ON;;;;;N;;;;; 1F6F6;CANOE;So;0;ON;;;;;N;;;;; +1F6F7;SLED;So;0;ON;;;;;N;;;;; +1F6F8;FLYING SAUCER;So;0;ON;;;;;N;;;;; 1F700;ALCHEMICAL SYMBOL FOR QUINTESSENCE;So;0;ON;;;;;N;;;;; 1F701;ALCHEMICAL SYMBOL FOR AIR;So;0;ON;;;;;N;;;;; 1F702;ALCHEMICAL SYMBOL FOR FIRE;So;0;ON;;;;;N;;;;; @@ -29617,6 +30575,18 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F8AB;RIGHTWARDS FRONT-TILTED SHADOWED WHITE ARROW;So;0;ON;;;;;N;;;;; 1F8AC;WHITE ARROW SHAFT WIDTH ONE;So;0;ON;;;;;N;;;;; 1F8AD;WHITE ARROW SHAFT WIDTH TWO THIRDS;So;0;ON;;;;;N;;;;; +1F900;CIRCLED CROSS FORMEE WITH FOUR DOTS;So;0;ON;;;;;N;;;;; +1F901;CIRCLED CROSS FORMEE WITH TWO DOTS;So;0;ON;;;;;N;;;;; +1F902;CIRCLED CROSS FORMEE;So;0;ON;;;;;N;;;;; +1F903;LEFT HALF CIRCLE WITH FOUR DOTS;So;0;ON;;;;;N;;;;; +1F904;LEFT HALF CIRCLE WITH THREE DOTS;So;0;ON;;;;;N;;;;; +1F905;LEFT HALF CIRCLE WITH TWO DOTS;So;0;ON;;;;;N;;;;; +1F906;LEFT HALF CIRCLE WITH DOT;So;0;ON;;;;;N;;;;; +1F907;LEFT HALF CIRCLE;So;0;ON;;;;;N;;;;; +1F908;DOWNWARD FACING HOOK;So;0;ON;;;;;N;;;;; +1F909;DOWNWARD FACING NOTCHED HOOK;So;0;ON;;;;;N;;;;; +1F90A;DOWNWARD FACING HOOK WITH DOT;So;0;ON;;;;;N;;;;; +1F90B;DOWNWARD FACING NOTCHED HOOK WITH DOT;So;0;ON;;;;;N;;;;; 1F910;ZIPPER-MOUTH FACE;So;0;ON;;;;;N;;;;; 1F911;MONEY-MOUTH FACE;So;0;ON;;;;;N;;;;; 1F912;FACE WITH THERMOMETER;So;0;ON;;;;;N;;;;; @@ -29632,6 +30602,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F91C;RIGHT-FACING FIST;So;0;ON;;;;;N;;;;; 1F91D;HANDSHAKE;So;0;ON;;;;;N;;;;; 1F91E;HAND WITH INDEX AND MIDDLE FINGERS CROSSED;So;0;ON;;;;;N;;;;; +1F91F;I LOVE YOU HAND SIGN;So;0;ON;;;;;N;;;;; 1F920;FACE WITH COWBOY HAT;So;0;ON;;;;;N;;;;; 1F921;CLOWN FACE;So;0;ON;;;;;N;;;;; 1F922;NAUSEATED FACE;So;0;ON;;;;;N;;;;; @@ -29640,7 +30611,17 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F925;LYING FACE;So;0;ON;;;;;N;;;;; 1F926;FACE PALM;So;0;ON;;;;;N;;;;; 1F927;SNEEZING FACE;So;0;ON;;;;;N;;;;; +1F928;FACE WITH ONE EYEBROW RAISED;So;0;ON;;;;;N;;;;; +1F929;GRINNING FACE WITH STAR EYES;So;0;ON;;;;;N;;;;; +1F92A;GRINNING FACE WITH ONE LARGE AND ONE SMALL EYE;So;0;ON;;;;;N;;;;; +1F92B;FACE WITH FINGER COVERING CLOSED LIPS;So;0;ON;;;;;N;;;;; +1F92C;SERIOUS FACE WITH SYMBOLS COVERING MOUTH;So;0;ON;;;;;N;;;;; +1F92D;SMILING FACE WITH SMILING EYES AND HAND COVERING MOUTH;So;0;ON;;;;;N;;;;; +1F92E;FACE WITH OPEN MOUTH VOMITING;So;0;ON;;;;;N;;;;; +1F92F;SHOCKED FACE WITH EXPLODING HEAD;So;0;ON;;;;;N;;;;; 1F930;PREGNANT WOMAN;So;0;ON;;;;;N;;;;; +1F931;BREAST-FEEDING;So;0;ON;;;;;N;;;;; +1F932;PALMS UP TOGETHER;So;0;ON;;;;;N;;;;; 1F933;SELFIE;So;0;ON;;;;;N;;;;; 1F934;PRINCE;So;0;ON;;;;;N;;;;; 1F935;MAN IN TUXEDO;So;0;ON;;;;;N;;;;; @@ -29665,6 +30646,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F949;THIRD PLACE MEDAL;So;0;ON;;;;;N;;;;; 1F94A;BOXING GLOVE;So;0;ON;;;;;N;;;;; 1F94B;MARTIAL ARTS UNIFORM;So;0;ON;;;;;N;;;;; +1F94C;CURLING STONE;So;0;ON;;;;;N;;;;; 1F950;CROISSANT;So;0;ON;;;;;N;;;;; 1F951;AVOCADO;So;0;ON;;;;;N;;;;; 1F952;CUCUMBER;So;0;ON;;;;;N;;;;; @@ -29680,6 +30662,19 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F95C;PEANUTS;So;0;ON;;;;;N;;;;; 1F95D;KIWIFRUIT;So;0;ON;;;;;N;;;;; 1F95E;PANCAKES;So;0;ON;;;;;N;;;;; +1F95F;DUMPLING;So;0;ON;;;;;N;;;;; +1F960;FORTUNE COOKIE;So;0;ON;;;;;N;;;;; +1F961;TAKEOUT BOX;So;0;ON;;;;;N;;;;; +1F962;CHOPSTICKS;So;0;ON;;;;;N;;;;; +1F963;BOWL WITH SPOON;So;0;ON;;;;;N;;;;; +1F964;CUP WITH STRAW;So;0;ON;;;;;N;;;;; +1F965;COCONUT;So;0;ON;;;;;N;;;;; +1F966;BROCCOLI;So;0;ON;;;;;N;;;;; +1F967;PIE;So;0;ON;;;;;N;;;;; +1F968;PRETZEL;So;0;ON;;;;;N;;;;; +1F969;CUT OF MEAT;So;0;ON;;;;;N;;;;; +1F96A;SANDWICH;So;0;ON;;;;;N;;;;; +1F96B;CANNED FOOD;So;0;ON;;;;;N;;;;; 1F980;CRAB;So;0;ON;;;;;N;;;;; 1F981;LION FACE;So;0;ON;;;;;N;;;;; 1F982;SCORPION;So;0;ON;;;;;N;;;;; @@ -29698,7 +30693,36 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F98F;RHINOCEROS;So;0;ON;;;;;N;;;;; 1F990;SHRIMP;So;0;ON;;;;;N;;;;; 1F991;SQUID;So;0;ON;;;;;N;;;;; +1F992;GIRAFFE FACE;So;0;ON;;;;;N;;;;; +1F993;ZEBRA FACE;So;0;ON;;;;;N;;;;; +1F994;HEDGEHOG;So;0;ON;;;;;N;;;;; +1F995;SAUROPOD;So;0;ON;;;;;N;;;;; +1F996;T-REX;So;0;ON;;;;;N;;;;; +1F997;CRICKET;So;0;ON;;;;;N;;;;; 1F9C0;CHEESE WEDGE;So;0;ON;;;;;N;;;;; +1F9D0;FACE WITH MONOCLE;So;0;ON;;;;;N;;;;; +1F9D1;ADULT;So;0;ON;;;;;N;;;;; +1F9D2;CHILD;So;0;ON;;;;;N;;;;; +1F9D3;OLDER ADULT;So;0;ON;;;;;N;;;;; +1F9D4;BEARDED PERSON;So;0;ON;;;;;N;;;;; +1F9D5;PERSON WITH HEADSCARF;So;0;ON;;;;;N;;;;; +1F9D6;PERSON IN STEAMY ROOM;So;0;ON;;;;;N;;;;; +1F9D7;PERSON CLIMBING;So;0;ON;;;;;N;;;;; +1F9D8;PERSON IN LOTUS POSITION;So;0;ON;;;;;N;;;;; +1F9D9;MAGE;So;0;ON;;;;;N;;;;; +1F9DA;FAIRY;So;0;ON;;;;;N;;;;; +1F9DB;VAMPIRE;So;0;ON;;;;;N;;;;; +1F9DC;MERPERSON;So;0;ON;;;;;N;;;;; +1F9DD;ELF;So;0;ON;;;;;N;;;;; +1F9DE;GENIE;So;0;ON;;;;;N;;;;; +1F9DF;ZOMBIE;So;0;ON;;;;;N;;;;; +1F9E0;BRAIN;So;0;ON;;;;;N;;;;; +1F9E1;ORANGE HEART;So;0;ON;;;;;N;;;;; +1F9E2;BILLED CAP;So;0;ON;;;;;N;;;;; +1F9E3;SCARF;So;0;ON;;;;;N;;;;; +1F9E4;GLOVES;So;0;ON;;;;;N;;;;; +1F9E5;COAT;So;0;ON;;;;;N;;;;; +1F9E6;SOCKS;So;0;ON;;;;;N;;;;; 20000;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;; 2A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;; 2A700;<CJK Ideograph Extension C, First>;Lo;0;L;;;;;N;;;;; @@ -29707,6 +30731,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 2B81D;<CJK Ideograph Extension D, Last>;Lo;0;L;;;;;N;;;;; 2B820;<CJK Ideograph Extension E, First>;Lo;0;L;;;;;N;;;;; 2CEA1;<CJK Ideograph Extension E, Last>;Lo;0;L;;;;;N;;;;; +2CEB0;<CJK Ideograph Extension F, First>;Lo;0;L;;;;;N;;;;; +2EBE0;<CJK Ideograph Extension F, Last>;Lo;0;L;;;;;N;;;;; 2F800;CJK COMPATIBILITY IDEOGRAPH-2F800;Lo;0;L;4E3D;;;;N;;;;; 2F801;CJK COMPATIBILITY IDEOGRAPH-2F801;Lo;0;L;4E38;;;;N;;;;; 2F802;CJK COMPATIBILITY IDEOGRAPH-2F802;Lo;0;L;4E41;;;;N;;;;; diff --git a/lib/stdlib/uc_spec/gen_unicode_mod.escript b/lib/stdlib/uc_spec/gen_unicode_mod.escript index fefd7d3b70..674e5a0628 100755 --- a/lib/stdlib/uc_spec/gen_unicode_mod.escript +++ b/lib/stdlib/uc_spec/gen_unicode_mod.escript @@ -65,7 +65,7 @@ main(_) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% parse_unicode_data(Line0, Acc) -> - Line = string:strip(Line0, right, $\n), + Line = string:chomp(Line0), [CodePoint,Name,_Cat,Class,_BiDi,Decomp, _N1,_N2,_N3,_BDMirror,_Uni1,_Iso|Case] = tokens(Line, ";"), {Dec,Comp} = case to_decomp(Decomp) of @@ -78,14 +78,14 @@ parse_unicode_data(Line0, Acc) -> |Acc]. to_class(String) -> - list_to_integer(string:strip(String, both)). + list_to_integer(string:trim(String, both)). to_decomp("") -> []; to_decomp("<" ++ Str) -> - [Tag,Rest] = string:tokens(Str, ">"), + [Tag,Rest] = string:lexemes(Str, ">"), {list_to_atom(Tag), to_decomp(Rest)}; to_decomp(CodePoints) -> - CPL = string:tokens(CodePoints, " "), + CPL = string:lexemes(CodePoints, " "), [hex_to_int(CP) || CP <- CPL]. to_case(["","",""]) -> []; @@ -105,20 +105,20 @@ parse_special_casing(Line, Table) -> array:set(CP, Entry#cp{cs=Case}, Table). to_scase([Lower,Title,Upper|_]) -> - {unlist([hex_to_int(CP) || CP <- string:strip(string:tokens(Upper, " "), both)]), - unlist([hex_to_int(CP) || CP <- string:strip(string:tokens(Lower, " "), both)]), - unlist([hex_to_int(CP) || CP <- string:strip(string:tokens(Title, " "), both)]), + {unlist([hex_to_int(CP) || CP <- string:lexemes(Upper, " ")]), + unlist([hex_to_int(CP) || CP <- string:lexemes(Lower, " ")]), + unlist([hex_to_int(CP) || CP <- string:lexemes(Title, " ")]), []}. parse_case_folding(Line, Table) -> [CodePoint, Class0, CaseStr |_Comments] = tokens(Line, ";"), - Class = string:strip(Class0, both), + Class = string:trim(Class0, both), if Class =:= "T" -> Table; %% Do not support localization yet Class =:= "S" -> Table; %% Ignore simple true -> CP = hex_to_int(CodePoint), Case = unlist([hex_to_int(CPC) || - CPC <- string:strip(string:tokens(CaseStr, " "), both)]), + CPC <- string:lexemes(CaseStr, " ")]), #cp{cs={U,L,T,_}} = Entry = array:get(CP, Table), array:set(CP, Entry#cp{cs={U,L,T,Case}}, Table) end. @@ -186,7 +186,7 @@ gen_static(Fd) -> " {U,L} -> #{upper=>U,lower=>L,title=>U,fold=>L};\n" " {U,L,T,F} -> #{upper=>U,lower=>L,title=>T,fold=>F}\n" " end.\n\n"), - io:put_chars(Fd, "spec_version() -> {9,0}.\n\n\n"), + io:put_chars(Fd, "spec_version() -> {10,0}.\n\n\n"), io:put_chars(Fd, "class(Codepoint) -> {CCC,_,_} = unicode_table(Codepoint),\n CCC.\n\n"), io:put_chars(Fd, "-spec uppercase(unicode:chardata()) -> " "maybe_improper_list(gc(),unicode:chardata()).\n"), @@ -869,10 +869,10 @@ optimize_ranges_1(Rs) -> hex_to_int([]) -> []; hex_to_int(HexStr) -> - list_to_integer(string:strip(HexStr, both), 16). + list_to_integer(string:trim(HexStr, both), 16). to_atom(Str) -> - list_to_atom(string:to_lower(string:strip(Str, both))). + list_to_atom(string:lowercase(string:trim(Str, both))). foldl(Fun, Acc, Fd) -> Get = fun() -> file:read_line(Fd) end, @@ -892,7 +892,7 @@ foldl_1(Fun, Acc, Get) -> -%% Differs from string:tokens, it returns empty string as token between two delimiters +%% Differs from string:lexemes, it returns empty string as token between two delimiters tokens(S, [C]) -> tokens(lists:reverse(S), C, []). |