diff options
Diffstat (limited to 'lib/stdlib')
41 files changed, 833 insertions, 788 deletions
diff --git a/lib/stdlib/doc/src/Makefile b/lib/stdlib/doc/src/Makefile index 26602764a6..93eac8220d 100644 --- a/lib/stdlib/doc/src/Makefile +++ b/lib/stdlib/doc/src/Makefile @@ -1,7 +1,7 @@ # # %CopyrightBegin% # -# Copyright Ericsson AB 1997-2016. All Rights Reserved. +# Copyright Ericsson AB 1997-2017. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -103,7 +103,7 @@ XML_REF3_FILES = \ XML_REF6_FILES = stdlib_app.xml -XML_PART_FILES = part.xml part_notes.xml part_notes_history.xml +XML_PART_FILES = part.xml XML_CHAPTER_FILES = io_protocol.xml unicode_usage.xml notes.xml notes_history.xml assert_hrl.xml BOOK_FILES = book.xml @@ -131,9 +131,9 @@ SPECS_FILES = $(XML_REF3_FILES:%.xml=$(SPECDIR)/specs_%.xml) TOP_SPECS_FILE = specs.xml # ---------------------------------------------------- -# FLAGS +# FLAGS # ---------------------------------------------------- -XML_FLAGS += +XML_FLAGS += SPECS_FLAGS = -I../../include -I../../../kernel/include @@ -150,7 +150,7 @@ html: $(HTML_REF_MAN_FILE) man: $(MAN3_FILES) $(MAN6_FILES) -debug opt: +debug opt: clean clean_docs: rm -rf $(HTMLDIR)/* @@ -158,7 +158,7 @@ clean clean_docs: rm -f $(MAN6DIR)/* rm -f $(TOP_PDF_FILE) $(TOP_PDF_FILE:%.pdf=%.fo) rm -f $(SPECDIR)/* - rm -f errs core *~ + rm -f errs core *~ $(SPECDIR)/specs_erl_id_trans.xml: escript $(SPECS_EXTRACTOR) $(SPECS_FLAGS) \ @@ -166,7 +166,7 @@ $(SPECDIR)/specs_erl_id_trans.xml: # ---------------------------------------------------- # Release Target -# ---------------------------------------------------- +# ---------------------------------------------------- include $(ERL_TOP)/make/otp_release_targets.mk release_docs_spec: docs diff --git a/lib/stdlib/doc/src/c.xml b/lib/stdlib/doc/src/c.xml index 7666699183..697e1715e7 100644 --- a/lib/stdlib/doc/src/c.xml +++ b/lib/stdlib/doc/src/c.xml @@ -94,6 +94,15 @@ </func> <func> + <name name="erlangrc" arity="1"/> + <fsummary>Load an erlang resource file.</fsummary> + <desc> + <p>Search <c>PathList</c> and load <c>.erlang</c> resource file if + found.</p> + </desc> + </func> + + <func> <name name="flush" arity="0"/> <fsummary>Flush any messages sent to the shell.</fsummary> <desc> diff --git a/lib/stdlib/doc/src/ets.xml b/lib/stdlib/doc/src/ets.xml index 95af2b77a5..576959b1c8 100644 --- a/lib/stdlib/doc/src/ets.xml +++ b/lib/stdlib/doc/src/ets.xml @@ -325,7 +325,7 @@ <p><c><anno>Acc0</anno></c> is returned if the table is empty. This function is similar to <seealso marker="lists#foldl/3"><c>lists:foldl/3</c></seealso>. - The table elements are traversed is unspecified order, except for + The table elements are traversed in an unspecified order, except for <c>ordered_set</c> tables, where they are traversed first to last.</p> <p>If <c><anno>Function</anno></c> inserts objects into the table, or another @@ -341,7 +341,7 @@ <p><c><anno>Acc0</anno></c> is returned if the table is empty. This function is similar to <seealso marker="lists#foldr/3"><c>lists:foldr/3</c></seealso>. - The table elements are traversed is unspecified order, except for + The table elements are traversed in an unspecified order, except for <c>ordered_set</c> tables, where they are traversed last to first.</p> <p>If <c><anno>Function</anno></c> inserts objects into the table, or another diff --git a/lib/stdlib/doc/src/fascicules.xml b/lib/stdlib/doc/src/fascicules.xml deleted file mode 100644 index 0ded9007e0..0000000000 --- a/lib/stdlib/doc/src/fascicules.xml +++ /dev/null @@ -1,18 +0,0 @@ -<?xml version="1.0" encoding="utf-8" ?> -<!DOCTYPE fascicules SYSTEM "fascicules.dtd"> - -<fascicules> - <fascicule file="part" href="part_frame.html" entry="no"> - STDLIB User's Guide - </fascicule> - <fascicule file="ref_man" href="ref_man_frame.html" entry="yes"> - Reference Manual - </fascicule> - <fascicule file="part_notes" href="part_notes_frame.html" entry="no"> - Release Notes - </fascicule> - <fascicule file="" href="../../../../doc/print.html" entry="no"> - Off-Print - </fascicule> -</fascicules> - diff --git a/lib/stdlib/doc/src/filelib.xml b/lib/stdlib/doc/src/filelib.xml index 80c4acffdb..5e631aac21 100644 --- a/lib/stdlib/doc/src/filelib.xml +++ b/lib/stdlib/doc/src/filelib.xml @@ -45,6 +45,30 @@ <p>For more information about raw filenames, see the <seealso marker="kernel:file"><c>file</c></seealso> module.</p> + + <note> + <p> + Functionality in this module generally assumes valid input and + does not necessarily fail on input that does not use a valid + encoding, but may instead very likely produce invalid output. + </p> + <p> + File operations used to accept filenames containing + null characters (integer value zero). This caused + the name to be truncated and in some cases arguments + to primitive operations to be mixed up. Filenames + containing null characters inside the filename + are now <em>rejected</em> and will cause primitive + file operations to fail. + </p> + </note> + <warning><p> + Currently null characters at the end of the filename + will be accepted by primitive file operations. Such + filenames are however still documented as invalid. The + implementation will also change in the future and + reject such filenames. + </p></warning> </description> <datatypes> @@ -193,6 +217,11 @@ <p>Other characters represent themselves. Only filenames that have exactly the same character in the same position match. Matching is case-sensitive, for example, "a" does not match "A".</p> + <p>Directory separators must always be written as <c>/</c>, even on + Windows.</p> + <p>A character preceded by <c>\</c> loses its special meaning. Note + that <c>\</c> must be written as <c>\\</c> in a string literal. + For example, "\\?*" will match any filename starting with <c>?</c>.</p> <p>Notice that multiple "*" characters are allowed (as in Unix wildcards, but opposed to Windows/DOS wildcards).</p> <p><em>Examples:</em></p> diff --git a/lib/stdlib/doc/src/filename.xml b/lib/stdlib/doc/src/filename.xml index 14fd5ef787..d2608ad542 100644 --- a/lib/stdlib/doc/src/filename.xml +++ b/lib/stdlib/doc/src/filename.xml @@ -46,7 +46,10 @@ filename by removing redundant directory separators, use <seealso marker="#join/1"><c>join/1</c></seealso>.</p> - <p>The module supports raw filenames in the way that if a binary is + <p> + The module supports + <seealso marker="unicode_usage#notes-about-raw-filenames">raw + filenames</seealso> in the way that if a binary is present, or the filename cannot be interpreted according to the return value of <seealso marker="kernel:file#native_name_encoding/0"> <c>file:native_name_encoding/0</c></seealso>, a raw filename is also @@ -56,6 +59,30 @@ (the join operation is performed of course). For more information about raw filenames, see the <seealso marker="kernel:file"><c>file</c></seealso> module.</p> + + <note> + <p> + Functionality in this module generally assumes valid input and + does not necessarily fail on input that does not use a valid + encoding, but may instead very likely produce invalid output. + </p> + <p> + File operations used to accept filenames containing + null characters (integer value zero). This caused + the name to be truncated and in some cases arguments + to primitive operations to be mixed up. Filenames + containing null characters inside the filename + are now <em>rejected</em> and will cause primitive + file operations to fail. + </p> + </note> + <warning><p> + Currently null characters at the end of the filename + will be accepted by primitive file operations. Such + filenames are however still documented as invalid. The + implementation will also change in the future and + reject such filenames. + </p></warning> </description> <datatypes> <datatype> @@ -555,6 +582,7 @@ unsafe</pre> ["a:/","msdev","include"]</pre> </desc> </func> + </funcs> </erlref> diff --git a/lib/stdlib/doc/src/gen_statem.xml b/lib/stdlib/doc/src/gen_statem.xml index 8de6ed754f..4a824f073e 100644 --- a/lib/stdlib/doc/src/gen_statem.xml +++ b/lib/stdlib/doc/src/gen_statem.xml @@ -1329,7 +1329,7 @@ handle_event(_, _, State, Data) -> <c><anno>T</anno></c> is the time-out time. <c>{clean_timeout,<anno>T</anno>}</c> works like just <c>T</c> described in the note above - and uses a proxy process for <c>T < infinity</c>, + and uses a proxy process while <c>{dirty_timeout,<anno>T</anno>}</c> bypasses the proxy process which is more lightweight. </p> @@ -1339,8 +1339,12 @@ handle_event(_, _, State, Data) -> with <c>{dirty_timeout,<anno>T</anno>}</c> to avoid that the calling process dies when the call times out, you will have to be prepared to handle - a late reply. - So why not just let the calling process die? + a late reply. Note that there is an odd chance + to get a late reply even with + <c>{dirty_timeout,infinity}</c> or <c>infinity</c> + for example in the event of network problems. + So why not just let the calling process die + by not catching the exception? </p> </note> <p> diff --git a/lib/stdlib/doc/src/notes.xml b/lib/stdlib/doc/src/notes.xml index 604d758db3..d396f1de8f 100644 --- a/lib/stdlib/doc/src/notes.xml +++ b/lib/stdlib/doc/src/notes.xml @@ -31,6 +31,56 @@ </header> <p>This document describes the changes made to the STDLIB application.</p> +<section><title>STDLIB 3.4.2</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> Fix a bug in the Erlang shell where recursively + defined records with typed fields could cause a loop. + </p> + <p> + Own Id: OTP-14488 Aux Id: PR-1489 </p> + </item> + <item> + <p> + Make edlin handle grapheme clusters instead of codepoints + to improve the handling multi-codepoints characters.</p> + <p> + Own Id: OTP-14542</p> + </item> + <item> + <p>There could be false warnings for + <c>erlang:get_stacktrace/0</c> being used outside of a + <c>try</c> block when using multiple <c>catch</c> + clauses.</p> + <p> + Own Id: OTP-14600 Aux Id: ERL-478 </p> + </item> + </list> + </section> + + + <section><title>Improvements and New Features</title> + <list> + <item> + <p> The Erlang code linter no longer checks that the + functions mentioned in <c>nowarn_deprecated_function</c> + options are declared in the module. </p> + <p> + Own Id: OTP-14378</p> + </item> + <item> + <p> + General Unicode improvements.</p> + <p> + Own Id: OTP-14462</p> + </item> + </list> + </section> + +</section> + <section><title>STDLIB 3.4.1</title> <section><title>Fixed Bugs and Malfunctions</title> diff --git a/lib/stdlib/doc/src/part_notes.xml b/lib/stdlib/doc/src/part_notes.xml deleted file mode 100644 index 461de749dd..0000000000 --- a/lib/stdlib/doc/src/part_notes.xml +++ /dev/null @@ -1,39 +0,0 @@ -<?xml version="1.0" encoding="utf-8" ?> -<!DOCTYPE part SYSTEM "part.dtd"> - -<part xmlns:xi="http://www.w3.org/2001/XInclude"> - <header> - <copyright> - <year>2004</year><year>2016</year> - <holder>Ericsson AB. All Rights Reserved.</holder> - </copyright> - <legalnotice> - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - </legalnotice> - - <title>STDLIB Release Notes</title> - <prepared></prepared> - <docno></docno> - <date></date> - <rev></rev> - </header> - <description> - <p>The Standard Erlang Libraries application, <em>STDLIB</em>, - contains modules for manipulating lists, strings and files etc.</p> - <p>For information about older versions, see - <url href="part_notes_history_frame.html">Release Notes History</url>.</p> - </description> - <xi:include href="notes.xml"/> -</part> - diff --git a/lib/stdlib/doc/src/part_notes_history.xml b/lib/stdlib/doc/src/part_notes_history.xml deleted file mode 100644 index 8fd048a41e..0000000000 --- a/lib/stdlib/doc/src/part_notes_history.xml +++ /dev/null @@ -1,39 +0,0 @@ -<?xml version="1.0" encoding="utf-8" ?> -<!DOCTYPE part SYSTEM "part.dtd"> - -<part> - <header> - <copyright> - <year>2006</year> - <year>2016</year> - <holder>Ericsson AB, All Rights Reserved</holder> - </copyright> - <legalnotice> - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - The Initial Developer of the Original Code is Ericsson AB. - </legalnotice> - - <title>STDLIB Release Notes History</title> - <prepared></prepared> - <docno></docno> - <date></date> - <rev></rev> - </header> - <description> - <p>The Standard Erlang Libraries application, <em>STDLIB</em>, - contains modules for manipulating lists, strings and files etc.</p> - </description> - <include file="notes_history"></include> -</part> - diff --git a/lib/stdlib/doc/src/rand.xml b/lib/stdlib/doc/src/rand.xml index a68fb7d55f..89fb858823 100644 --- a/lib/stdlib/doc/src/rand.xml +++ b/lib/stdlib/doc/src/rand.xml @@ -35,12 +35,19 @@ <module>rand</module> <modulesummary>Pseudo random number generation.</modulesummary> <description> - <p>This module provides a random number generator. The module contains - a number of algorithms. The uniform distribution algorithms use the - <url href="http://xorshift.di.unimi.it">scrambled Xorshift algorithms by - Sebastiano Vigna</url>. The normal distribution algorithm uses the - <url href="http://www.jstatsoft.org/v05/i08">Ziggurat Method by Marsaglia - and Tsang</url>.</p> + <p> + This module provides a pseudo random number generator. + The module contains a number of algorithms. + The uniform distribution algorithms use the + <url href="http://xorshift.di.unimi.it"> + xoroshiro116+ and xorshift1024* algorithms by Sebastiano Vigna. + </url> + The normal distribution algorithm uses the + <url href="http://www.jstatsoft.org/v05/i08"> + Ziggurat Method by Marsaglia and Tsang + </url> + on top of the uniform distribution algorithm. + </p> <p>For some algorithms, jump functions are provided for generating non-overlapping sequences for parallel computations. The jump functions perform calculations @@ -393,9 +400,34 @@ tests. We suggest to use a sign test to extract a random Boolean value.</pre> <name name="uniform" arity="0"/> <fsummary>Return a random float.</fsummary> <desc><marker id="uniform-0"/> - <p>Returns a random float uniformly distributed in the value + <p> + Returns a random float uniformly distributed in the value range <c>0.0 =< <anno>X</anno> < 1.0</c> and - updates the state in the process dictionary.</p> + updates the state in the process dictionary. + </p> + <p> + The generated numbers are on the form N * 2.0^(-53), + that is; equally spaced in the interval. + </p> + <warning> + <p> + This function may return exactly <c>0.0</c> which can be + fatal for certain applications. If that is undesired + you can use <c>(1.0 - rand:uniform())</c> to get the + interval <c>0.0 < <anno>X</anno> =< 1.0</c>. + </p> + <p> + If neither endpoint is desired you can test and re-try + like this: + </p> + <pre> +my_uniform() -> + case rand:uniform() of + 0.0 -> my_uniform(); + X -> X + end +end.</pre> + </warning> </desc> </func> @@ -414,9 +446,34 @@ tests. We suggest to use a sign test to extract a random Boolean value.</pre> <name name="uniform_s" arity="1"/> <fsummary>Return a random float.</fsummary> <desc> - <p>Returns, for a specified state, random float + <p> + Returns, for a specified state, random float uniformly distributed in the value range <c>0.0 =< - <anno>X</anno> < 1.0</c> and a new state.</p> + <anno>X</anno> < 1.0</c> and a new state. + </p> + <p> + The generated numbers are on the form N * 2.0^(-53), + that is; equally spaced in the interval. + </p> + <warning> + <p> + This function may return exactly <c>0.0</c> which can be + fatal for certain applications. If that is undesired + you can use <c>(1.0 - rand:uniform(State))</c> to get the + interval <c>0.0 < <anno>X</anno> =< 1.0</c>. + </p> + <p> + If neither endpoint is desired you can test and re-try + like this: + </p> + <pre> +my_uniform(State) -> + case rand:uniform(State) of + {0.0, NewState} -> my_uniform(NewState); + Result -> Result + end +end.</pre> + </warning> </desc> </func> diff --git a/lib/stdlib/doc/src/string.xml b/lib/stdlib/doc/src/string.xml index 9d5edd9ecf..130fc74a28 100644 --- a/lib/stdlib/doc/src/string.xml +++ b/lib/stdlib/doc/src/string.xml @@ -109,10 +109,8 @@ <p>This module has been reworked in Erlang/OTP 20 to handle <seealso marker="unicode#type-chardata"> <c>unicode:chardata()</c></seealso> and operate on grapheme - clusters. The <seealso marker="#oldapi"> <c>old - functions</c></seealso> that only work on Latin-1 lists as input - are still available but should not be - used. They will be deprecated in Erlang/OTP 21. + clusters. The <c>old functions</c> that only work on Latin-1 lists as input + are kept for backwards compatibility reasons but should not be used. </p> </description> @@ -594,7 +592,7 @@ ÖÄÅ</pre> or <c>both</c>, indicates from which direction characters are to be removed. </p> - <p> Default <c><anno>Characters</anno></c> are the set of + <p> Default <c><anno>Characters</anno></c> is the set of nonbreakable whitespace codepoints, defined as Pattern_White_Space in <url href="http://unicode.org/reports/tr31/">Unicode Standard Annex #31</url>. @@ -631,393 +629,5 @@ ÖÄÅ</pre> </func> </funcs> - - <section> - <marker id="oldapi"/> - <title>Obsolete API functions</title> - <p>Here follows the function of the old API. - These functions only work on a list of Latin-1 characters. - </p> - <note><p> - The functions are kept for backward compatibility, but are - not recommended. - They will be deprecated in Erlang/OTP 21. - </p> - <p>Any undocumented functions in <c>string</c> are not to be used.</p> - </note> - </section> - - <funcs> - <func> - <name name="centre" arity="2"/> - <name name="centre" arity="3"/> - <fsummary>Center a string.</fsummary> - <desc> - <p>Returns a string, where <c><anno>String</anno></c> is centered in the - string and surrounded by blanks or <c><anno>Character</anno></c>. - The resulting string has length <c><anno>Number</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#pad/3"><c>pad/3</c></seealso>. - </p> - </desc> - </func> - - <func> - <name name="chars" arity="2"/> - <name name="chars" arity="3"/> - <fsummary>Return a string consisting of numbers of characters.</fsummary> - <desc> - <p>Returns a string consisting of <c><anno>Number</anno></c> characters - <c><anno>Character</anno></c>. Optionally, the string can end with - string <c><anno>Tail</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="lists#duplicate/2"><c>lists:duplicate/2</c></seealso>.</p> - </desc> - </func> - - <func> - <name name="chr" arity="2"/> - <fsummary>Return the index of the first occurrence of - a character in a string.</fsummary> - <desc> - <p>Returns the index of the first occurrence of - <c><anno>Character</anno></c> in <c><anno>String</anno></c>. Returns - <c>0</c> if <c><anno>Character</anno></c> does not occur.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#find/2"><c>find/2</c></seealso>.</p> - </desc> - </func> - - <func> - <name name="concat" arity="2"/> - <fsummary>Concatenate two strings.</fsummary> - <desc> - <p>Concatenates <c><anno>String1</anno></c> and - <c><anno>String2</anno></c> to form a new string - <c><anno>String3</anno></c>, which is returned.</p> - <p> - This function is <seealso marker="#oldapi">obsolete</seealso>. - Use <c>[<anno>String1</anno>, <anno>String2</anno>]</c> as - <c>Data</c> argument, and call - <seealso marker="unicode#characters_to_list/2"> - <c>unicode:characters_to_list/2</c></seealso> or - <seealso marker="unicode#characters_to_binary/2"> - <c>unicode:characters_to_binary/2</c></seealso> - to flatten the output. - </p> - </desc> - </func> - - <func> - <name name="copies" arity="2"/> - <fsummary>Copy a string.</fsummary> - <desc> - <p>Returns a string containing <c><anno>String</anno></c> repeated - <c><anno>Number</anno></c> times.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="lists#duplicate/2"><c>lists:duplicate/2</c></seealso>.</p> - </desc> - </func> - - <func> - <name name="cspan" arity="2"/> - <fsummary>Span characters at start of a string.</fsummary> - <desc> - <p>Returns the length of the maximum initial segment of - <c><anno>String</anno></c>, which consists entirely of characters - not from <c><anno>Chars</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#take/3"><c>take/3</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> string:cspan("\t abcdef", " \t"). -0</code> - </desc> - </func> - - <func> - <name name="join" arity="2"/> - <fsummary>Join a list of strings with separator.</fsummary> - <desc> - <p>Returns a string with the elements of <c><anno>StringList</anno></c> - separated by the string in <c><anno>Separator</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="lists#join/2"><c>lists:join/2</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> join(["one", "two", "three"], ", "). -"one, two, three"</code> - </desc> - </func> - - <func> - <name name="left" arity="2"/> - <name name="left" arity="3"/> - <fsummary>Adjust left end of a string.</fsummary> - <desc> - <p>Returns <c><anno>String</anno></c> with the length adjusted in - accordance with <c><anno>Number</anno></c>. The left margin is - fixed. If <c>length(<anno>String</anno>)</c> < - <c><anno>Number</anno></c>, then <c><anno>String</anno></c> is padded - with blanks or <c><anno>Character</anno></c>s.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#pad/2"><c>pad/2</c></seealso> or - <seealso marker="#pad/3"><c>pad/3</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> string:left("Hello",10,$.). -"Hello....."</code> - </desc> - </func> - - <func> - <name name="len" arity="1"/> - <fsummary>Return the length of a string.</fsummary> - <desc> - <p>Returns the number of characters in <c><anno>String</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#length/1"><c>length/1</c></seealso>.</p> - </desc> - </func> - - <func> - <name name="rchr" arity="2"/> - <fsummary>Return the index of the last occurrence of - a character in a string.</fsummary> - <desc> - <p>Returns the index of the last occurrence of - <c><anno>Character</anno></c> in <c><anno>String</anno></c>. Returns - <c>0</c> if <c><anno>Character</anno></c> does not occur.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#find/3"><c>find/3</c></seealso>.</p> - </desc> - </func> - - <func> - <name name="right" arity="2"/> - <name name="right" arity="3"/> - <fsummary>Adjust right end of a string.</fsummary> - <desc> - <p>Returns <c><anno>String</anno></c> with the length adjusted in - accordance with <c><anno>Number</anno></c>. The right margin is - fixed. If the length of <c>(<anno>String</anno>)</c> < - <c><anno>Number</anno></c>, then <c><anno>String</anno></c> is padded - with blanks or <c><anno>Character</anno></c>s.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#pad/3"><c>pad/3</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> string:right("Hello", 10, $.). -".....Hello"</code> - </desc> - </func> - - <func> - <name name="rstr" arity="2"/> - <fsummary>Find the index of a substring.</fsummary> - <desc> - <p>Returns the position where the last occurrence of - <c><anno>SubString</anno></c> begins in <c><anno>String</anno></c>. - Returns <c>0</c> if <c><anno>SubString</anno></c> - does not exist in <c><anno>String</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#find/3"><c>find/3</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> string:rstr(" Hello Hello World World ", "Hello World"). -8</code> - </desc> - </func> - - <func> - <name name="span" arity="2"/> - <fsummary>Span characters at start of a string.</fsummary> - <desc> - <p>Returns the length of the maximum initial segment of - <c><anno>String</anno></c>, which consists entirely of characters - from <c><anno>Chars</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#take/2"><c>take/2</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> string:span("\t abcdef", " \t"). -5</code> - </desc> - </func> - - <func> - <name name="str" arity="2"/> - <fsummary>Find the index of a substring.</fsummary> - <desc> - <p>Returns the position where the first occurrence of - <c><anno>SubString</anno></c> begins in <c><anno>String</anno></c>. - Returns <c>0</c> if <c><anno>SubString</anno></c> - does not exist in <c><anno>String</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#find/2"><c>find/2</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> string:str(" Hello Hello World World ", "Hello World"). -8</code> - </desc> - </func> - - <func> - <name name="strip" arity="1"/> - <name name="strip" arity="2"/> - <name name="strip" arity="3"/> - <fsummary>Strip leading or trailing characters.</fsummary> - <desc> - <p>Returns a string, where leading or trailing, or both, blanks or a - number of <c><anno>Character</anno></c> have been removed. - <c><anno>Direction</anno></c>, which can be <c>left</c>, <c>right</c>, - or <c>both</c>, indicates from which direction blanks are to be - removed. <c>strip/1</c> is equivalent to - <c>strip(String, both)</c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#trim/3"><c>trim/3</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> string:strip("...Hello.....", both, $.). -"Hello"</code> - </desc> - </func> - - <func> - <name name="sub_string" arity="2"/> - <name name="sub_string" arity="3"/> - <fsummary>Extract a substring.</fsummary> - <desc> - <p>Returns a substring of <c><anno>String</anno></c>, starting at - position <c><anno>Start</anno></c> to the end of the string, or to - and including position <c><anno>Stop</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#slice/3"><c>slice/3</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -sub_string("Hello World", 4, 8). -"lo Wo"</code> - </desc> - </func> - - <func> - <name name="substr" arity="2"/> - <name name="substr" arity="3"/> - <fsummary>Return a substring of a string.</fsummary> - <desc> - <p>Returns a substring of <c><anno>String</anno></c>, starting at - position <c><anno>Start</anno></c>, and ending at the end of the - string or at length <c><anno>Length</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#slice/3"><c>slice/3</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> substr("Hello World", 4, 5). -"lo Wo"</code> - </desc> - </func> - - <func> - <name name="sub_word" arity="2"/> - <name name="sub_word" arity="3"/> - <fsummary>Extract subword.</fsummary> - <desc> - <p>Returns the word in position <c><anno>Number</anno></c> of - <c><anno>String</anno></c>. Words are separated by blanks or - <c><anno>Character</anno></c>s.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#nth_lexeme/3"><c>nth_lexeme/3</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> string:sub_word(" Hello old boy !",3,$o). -"ld b"</code> - </desc> - </func> - - <func> - <name name="to_lower" arity="1" clause_i="1"/> - <name name="to_lower" arity="1" clause_i="2"/> - <name name="to_upper" arity="1" clause_i="1"/> - <name name="to_upper" arity="1" clause_i="2"/> - <fsummary>Convert case of string (ISO/IEC 8859-1).</fsummary> - <type variable="String" name_i="1"/> - <type variable="Result" name_i="1"/> - <type variable="Char"/> - <type variable="CharResult"/> - <desc> - <p>The specified string or character is case-converted. Notice that - the supported character set is ISO/IEC 8859-1 (also called Latin 1); - all values outside this set are unchanged</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso> use - <seealso marker="#lowercase/1"><c>lowercase/1</c></seealso>, - <seealso marker="#uppercase/1"><c>uppercase/1</c></seealso>, - <seealso marker="#titlecase/1"><c>titlecase/1</c></seealso> or - <seealso marker="#casefold/1"><c>casefold/1</c></seealso>.</p> - </desc> - </func> - - <func> - <name name="tokens" arity="2"/> - <fsummary>Split string into tokens.</fsummary> - <desc> - <p>Returns a list of tokens in <c><anno>String</anno></c>, separated - by the characters in <c><anno>SeparatorList</anno></c>.</p> - <p><em>Example:</em></p> - <code type="none"> -> tokens("abc defxxghix jkl", "x "). -["abc", "def", "ghi", "jkl"]</code> - <p>Notice that, as shown in this example, two or more - adjacent separator characters in <c><anno>String</anno></c> - are treated as one. That is, there are no empty - strings in the resulting list of tokens.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#lexemes/2"><c>lexemes/2</c></seealso>.</p> - </desc> - </func> - - <func> - <name name="words" arity="1"/> - <name name="words" arity="2"/> - <fsummary>Count blank separated words.</fsummary> - <desc> - <p>Returns the number of words in <c><anno>String</anno></c>, separated - by blanks or <c><anno>Character</anno></c>.</p> - <p>This function is <seealso marker="#oldapi">obsolete</seealso>. - Use - <seealso marker="#lexemes/2"><c>lexemes/2</c></seealso>.</p> - <p><em>Example:</em></p> - <code type="none"> -> words(" Hello old boy!", $o). -4</code> - </desc> - </func> - </funcs> - - <section> - <title>Notes</title> - <p>Some of the general string functions can seem to overlap each - other. The reason is that this string package is the - combination of two earlier packages and all functions of - both packages have been retained.</p> - </section> - </erlref> diff --git a/lib/stdlib/doc/src/unicode_usage.xml b/lib/stdlib/doc/src/unicode_usage.xml index 26dc46719e..789e063c12 100644 --- a/lib/stdlib/doc/src/unicode_usage.xml +++ b/lib/stdlib/doc/src/unicode_usage.xml @@ -719,8 +719,8 @@ Eshell V5.10.1 (abort with ^G) </section> <section> - <title>Unicode Filenames</title> <marker id="unicode_file_names"/> + <title>Unicode Filenames</title> <p>Most modern operating systems support Unicode filenames in some way. There are many different ways to do this and Erlang by default treats the different approaches differently:</p> @@ -855,8 +855,12 @@ Eshell V5.10.1 (abort with ^G) </note> <section> - <title>Notes About Raw Filenames</title> <marker id="notes-about-raw-filenames"/> + <title>Notes About Raw Filenames</title> + <note><p> + Note that raw filenames <em>not</em> necessarily are encoded the + same way as on the OS level. + </p></note> <p>Raw filenames were introduced together with Unicode filename support in ERTS 5.8.2 (Erlang/OTP R14B01). The reason "raw filenames" were introduced in the system was diff --git a/lib/stdlib/doc/src/user_guide.gif b/lib/stdlib/doc/src/user_guide.gif Binary files differdeleted file mode 100644 index e6275a803d..0000000000 --- a/lib/stdlib/doc/src/user_guide.gif +++ /dev/null diff --git a/lib/stdlib/src/c.erl b/lib/stdlib/src/c.erl index c04a201ce1..9a447af5b7 100644 --- a/lib/stdlib/src/c.erl +++ b/lib/stdlib/src/c.erl @@ -668,19 +668,23 @@ lm() -> [l(M) || M <- mm()]. %% erlangrc(Home) -%% Try to run a ".erlang" file, first in the current directory -%% else in home directory. +%% Try to run a ".erlang" file in home directory. + +-spec erlangrc() -> {ok, file:filename()} | {error, term()}. erlangrc() -> case init:get_argument(home) of {ok,[[Home]]} -> erlangrc([Home]); _ -> - f_p_e(["."], ".erlang") + {error, enoent} end. -erlangrc([Home]) -> - f_p_e([".",Home], ".erlang"). +-spec erlangrc(PathList) -> {ok, file:filename()} | {error, term()} + when PathList :: [Dir :: file:name()]. + +erlangrc([Home|_]=Paths) when is_list(Home) -> + f_p_e(Paths, ".erlang"). error(Fmt, Args) -> error_logger:error_msg(Fmt, Args). @@ -692,11 +696,11 @@ f_p_e(P, F) -> {error, E={Line, _Mod, _Term}} -> error("file:path_eval(~tp,~tp): error on line ~p: ~ts~n", [P, F, Line, file:format_error(E)]), - ok; + {error, E}; {error, E} -> error("file:path_eval(~tp,~tp): ~ts~n", [P, F, file:format_error(E)]), - ok; + {error, E}; Other -> Other end. diff --git a/lib/stdlib/src/epp.erl b/lib/stdlib/src/epp.erl index 31d0d499e3..00e6a10d8a 100644 --- a/lib/stdlib/src/epp.erl +++ b/lib/stdlib/src/epp.erl @@ -479,7 +479,7 @@ com_enc(_B, _Fun, _N, L, Ps) -> com_enc_end([L | Ps]). com_enc_end(Ps0) -> - Ps = lists:reverse([lists:reverse(string:to_lower(P)) || P <- Ps0]), + Ps = lists:reverse([lists:reverse(lowercase(P)) || P <- Ps0]), com_encoding(Ps). com_encoding(["latin","1"|_]) -> @@ -489,6 +489,9 @@ com_encoding(["utf","8"|_]) -> com_encoding(_) -> throw(no). % Don't try any further +lowercase(S) -> + unicode:characters_to_list(string:lowercase(S)). + normalize_typed_record_fields([]) -> {typed, []}; normalize_typed_record_fields(Fields) -> diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl index 9cd4727dc3..f58cb35cea 100644 --- a/lib/stdlib/src/erl_lint.erl +++ b/lib/stdlib/src/erl_lint.erl @@ -3910,10 +3910,9 @@ check_format_string(Fmt) -> extract_sequences(Fmt, []). extract_sequences(Fmt, Need0) -> - case string:chr(Fmt, $~) of - 0 -> {ok,lists:reverse(Need0)}; %That's it - Pos -> - Fmt1 = string:substr(Fmt, Pos+1), %Skip ~ + case string:find(Fmt, [$~]) of + nomatch -> {ok,lists:reverse(Need0)}; %That's it + [$~|Fmt1] -> case extract_sequence(1, Fmt1, Need0) of {ok,Need1,Rest} -> extract_sequences(Rest, Need1); Error -> Error diff --git a/lib/stdlib/src/erl_pp.erl b/lib/stdlib/src/erl_pp.erl index b0064aadb8..367dbefb82 100644 --- a/lib/stdlib/src/erl_pp.erl +++ b/lib/stdlib/src/erl_pp.erl @@ -237,13 +237,20 @@ lform({attribute,Line,Name,Arg}, Opts) -> lform({function,Line,Name,Arity,Clauses}, Opts) -> lfunction({function,Line,Name,Arity,Clauses}, Opts); %% These are specials to make it easier for the compiler. -lform({error,E}, _Opts) -> - leaf(format("~p\n", [{error,E}])); -lform({warning,W}, _Opts) -> - leaf(format("~p\n", [{warning,W}])); +lform({error,_}=E, Opts) -> + message(E, Opts); +lform({warning,_}=W, Opts) -> + message(W, Opts); lform({eof,_Line}, _Opts) -> $\n. +message(M, #options{encoding = Encoding}) -> + F = case Encoding of + latin1 -> "~p\n"; + unicode -> "~tp\n" + end, + leaf(format(F, [M])). + lattribute({attribute,_Line,type,Type}, Opts) -> [typeattr(type, Type, Opts),leaf(".\n")]; lattribute({attribute,_Line,opaque,Type}, Opts) -> @@ -902,7 +909,7 @@ maybe_paren(_P, _Prec, Expr) -> Expr. leaf(S) -> - {leaf,chars_size(S),S}. + {leaf,string:length(S),S}. %%% Do the formatting. Currently nothing fancy. Could probably have %%% done it in one single pass. @@ -962,7 +969,7 @@ f({seq,Before,After,Sep,LItems}, I0, ST, WT, PP) -> Sizes = BSizeL ++ SizeL, NSepChars = if is_list(Sep), Sep =/= [] -> - erlang:max(0, length(CharsL)-1); + erlang:max(0, length(CharsL)-1); % not string:length true -> 0 end, @@ -1118,7 +1125,7 @@ incr(I, Incr) -> I+Incr. indentation(E, I) when I < 0 -> - chars_size(E); + string:length(E); indentation(E, I0) -> I = io_lib_format:indentation(E, I0), case has_nl(E) of @@ -1155,19 +1162,19 @@ write_a_string(S, I, PP) -> write_a_string([], _N, _Len, _PP) -> []; write_a_string(S, N, Len, PP) -> - SS = string:sub_string(S, 1, N), + SS = string:slice(S, 0, N), Sl = write_string(SS, PP), - case (chars_size(Sl) > Len) and (N > ?MIN_SUBSTRING) of + case (string:length(Sl) > Len) and (N > ?MIN_SUBSTRING) of true -> write_a_string(S, N-1, Len, PP); false -> [flat_leaf(Sl) | - write_a_string(lists:nthtail(length(SS), S), Len, Len, PP)] + write_a_string(string:slice(S, string:length(SS)), Len, Len, PP)] end. flat_leaf(S) -> L = lists:flatten(S), - {leaf,length(L),L}. + {leaf,string:length(L),L}. write_value(V, PP) -> (PP#pp.value_fun)(V). @@ -1188,15 +1195,6 @@ write_char(C, PP) -> a0() -> erl_anno:new(0). -chars_size([C | Es]) when is_integer(C) -> - 1 + chars_size(Es); -chars_size([E | Es]) -> - chars_size(E) + chars_size(Es); -chars_size([]) -> - 0; -chars_size(B) when is_binary(B) -> - byte_size(B). - -define(N_SPACES, 30). spacetab() -> diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl index 47223b129c..4774c4bf19 100644 --- a/lib/stdlib/src/erl_scan.erl +++ b/lib/stdlib/src/erl_scan.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2015. All Rights Reserved. +%% Copyright Ericsson AB 1996-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -752,7 +752,7 @@ scan_string(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) -> {char_error,Ncs,Error,Nline,Ncol,EndCol} -> scan_error(Error, Nline, Ncol, Nline, EndCol, Ncs); {error,Nline,Ncol,Nwcs,Ncs} -> - Estr = string:substr(Nwcs, 1, 16), % Expanded escape chars. + Estr = string:slice(Nwcs, 0, 16), % Expanded escape chars. scan_error({string,$\",Estr}, Line0, Col0, Nline, Ncol, Ncs); %" {Ncs,Nline,Ncol,Nstr,Nwcs} -> Anno = anno(Line0, Col0, St, Nstr), @@ -767,7 +767,7 @@ scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) -> {char_error,Ncs,Error,Nline,Ncol,EndCol} -> scan_error(Error, Nline, Ncol, Nline, EndCol, Ncs); {error,Nline,Ncol,Nwcs,Ncs} -> - Estr = string:substr(Nwcs, 1, 16), % Expanded escape chars. + Estr = string:slice(Nwcs, 0, 16), % Expanded escape chars. scan_error({string,$\',Estr}, Line0, Col0, Nline, Ncol, Ncs); %' {Ncs,Nline,Ncol,Nstr,Nwcs} -> case catch list_to_atom(Nwcs) of diff --git a/lib/stdlib/src/escript.erl b/lib/stdlib/src/escript.erl index 2b9d8ff65b..132f8efbbe 100644 --- a/lib/stdlib/src/escript.erl +++ b/lib/stdlib/src/escript.erl @@ -224,8 +224,8 @@ return_sections(S, Bin) -> normalize_section(Name, undefined) -> {Name, undefined}; normalize_section(shebang, "#!" ++ Chars) -> - Chopped = string:strip(Chars, right, $\n), - Stripped = string:strip(Chopped, both), + Chopped = string:trim(Chars, trailing, "$\n"), + Stripped = string:trim(Chopped, both), if Stripped =:= ?SHEBANG -> {shebang, default}; @@ -233,8 +233,8 @@ normalize_section(shebang, "#!" ++ Chars) -> {shebang, Stripped} end; normalize_section(comment, Chars) -> - Chopped = string:strip(Chars, right, $\n), - Stripped = string:strip(string:strip(Chopped, left, $%), both), + Chopped = string:trim(Chars, trailing, "$\n"), + Stripped = string:trim(string:trim(Chopped, leading, "$%"), both), if Stripped =:= ?COMMENT -> {comment, default}; @@ -242,8 +242,8 @@ normalize_section(comment, Chars) -> {comment, Stripped} end; normalize_section(emu_args, "%%!" ++ Chars) -> - Chopped = string:strip(Chars, right, $\n), - Stripped = string:strip(Chopped, both), + Chopped = string:trim(Chars, trailing, "$\n"), + Stripped = string:trim(Chopped, both), {emu_args, Stripped}; normalize_section(Name, Chars) -> {Name, Chars}. diff --git a/lib/stdlib/src/ets.erl b/lib/stdlib/src/ets.erl index b5d3cd3c8d..b6548626f3 100644 --- a/lib/stdlib/src/ets.erl +++ b/lib/stdlib/src/ets.erl @@ -1700,6 +1700,8 @@ choice(Height, Width, P, Mode, Tab, Key, Turn, Opos) -> io:format("~ts\n", [ErrorString]), choice(Height, Width, P, Mode, Tab, Key, Turn, Opos) end; + eof -> + ok; _ -> choice(Height, Width, P, Mode, Tab, Key, Turn, Opos) end. @@ -1717,7 +1719,7 @@ get_line(P, Default) -> line_string(Binary) when is_binary(Binary) -> unicode:characters_to_list(Binary); line_string(Other) -> Other. -nonl(S) -> string:strip(S, right, $\n). +nonl(S) -> string:trim(S, trailing, "$\n"). print_number(Tab, Key, Num) -> Os = ets:lookup(Tab, Key), @@ -1746,7 +1748,7 @@ do_display_item(_Height, Width, I, Opos) -> L = to_string(I), L2 = if length(L) > Width - 8 -> - string:substr(L, 1, Width-13) ++ " ..."; + string:slice(L, 0, Width-13) ++ " ..."; true -> L end, diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index d7c313f214..0f90b3fc33 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -365,11 +365,18 @@ do_list_dir(Dir, Mod) -> eval_list_dir(Dir, Mod). %%% Compiling a wildcard. + +%% Define characters used for escaping a \. +-define(ESCAPE_PREFIX, $@). +-define(ESCAPE_CHARACTER, [?ESCAPE_PREFIX,$e]). +-define(ESCAPED_ESCAPE_PREFIX, [?ESCAPE_PREFIX,?ESCAPE_PREFIX]). + %% Only for debugging. compile_wildcard(Pattern) when is_list(Pattern) -> {compiled_wildcard,?HANDLE_ERROR(compile_wildcard(Pattern, "."))}. -compile_wildcard(Pattern, Cwd0) -> +compile_wildcard(Pattern0, Cwd0) -> + Pattern = convert_escapes(Pattern0), [Root|Rest] = filename:split(Pattern), case filename:pathtype(Root) of relative -> @@ -409,7 +416,8 @@ compile_join({cwd,Cwd}, File0) -> compile_join({root,PrefixLen,Root}, File) -> {root,PrefixLen,filename:join(Root, File)}. -compile_part(Part) -> +compile_part(Part0) -> + Part = wrap_escapes(Part0), compile_part(Part, false, []). compile_part_to_sep(Part) -> @@ -445,6 +453,8 @@ compile_part([${|Rest], Upto, Result) -> error -> compile_part(Rest, Upto, [${|Result]) end; +compile_part([{escaped,X}|Rest], Upto, Result) -> + compile_part(Rest, Upto, [X|Result]); compile_part([X|Rest], Upto, Result) -> compile_part(Rest, Upto, [X|Result]); compile_part([], _Upto, Result) -> @@ -461,6 +471,8 @@ compile_charset1([Lower, $-, Upper|Rest], Ordset) when Lower =< Upper -> compile_charset1(Rest, compile_range(Lower, Upper, Ordset)); compile_charset1([$]|Rest], Ordset) -> {ok, {one_of, gb_sets:from_ordset(Ordset)}, Rest}; +compile_charset1([{escaped,X}|Rest], Ordset) -> + compile_charset1(Rest, ordsets:add_element(X, Ordset)); compile_charset1([X|Rest], Ordset) -> compile_charset1(Rest, ordsets:add_element(X, Ordset)); compile_charset1([], _Ordset) -> @@ -486,6 +498,32 @@ compile_alt(Pattern, Result) -> error end. +%% Convert backslashes to an illegal Unicode character to +%% protect in from filename:split/1. + +convert_escapes([?ESCAPE_PREFIX|T]) -> + ?ESCAPED_ESCAPE_PREFIX ++ convert_escapes(T); +convert_escapes([$\\|T]) -> + ?ESCAPE_CHARACTER ++ convert_escapes(T); +convert_escapes([H|T]) -> + [H|convert_escapes(T)]; +convert_escapes([]) -> + []. + +%% Wrap each escape in a tuple to remove the special meaning for +%% the character that follows. + +wrap_escapes(?ESCAPED_ESCAPE_PREFIX ++ T) -> + [?ESCAPE_PREFIX|wrap_escapes(T)]; +wrap_escapes(?ESCAPE_CHARACTER ++ [C|T]) -> + [{escaped,C}|wrap_escapes(T)]; +wrap_escapes(?ESCAPE_CHARACTER) -> + []; +wrap_escapes([H|T]) -> + [H|wrap_escapes(T)]; +wrap_escapes([]) -> + []. + badpattern(Reason) -> error({badpattern,Reason}). diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index 9bf4290916..919f8f20e6 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -34,6 +34,38 @@ %% we flatten the arguments immediately on function entry as that makes %% it easier to ensure that the code works. +%% +%% *** Requirements on Raw Filename Format *** +%% +%% These requirements are due to the 'filename' module +%% in stdlib. This since it is documented that it +%% should be able to operate on raw filenames as well +%% as ordinary filenames. +%% +%% A raw filename *must* be a byte sequence where: +%% 1. Codepoints 0-127 (7-bit ascii) *must* be encoded +%% as a byte with the corresponding value. That is, +%% the most significant bit in the byte encoding the +%% codepoint is never set. +%% 2. Codepoints greater than 127 *must* be encoded +%% with the most significant bit set in *every* byte +%% encoding it. +%% +%% Latin1 and UTF-8 meet these requirements while +%% UTF-16 and UTF-32 don't. +%% +%% On Windows filenames are natively stored as malformed +%% UTF-16LE (lonely surrogates may appear). A more correct +%% description than UTF-16 would be an array of 16-bit +%% words... In order to meet the requirements of the +%% raw file format we convert the malformed UTF-16LE to +%% malformed UTF-8 which meet the requirements. +%% +%% Note that these requirements are today only OTP +%% internal (erts-stdlib internal) requirements that +%% could be changed. +%% + -export([absname/1, absname/2, absname_join/2, basename/1, basename/2, dirname/1, extension/1, join/1, join/2, pathtype/1, @@ -41,6 +73,7 @@ safe_relative_path/1]). -export([find_src/1, find_src/2]). % deprecated -export([basedir/2, basedir/3]). +-export([validate/1]). %% Undocumented and unsupported exports. -export([append/2]). @@ -1036,10 +1069,10 @@ basedir_linux(Type) -> user_log -> getenv("XDG_CACHE_HOME", ?basedir_linux_user_log, true); site_data -> Base = getenv("XDG_DATA_DIRS",?basedir_linux_site_data,false), - string:tokens(Base,":"); + string:lexemes(Base, ":"); site_config -> Base = getenv("XDG_CONFIG_DIRS",?basedir_linux_site_config,false), - string:tokens(Base,":") + string:lexemes(Base, ":") end. -define(basedir_darwin_user_data, "Library/Application Support"). @@ -1135,3 +1168,72 @@ basedir_os_type() -> {win32,_} -> windows; _ -> linux end. + +%% +%% validate/1 +%% + +-spec validate(FileName) -> boolean() when + FileName :: file:name_all(). + +validate(FileName) when is_binary(FileName) -> + %% Raw filename... + validate_bin(FileName); +validate(FileName) when is_list(FileName); + is_atom(FileName) -> + validate_list(FileName, + file:native_name_encoding(), + os:type()). + +validate_list(FileName, Enc, Os) -> + try + true = validate_list(FileName, Enc, Os, 0) > 0 + catch + _ : _ -> false + end. + +validate_list([], _Enc, _Os, Chars) -> + Chars; +validate_list(C, Enc, Os, Chars) when is_integer(C) -> + validate_char(C, Enc, Os), + Chars+1; +validate_list(A, Enc, Os, Chars) when is_atom(A) -> + validate_list(atom_to_list(A), Enc, Os, Chars); +validate_list([H|T], Enc, Os, Chars) -> + NewChars = validate_list(H, Enc, Os, Chars), + validate_list(T, Enc, Os, NewChars). + +%% C is always an integer... +% validate_char(C, _, _) when not is_integer(C) -> +% throw(invalid); +validate_char(C, _, _) when C < 1 -> + throw(invalid); %% No negative or null characters... +validate_char(C, latin1, _) when C > 255 -> + throw(invalid); +validate_char(C, utf8, _) when C >= 16#110000 -> + throw(invalid); +validate_char(C, utf8, {win32, _}) when C > 16#ffff -> + throw(invalid); %% invalid win wchar... +validate_char(_C, utf8, {win32, _}) -> + ok; %% Range below is accepted on windows... +validate_char(C, utf8, _) when 16#D800 =< C, C =< 16#DFFF -> + throw(invalid); %% invalid unicode range... +validate_char(_, _, _) -> + ok. + +validate_bin(Bin) -> + %% Raw filename. That is, we do not interpret + %% the encoding, but we still do not accept + %% null characters... + try + true = validate_bin(Bin, 0) > 0 + catch + _ : _ -> false + end. + +validate_bin(<<>>, Bs) -> + Bs; +validate_bin(<<0, _Rest/binary>>, _Bs) -> + throw(invalid); %% No null characters allowed... +validate_bin(<<_B, Rest/binary>>, Bs) -> + validate_bin(Rest, Bs+1). diff --git a/lib/stdlib/src/gen_statem.erl b/lib/stdlib/src/gen_statem.erl index 1110d18af6..cd6312855d 100644 --- a/lib/stdlib/src/gen_statem.erl +++ b/lib/stdlib/src/gen_statem.erl @@ -296,7 +296,7 @@ (Reason :: term()). %% Format the callback module state in some sensible that is -%% often condensed way. For StatusOption =:= 'normal' the perferred +%% often condensed way. For StatusOption =:= 'normal' the preferred %% return term is [{data,[{"State",FormattedState}]}], and for %% StatusOption =:= 'terminate' it is just FormattedState. -callback format_status( @@ -510,8 +510,6 @@ call(ServerRef, Request, Timeout) -> parse_timeout(Timeout) -> case Timeout of - {clean_timeout,infinity} -> - {dirty_timeout,infinity}; {clean_timeout,_} -> Timeout; {dirty_timeout,_} -> diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl index 4b2d15c8b3..e345810ca0 100644 --- a/lib/stdlib/src/io_lib_format.erl +++ b/lib/stdlib/src/io_lib_format.erl @@ -380,7 +380,7 @@ float_e(_Fl, {Ds,E}, P) -> {Fs,false} -> [Fs|float_exp(E-1)] end. -%% float_man([Digit], Icount, Dcount) -> {[Chars],CarryFlag}. +%% float_man([Digit], Icount, Dcount) -> {[Char],CarryFlag}. %% Generate the characters in the mantissa from the digits with Icount %% characters before the '.' and Dcount decimals. Handle carry and let %% caller decide what to do at top. @@ -395,7 +395,7 @@ float_man([D|Ds], I, Dc) -> {Cs,false} -> {[D|Cs],false} end; float_man([], I, Dc) -> %Pad with 0's - {string:chars($0, I, [$.|string:chars($0, Dc)]),false}. + {lists:duplicate(I, $0) ++ [$.|lists:duplicate(Dc, $0)],false}. float_man([D|_], 0) when D >= $5 -> {[],true}; float_man([_|_], 0) -> {[],false}; @@ -405,7 +405,7 @@ float_man([D|Ds], Dc) -> {Cs,true} -> {[D+1|Cs],false}; {Cs,false} -> {[D|Cs],false} end; -float_man([], Dc) -> {string:chars($0, Dc),false}. %Pad with 0's +float_man([], Dc) -> {lists:duplicate(Dc, $0),false}. %Pad with 0's %% float_exp(Exponent) -> [Char]. %% Generate the exponent of a floating point number. Always include sign. @@ -429,7 +429,7 @@ fwrite_f(Fl, F, Adj, P, Pad) when P >= 1 -> float_f(Fl, Fd, P) when Fl < 0.0 -> [$-|float_f(-Fl, Fd, P)]; float_f(Fl, {Ds,E}, P) when E =< 0 -> - float_f(Fl, {string:chars($0, -E+1, Ds),1}, P); %Prepend enough 0's + float_f(Fl, {lists:duplicate(-E+1, $0)++Ds,1}, P); %Prepend enough 0's float_f(_Fl, {Ds,E}, P) -> case float_man(Ds, E, P) of {Fs,true} -> "1" ++ Fs; %Handle carry @@ -751,7 +751,7 @@ adjust(Data, Pad, right) -> [Pad|Data]. flat_trunc(List, N) when is_integer(N), N >= 0 -> string:slice(List, 0, N). -%% A deep version of string:chars/2,3 +%% A deep version of lists:duplicate/2 chars(_C, 0) -> []; diff --git a/lib/stdlib/src/lib.erl b/lib/stdlib/src/lib.erl index c6eb0d7915..a7980cc294 100644 --- a/lib/stdlib/src/lib.erl +++ b/lib/stdlib/src/lib.erl @@ -646,7 +646,7 @@ pp_arguments(PF, As, I, Enc) -> Ll = length(L), A = list_to_atom(lists:duplicate(Ll, $a)), S0 = unicode:characters_to_list(PF([A | T], I+1), Enc), - brackets_to_parens([$[,L,string:sub_string(S0, 2+Ll)], Enc); + brackets_to_parens([$[,L,string:slice(S0, 1+Ll)], Enc); _ -> brackets_to_parens(PF(As, I+1), Enc) end. diff --git a/lib/stdlib/src/otp_internal.erl b/lib/stdlib/src/otp_internal.erl index c59db903dc..122b476ddb 100644 --- a/lib/stdlib/src/otp_internal.erl +++ b/lib/stdlib/src/otp_internal.erl @@ -466,8 +466,6 @@ obsolete_1(inviso, _, _) -> {removed,"the inviso application was removed in R16"}; %% Added in R15B01. -obsolete_1(gs, _, _) -> - {removed,"the gs application has been removed; use the wx application instead"}; obsolete_1(ssh, sign_data, 2) -> {removed,"removed in R16A; use public_key:pem_decode/1, public_key:pem_entry_decode/1 " "and public_key:sign/3 instead"}; @@ -611,6 +609,52 @@ obsolete_1(filename, find_src, 2) -> obsolete_1(erlang, hash, 2) -> {removed, {erlang, phash2, 2}, "20.0"}; +%% Added in OTP-21 +obsolete_1(string, len, 1) -> + {deprecated, "deprecated; use string:length/3 instead"}; +obsolete_1(string, concat, 2) -> + {deprecated, "deprecated; use [Str1,Str2] instead"}; +obsolete_1(string, str, 2) -> + {deprecated, "deprecated; use string:find/2 instead"}; +obsolete_1(string, rstr, 2) -> + {deprecated, "deprecated; use string:find/3 instead"}; +obsolete_1(string, chr, 2) -> + {deprecated, "deprecated; use string:find/2 instead"}; +obsolete_1(string, rchr, 2) -> + {deprecated, "deprecated; use string:find/3 instead"}; +obsolete_1(string, span, 2) -> + {deprecated, "deprecated; use string:take/2 instead"}; +obsolete_1(string, cspan, 2) -> + {deprecated, "deprecated; use string:take/3 instead"}; +obsolete_1(string, substr, _) -> + {deprecated, "deprecated; use string:slice/3 instead"}; +obsolete_1(string, tokens, 2) -> + {deprecated, "deprecated; use string:lexemes/2 instead"}; +obsolete_1(string, chars, _) -> + {deprecated, "deprecated; use lists:duplicate/2 instead"}; +obsolete_1(string, copies, _) -> + {deprecated, "deprecated; use lists:duplicate/2 instead"}; +obsolete_1(string, words, _) -> + {deprecated, "deprecated; use string:lexemes/2 instead"}; +obsolete_1(string, strip, _) -> + {deprecated, "deprecated; use string:trim/3 instead"}; +obsolete_1(string, sub_word, _) -> + {deprecated, "deprecated; use string:nth_lexeme/3 instead"}; +obsolete_1(string, sub_string, _) -> + {deprecated, "deprecated; use string:slice/3 instead"}; +obsolete_1(string, left, _) -> + {deprecated, "deprecated; use string:pad/3 instead"}; +obsolete_1(string, right, _) -> + {deprecated, "deprecated; use string:pad/3 instead"}; +obsolete_1(string, centre, _) -> + {deprecated, "deprecated; use string:pad/3 instead"}; +obsolete_1(string, join, _) -> + {deprecated, "deprecated; use lists:join/2 instead"}; +obsolete_1(string, to_upper, _) -> + {deprecated, "deprecated; use string:uppercase/1 or string:titlecase/1 instead"}; +obsolete_1(string, to_lower, _) -> + {deprecated, "deprecated; use string:lowercase/1 or string:casefold/1 instead"}; + %% not obsolete obsolete_1(_, _, _) -> diff --git a/lib/stdlib/src/pool.erl b/lib/stdlib/src/pool.erl index 05950a1d7c..b12ff205b1 100644 --- a/lib/stdlib/src/pool.erl +++ b/lib/stdlib/src/pool.erl @@ -25,7 +25,7 @@ %% with the least load !!!! %% This function is callable from any node including the master %% That is part of the pool -%% nodes are scheduled on a per usgae basis and per load basis, +%% nodes are scheduled on a per usage basis and per load basis, %% Whenever we use a node, we put at the end of the queue, and whenever %% a node report a change in load, we insert it accordingly @@ -197,7 +197,7 @@ pure_insert({Load,Node},[{L,N}|Tail]) when Load < L -> pure_insert(L,[H|T]) -> [H|pure_insert(L,T)]. %% Really should not measure the contributions from -%% the back ground processes here .... which we do :-( +%% the background processes here .... which we do :-( %% We don't have to monitor the master, since we're slaves anyway statistic_collector() -> @@ -213,7 +213,7 @@ statistic_collector(I) -> stat_loop(M, 999999) end. -%% Do not tell the master about our load if it has not changed +%% Do not tell the master about our load if it has not changed stat_loop(M, Old) -> sleep(2000), diff --git a/lib/stdlib/src/slave.erl b/lib/stdlib/src/slave.erl index d7cf6386f5..b3f3206d67 100644 --- a/lib/stdlib/src/slave.erl +++ b/lib/stdlib/src/slave.erl @@ -320,7 +320,7 @@ mk_cmd(Host, Name, Args, Waiter, Prog0) -> %% emulator and flags as the test node. The return from lib:progname() %% could then typically be '/<full_path_to>/cerl -gcov'). quote_progname(Progname) -> - do_quote_progname(string:tokens(to_list(Progname)," ")). + do_quote_progname(string:lexemes(to_list(Progname)," ")). do_quote_progname([Prog]) -> "\""++Prog++"\""; diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src index 3c449d3cb9..ab0824ca17 100644 --- a/lib/stdlib/src/stdlib.app.src +++ b/lib/stdlib/src/stdlib.app.src @@ -107,7 +107,7 @@ dets]}, {applications, [kernel]}, {env, []}, - {runtime_dependencies, ["sasl-3.0","kernel-5.0","erts-9.0","crypto-3.3", + {runtime_dependencies, ["sasl-3.0","kernel-6.0","erts-10.0","crypto-3.3", "compiler-5.0"]} ]}. diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl index 4972da297d..5a4d2df2a6 100644 --- a/lib/stdlib/src/string.erl +++ b/lib/stdlib/src/string.erl @@ -87,6 +87,16 @@ %%% May be removed -export([list_to_float/1, list_to_integer/1]). +-deprecated([{len,1},{concat,2}, + {str,2},{chr,2},{rchr,2},{rstr,2}, + {span,2},{cspan,2},{substr,'_'},{tokens,2}, + {chars,'_'}, + {copies,2},{words,'_'},{strip,'_'}, + {sub_word,'_'},{left,'_'},{right,'_'}, + {sub_string,'_'},{centre,'_'},{join,2}, + {to_upper,1}, {to_lower,1} + ]). + %% Uses bifs: string:list_to_float/1 and string:list_to_integer/1 -spec list_to_float(String) -> {Float, Rest} | {'error', Reason} when String :: string(), diff --git a/lib/stdlib/test/Makefile b/lib/stdlib/test/Makefile index 7b79dcf04d..523cb95065 100644 --- a/lib/stdlib/test/Makefile +++ b/lib/stdlib/test/Makefile @@ -69,6 +69,7 @@ MODULES= \ sets_test_lib \ sofs_SUITE \ stdlib_SUITE \ + stdlib_bench_SUITE \ string_SUITE \ supervisor_1 \ supervisor_2 \ @@ -146,7 +147,7 @@ release_spec: opt release_tests_spec: make_emakefile $(INSTALL_DIR) "$(RELSYSDIR)" - $(INSTALL_DATA) stdlib.spec $(EMAKEFILE) \ + $(INSTALL_DATA) stdlib.spec stdlib_bench.spec $(EMAKEFILE) \ $(ERL_FILES) $(COVERFILE) "$(RELSYSDIR)" chmod -R u+w "$(RELSYSDIR)" @tar cf - *_SUITE_data | (cd "$(RELSYSDIR)"; tar xf -) diff --git a/lib/stdlib/test/filelib_SUITE.erl b/lib/stdlib/test/filelib_SUITE.erl index c94821bc75..1236fe45f4 100644 --- a/lib/stdlib/test/filelib_SUITE.erl +++ b/lib/stdlib/test/filelib_SUITE.erl @@ -120,7 +120,7 @@ wcc(Wc, Error) -> do_wildcard_1(Dir, Wcf0) -> do_wildcard_2(Dir, Wcf0), Wcf = fun(Wc0) -> - Wc = filename:join(Dir, Wc0), + Wc = Dir ++ "/" ++ Wc0, L = Wcf0(Wc), [subtract_dir(N, Dir) || N <- L] end, @@ -268,8 +268,37 @@ do_wildcard_9(Dir, Wcf) -> %% Cleanup. del(Files), [ok = file:del_dir(D) || D <- lists:reverse(Dirs)], - ok. + do_wildcard_10(Dir, Wcf). + +%% ERL-451/OTP-14577: Escape characters using \\. +do_wildcard_10(Dir, Wcf) -> + All0 = ["{abc}","abc","def","---","z--","@a,b","@c"], + All = case os:type() of + {unix,_} -> + %% '?' is allowed in file names on Unix, but + %% not on Windows. + ["?q"|All0]; + _ -> + All0 + end, + Files = mkfiles(lists:reverse(All), Dir), + + ["{abc}"] = Wcf("\\{a*"), + ["{abc}"] = Wcf("\\{abc}"), + ["abc","def","z--"] = Wcf("[a-z]*"), + ["---","abc","z--"] = Wcf("[a\\-z]*"), + ["@a,b","@c"] = Wcf("@{a\\,b,c}"), + ["@c"] = Wcf("@{a,b,c}"), + + case os:type() of + {unix,_} -> + ["?q"] = Wcf("\\?q"); + _ -> + [] = Wcf("\\?q") + end, + del(Files), + ok. fold_files(Config) when is_list(Config) -> Dir = filename:join(proplists:get_value(priv_dir, Config), "fold_files"), diff --git a/lib/stdlib/test/rand_SUITE.erl b/lib/stdlib/test/rand_SUITE.erl index 432293b656..f69d42551e 100644 --- a/lib/stdlib/test/rand_SUITE.erl +++ b/lib/stdlib/test/rand_SUITE.erl @@ -80,7 +80,7 @@ test() -> end, Tests). algs() -> - [exs64, exsplus, exsp, exrop, exs1024, exs1024s]. + [exrop, exsp, exs1024s, exs64, exsplus, exs1024]. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -459,213 +459,233 @@ measure(Config) -> {skip,{will_not_run_in_scaled_time,Scale}} end. +-define(CHECK_UNIFORM_RANGE(Gen, Range, X, St), + case (Gen) of + {(X), (St)} when is_integer(X), 1 =< (X), (X) =< (Range) -> + St + end). +-define(CHECK_UNIFORM(Gen, X, St), + case (Gen) of + {(X), (St)} when is_float(X), 0.0 =< (X), (X) < 1.0 -> + St + end). +-define(CHECK_UNIFORM_NZ(Gen, X, St), + case (Gen) of + {(X), (St)} when is_float(X), 0.0 < (X), (X) =< 1.0 -> + St + end). +-define(CHECK_NORMAL(Gen, X, St), + case (Gen) of + {(X), (St)} when is_float(X) -> + St + end). + do_measure(_Config) -> - Algos = + Algs = + algs() ++ try crypto:strong_rand_bytes(1) of - <<_>> -> [crypto64, crypto] + <<_>> -> [crypto64, crypto_cache, crypto] catch error:low_entropy -> []; error:undef -> [] - end ++ algs(), + end, %% - ct:pal("RNG uniform integer performance~n",[]), - TMark1 = + ct:pal("~nRNG uniform integer range 10000 performance~n",[]), + _ = measure_1( - random, fun (_) -> 10000 end, - undefined, - fun (Range, State) -> - {int, random:uniform_s(Range, State)} - end), - _ = - [measure_1( - Algo, - fun (_) -> 10000 end, - TMark1, - fun (Range, State) -> - {int, rand:uniform_s(Range, State)} - end) || Algo <- Algos], + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), %% - ct:pal("~nRNG uniform integer half range performance~n",[]), - HalfRangeFun = fun (State) -> half_range(State) end, - TMark2 = - measure_1( - random, - HalfRangeFun, - undefined, - fun (Range, State) -> - {int, random:uniform_s(Range, State)} - end), + ct:pal("~nRNG uniform integer 32 bit performance~n",[]), _ = - [measure_1( - Algo, - HalfRangeFun, - TMark2, - fun (Range, State) -> - {int, rand:uniform_s(Range, State)} - end) || Algo <- Algos], - %% - ct:pal("~nRNG uniform integer half range + 1 performance~n",[]), - HalfRangePlus1Fun = fun (State) -> half_range(State) + 1 end, - TMark3 = measure_1( - random, - HalfRangePlus1Fun, - undefined, - fun (Range, State) -> - {int, random:uniform_s(Range, State)} - end), + fun (_) -> 1 bsl 32 end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), + %% + ct:pal("~nRNG uniform integer half range + 1 performance~n",[]), _ = - [measure_1( - Algo, - HalfRangePlus1Fun, - TMark3, - fun (Range, State) -> - {int, rand:uniform_s(Range, State)} - end) || Algo <- Algos], + measure_1( + fun (State) -> half_range(State) + 1 end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), %% ct:pal("~nRNG uniform integer full range - 1 performance~n",[]), - FullRangeMinus1Fun = fun (State) -> (half_range(State) bsl 1) - 1 end, - TMark4 = - measure_1( - random, - FullRangeMinus1Fun, - undefined, - fun (Range, State) -> - {int, random:uniform_s(Range, State)} - end), _ = - [measure_1( - Algo, - FullRangeMinus1Fun, - TMark4, - fun (Range, State) -> - {int, rand:uniform_s(Range, State)} - end) || Algo <- Algos], + measure_1( + fun (State) -> (half_range(State) bsl 1) - 1 end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), %% ct:pal("~nRNG uniform integer full range performance~n",[]), - FullRangeFun = fun (State) -> half_range(State) bsl 1 end, - TMark5 = - measure_1( - random, - FullRangeFun, - undefined, - fun (Range, State) -> - {int, random:uniform_s(Range, State)} - end), _ = - [measure_1( - Algo, - FullRangeFun, - TMark5, - fun (Range, State) -> - {int, rand:uniform_s(Range, State)} - end) || Algo <- Algos], + measure_1( + fun (State) -> half_range(State) bsl 1 end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), %% ct:pal("~nRNG uniform integer full range + 1 performance~n",[]), - FullRangePlus1Fun = fun (State) -> (half_range(State) bsl 1) + 1 end, - TMark6 = - measure_1( - random, - FullRangePlus1Fun, - undefined, - fun (Range, State) -> - {int, random:uniform_s(Range, State)} - end), _ = - [measure_1( - Algo, - FullRangePlus1Fun, - TMark6, - fun (Range, State) -> - {int, rand:uniform_s(Range, State)} - end) || Algo <- Algos], + measure_1( + fun (State) -> (half_range(State) bsl 1) + 1 end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), %% ct:pal("~nRNG uniform integer double range performance~n",[]), - DoubleRangeFun = fun (State) -> half_range(State) bsl 2 end, - TMark7 = - measure_1( - random, - DoubleRangeFun, - undefined, - fun (Range, State) -> - {int, random:uniform_s(Range, State)} - end), _ = - [measure_1( - Algo, - DoubleRangeFun, - TMark7, - fun (Range, State) -> - {int, rand:uniform_s(Range, State)} - end) || Algo <- Algos], + measure_1( + fun (State) -> + half_range(State) bsl 2 + end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), %% ct:pal("~nRNG uniform integer double range + 1 performance~n",[]), - DoubleRangePlus1Fun = fun (State) -> (half_range(State) bsl 2) + 1 end, - TMark8 = + _ = measure_1( - random, - DoubleRangePlus1Fun, - undefined, - fun (Range, State) -> - {int, random:uniform_s(Range, State)} - end), + fun (State) -> + (half_range(State) bsl 2) + 1 + end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), + %% + ct:pal("~nRNG uniform integer 64 bit performance~n",[]), _ = - [measure_1( - Algo, - DoubleRangePlus1Fun, - TMark8, - fun (Range, State) -> - {int, rand:uniform_s(Range, State)} - end) || Algo <- Algos], + measure_1( + fun (_) -> 1 bsl 64 end, + fun (State, Range, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM_RANGE( + Mod:uniform_s(Range, St0), Range, + X, St1) + end, + State) + end, + Algs), %% ct:pal("~nRNG uniform float performance~n",[]), - TMark9 = - measure_1( - random, + _ = measure_1( fun (_) -> 0 end, - undefined, - fun (_, State) -> - {uniform, random:uniform_s(State)} - end), - _ = - [measure_1( - Algo, - fun (_) -> 0 end, - TMark9, - fun (_, State) -> - {uniform, rand:uniform_s(State)} - end) || Algo <- Algos], + fun (State, _, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_UNIFORM(Mod:uniform_s(St0), X, St) + end, + State) + end, + Algs), %% ct:pal("~nRNG normal float performance~n",[]), - io:format("~.12w: not implemented (too few bits)~n", [random]), - _ = [measure_1( - Algo, - fun (_) -> 0 end, - TMark9, - fun (_, State) -> - {normal, rand:normal_s(State)} - end) || Algo <- Algos], + _ = measure_1( + fun (_) -> 0 end, + fun (State, _, Mod) -> + measure_loop( + fun (St0) -> + ?CHECK_NORMAL(Mod:normal_s(St0), X, St1) + end, + State) + end, + Algs), ok. -measure_1(Algo, RangeFun, TMark, Gen) -> +measure_loop(Fun, State) -> + measure_loop(Fun, State, ?LOOP). +%% +measure_loop(Fun, State, N) when 0 < N -> + measure_loop(Fun, Fun(State), N-1); +measure_loop(_, _, _) -> + ok. + +measure_1(RangeFun, Fun, Algs) -> + TMark = measure_1(RangeFun, Fun, hd(Algs), undefined), + [TMark] ++ + [measure_1(RangeFun, Fun, Alg, TMark) || Alg <- tl(Algs)]. + +measure_1(RangeFun, Fun, Alg, TMark) -> Parent = self(), - Seed = - case Algo of + {Mod, State} = + case Alg of crypto64 -> - crypto64_seed(); + {rand, crypto64_seed()}; + crypto_cache -> + {rand, crypto:rand_seed_alg(crypto_cache)}; crypto -> - crypto:rand_seed_s(); + {rand, crypto:rand_seed_s()}; random -> - random:seed(os:timestamp()), get(random_seed); + {random, random:seed(os:timestamp()), get(random_seed)}; _ -> - rand:seed_s(Algo) + {rand, rand:seed_s(Alg)} end, - Range = RangeFun(Seed), + Range = RangeFun(State), Pid = spawn_link( fun() -> - Fun = fun() -> measure_2(?LOOP, Range, Seed, Gen) end, - {Time, ok} = timer:tc(Fun), + {Time, ok} = timer:tc(fun () -> Fun(State, Range, Mod) end), Percent = case TMark of undefined -> 100; @@ -673,7 +693,7 @@ measure_1(Algo, RangeFun, TMark, Gen) -> end, io:format( "~.12w: ~p ns ~p% [16#~.16b]~n", - [Algo, (Time * 1000 + 500) div ?LOOP, Percent, Range]), + [Alg, (Time * 1000 + 500) div ?LOOP, Percent, Range]), Parent ! {self(), Time}, normal end), @@ -681,21 +701,6 @@ measure_1(Algo, RangeFun, TMark, Gen) -> {Pid, Msg} -> Msg end. -measure_2(N, Range, State0, Fun) when N > 0 -> - case Fun(Range, State0) of - {int, {Random, State}} - when is_integer(Random), Random >= 1, Random =< Range -> - measure_2(N-1, Range, State, Fun); - {uniform, {Random, State}} - when is_float(Random), 0.0 =< Random, Random < 1.0 -> - measure_2(N-1, Range, State, Fun); - {normal, {Random, State}} when is_float(Random) -> - measure_2(N-1, Range, State, Fun); - Res -> - exit({error, Res, State0}) - end; -measure_2(0, _, _, _) -> ok. - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% The jump sequence tests has two parts %% for those with the functional API (jump/1) diff --git a/lib/stdlib/test/stdlib.spec b/lib/stdlib/test/stdlib.spec index 3768e494b2..91712b8963 100644 --- a/lib/stdlib/test/stdlib.spec +++ b/lib/stdlib/test/stdlib.spec @@ -1 +1,2 @@ {suites,"../stdlib_test",all}. +{skip_suites,"../stdlib_test",stdlib_bench_SUITE, "bench only"}. diff --git a/lib/stdlib/test/stdlib_bench.spec b/lib/stdlib/test/stdlib_bench.spec new file mode 100644 index 0000000000..a5d1e1db80 --- /dev/null +++ b/lib/stdlib/test/stdlib_bench.spec @@ -0,0 +1,7 @@ +%% Needed to compile ,unicode_util_SUITE and string_SUITE... +{cases,"../stdlib_test",unicode_util_SUITE, []}. +{cases,"../stdlib_test",string_SUITE, []}. +{skip_suites,"../stdlib_test",unicode_util_SUITE, "bench only"}. +{skip_suites,"../stdlib_test",string_SUITE, "bench only"}. + +{suites,"../stdlib_test",[stdlib_bench_SUITE]}. diff --git a/lib/stdlib/test/stdlib_bench_SUITE.erl b/lib/stdlib/test/stdlib_bench_SUITE.erl new file mode 100644 index 0000000000..8670e7029c --- /dev/null +++ b/lib/stdlib/test/stdlib_bench_SUITE.erl @@ -0,0 +1,107 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2012-2016. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +%% +-module(stdlib_bench_SUITE). +-compile([export_all, nowarn_export_all]). +-include_lib("common_test/include/ct_event.hrl"). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +suite() -> [{ct_hooks,[{ts_install_cth,[{nodenames,2}]}]}]. + + +all() -> + [{group,unicode}]. + +groups() -> + [{unicode,[{repeat,5}], + [norm_nfc_list, norm_nfc_deep_l, norm_nfc_binary, + string_lexemes_list, string_lexemes_binary + ]}]. + +init_per_group(_GroupName, Config) -> + Config. + +end_per_group(_GroupName, Config) -> + Config. + +init_per_suite(Config) -> + Config. + +end_per_suite(Config) -> + Config. + +init_per_testcase(_Func, Conf) -> + Conf. + +end_per_testcase(_Func, _Conf) -> + ok. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-define(REPEAT_NORM, 5). + +norm_nfc_list(Config) -> + Bin = norm_data(Config), + {_N, Mean, _Stddev, Res} = unicode_util_SUITE:time_count(nfc, list, Bin, ?REPEAT_NORM), + report(1000.0*Res / Mean). + +norm_nfc_deep_l(Config) -> + Bin = norm_data(Config), + {_N, Mean, _Stddev, Res} = unicode_util_SUITE:time_count(nfc, deep_l, Bin, ?REPEAT_NORM), + report(1000.0*Res / Mean). + +norm_nfc_binary(Config) -> + Bin = norm_data(Config), + {_N, Mean, _Stddev, Res} = unicode_util_SUITE:time_count(nfc, binary, Bin, ?REPEAT_NORM), + report(1000.0*Res / Mean). + + +string_lexemes_list(Config) -> + %% Use nth_lexeme instead of lexemes to avoid building a result of + %% large lists which causes large differences between test runs, gc? + Bin = norm_data(Config), + Fun = fun(Str) -> string:nth_lexeme(Str, 200000, [$;,$\n,$\r]), 200000 end, + {_N, Mean, _Stddev, Res} = string_SUITE:time_func(Fun, list, Bin, 15), + report(1000.0*Res / Mean). + +string_lexemes_binary(Config) -> + %% Use nth_lexeme instead of lexemes to avoid building a result of + %% large lists which causes large differences between test runs, gc? + Bin = norm_data(Config), + Fun = fun(Str) -> string:nth_lexeme(Str, 200000, [$;,$\n,$\r]), 200000 end, + {_N, Mean, _Stddev, Res} = string_SUITE:time_func(Fun, binary, Bin, ?REPEAT_NORM), + report(1000.0*Res / Mean). + +%%% +report(Tps) -> + ct_event:notify(#event{name = benchmark_data, + data = [{suite,"stdlib_unicode"},{value,round(Tps)}]}), + Tps. + +norm_data(Config) -> + DataDir0 = proplists:get_value(data_dir, Config), + DataDir = filename:join(lists:droplast(filename:split(DataDir0))), + File = filename:join([DataDir,"unicode_util_SUITE_data","NormalizationTest.txt"]), + {ok, Bin} = file:read_file(File), + Bin. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + diff --git a/lib/stdlib/test/string_SUITE.erl b/lib/stdlib/test/string_SUITE.erl index 90f980c0e5..05f18ef238 100644 --- a/lib/stdlib/test/string_SUITE.erl +++ b/lib/stdlib/test/string_SUITE.erl @@ -47,7 +47,7 @@ -export([to_upper_to_lower/1]). %% Run tests when debugging them --export([debug/0]). +-export([debug/0, time_func/4]). suite() -> [{ct_hooks,[ts_install_cth]}, @@ -728,7 +728,7 @@ do_measure(TestDir) -> {ok, Bin} = file:read_file(File), io:format("~p~n",[byte_size(Bin)]), Do = fun(Name, Func, Mode) -> - {N, Mean, Stddev, _} = time_func(Func, Mode, Bin), + {N, Mean, Stddev, _} = time_func(Func, Mode, Bin, 50), io:format("~10w ~6w ~6.2fms ±~4.2fms #~.2w gc included~n", [Name, Mode, Mean/1000, Stddev/1000, N]) end, @@ -938,19 +938,19 @@ needs_check(_) -> true. %%%% Timer stuff -time_func(Fun, Mode, Bin) -> +time_func(Fun, Mode, Bin, Repeat) -> timer:sleep(100), %% Let emulator catch up and clean things before test runs Self = self(), Pid = spawn_link(fun() -> Str = mode(Mode, Bin), - Self ! {self(),time_func(0,0,0, Fun, Str, undefined)} + Self ! {self(),time_func(0,0,0, Fun, Str, undefined, Repeat)} end), receive {Pid,Msg} -> Msg end. -time_func(N,Sum,SumSq, Fun, Str, _) when N < 50 -> +time_func(N,Sum,SumSq, Fun, Str, _, Repeat) when N < Repeat -> {Time, Res} = timer:tc(fun() -> Fun(Str) end), - time_func(N+1,Sum+Time,SumSq+Time*Time, Fun, Str, Res); -time_func(N,Sum,SumSq, _, _, Res) -> + time_func(N+1,Sum+Time,SumSq+Time*Time, Fun, Str, Res, Repeat); +time_func(N,Sum,SumSq, _, _, Res, _) -> Mean = round(Sum / N), Stdev = round(math:sqrt((SumSq - (Sum*Sum/N))/(N - 1))), {N, Mean, Stdev, Res}. diff --git a/lib/stdlib/test/unicode_util_SUITE.erl b/lib/stdlib/test/unicode_util_SUITE.erl index 03c24c7027..7dba0a2fd0 100644 --- a/lib/stdlib/test/unicode_util_SUITE.erl +++ b/lib/stdlib/test/unicode_util_SUITE.erl @@ -29,7 +29,9 @@ get/1, count/1]). --export([debug/0, id/1, bin_split/1, uc_loaded_size/0]). +-export([debug/0, id/1, bin_split/1, uc_loaded_size/0, + time_count/4 %% Used by stdlib_bench_SUITE + ]). suite() -> [{ct_hooks,[ts_install_cth]}, @@ -323,7 +325,7 @@ do_measure(Config) -> File = DataDir ++ "/NormalizationTest.txt", {ok, Bin} = file:read_file(File), Do = fun(Func, Mode) -> - {N, Mean, Stddev, Res} = time_count(Func, Mode, Bin), + {N, Mean, Stddev, Res} = time_count(Func, Mode, Bin, 10), io:format("~4w ~6w ~.10w ~.6wms ±~.2wms #~.2w~n", [Func, Mode, Res, Mean div 1000, Stddev div 1000, N]) end, @@ -345,19 +347,19 @@ uc_loaded_size([_|Rest]) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -time_count(Fun, Mode, Bin) -> +time_count(Fun, Mode, Bin, Repeat) -> timer:sleep(100), %% Let emulator catch up and clean things before test runs Self = self(), Pid = spawn_link(fun() -> Str = mode(Mode, Bin), - Self ! {self(),do_count(0,0,0, Fun, Str, undefined)} + Self ! {self(),do_count(0,0,0, Fun, Str, undefined, Repeat)} end), receive {Pid,Msg} -> Msg end. -do_count(N,Sum,SumSq, Fun, Str, _) when N < 10 -> +do_count(N,Sum,SumSq, Fun, Str, _, Repeat) when N < Repeat -> {Time, Res} = do_count(Fun, Str), - do_count(N+1,Sum+Time,SumSq+Time*Time, Fun, Str, Res); -do_count(N,Sum,SumSq, _, _, Res) -> + do_count(N+1,Sum+Time,SumSq+Time*Time, Fun, Str, Res, Repeat); +do_count(N,Sum,SumSq, _, _, Res, _) -> Mean = round(Sum / N), Stdev = round(math:sqrt((SumSq - (Sum*Sum/N))/(N - 1))), {N, Mean, Stdev, Res}. diff --git a/lib/stdlib/uc_spec/gen_unicode_mod.escript b/lib/stdlib/uc_spec/gen_unicode_mod.escript index 5b8763f576..674e5a0628 100755 --- a/lib/stdlib/uc_spec/gen_unicode_mod.escript +++ b/lib/stdlib/uc_spec/gen_unicode_mod.escript @@ -65,7 +65,7 @@ main(_) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% parse_unicode_data(Line0, Acc) -> - Line = string:strip(Line0, right, $\n), + Line = string:chomp(Line0), [CodePoint,Name,_Cat,Class,_BiDi,Decomp, _N1,_N2,_N3,_BDMirror,_Uni1,_Iso|Case] = tokens(Line, ";"), {Dec,Comp} = case to_decomp(Decomp) of @@ -78,14 +78,14 @@ parse_unicode_data(Line0, Acc) -> |Acc]. to_class(String) -> - list_to_integer(string:strip(String, both)). + list_to_integer(string:trim(String, both)). to_decomp("") -> []; to_decomp("<" ++ Str) -> - [Tag,Rest] = string:tokens(Str, ">"), + [Tag,Rest] = string:lexemes(Str, ">"), {list_to_atom(Tag), to_decomp(Rest)}; to_decomp(CodePoints) -> - CPL = string:tokens(CodePoints, " "), + CPL = string:lexemes(CodePoints, " "), [hex_to_int(CP) || CP <- CPL]. to_case(["","",""]) -> []; @@ -105,20 +105,20 @@ parse_special_casing(Line, Table) -> array:set(CP, Entry#cp{cs=Case}, Table). to_scase([Lower,Title,Upper|_]) -> - {unlist([hex_to_int(CP) || CP <- string:strip(string:tokens(Upper, " "), both)]), - unlist([hex_to_int(CP) || CP <- string:strip(string:tokens(Lower, " "), both)]), - unlist([hex_to_int(CP) || CP <- string:strip(string:tokens(Title, " "), both)]), + {unlist([hex_to_int(CP) || CP <- string:lexemes(Upper, " ")]), + unlist([hex_to_int(CP) || CP <- string:lexemes(Lower, " ")]), + unlist([hex_to_int(CP) || CP <- string:lexemes(Title, " ")]), []}. parse_case_folding(Line, Table) -> [CodePoint, Class0, CaseStr |_Comments] = tokens(Line, ";"), - Class = string:strip(Class0, both), + Class = string:trim(Class0, both), if Class =:= "T" -> Table; %% Do not support localization yet Class =:= "S" -> Table; %% Ignore simple true -> CP = hex_to_int(CodePoint), Case = unlist([hex_to_int(CPC) || - CPC <- string:strip(string:tokens(CaseStr, " "), both)]), + CPC <- string:lexemes(CaseStr, " ")]), #cp{cs={U,L,T,_}} = Entry = array:get(CP, Table), array:set(CP, Entry#cp{cs={U,L,T,Case}}, Table) end. @@ -869,10 +869,10 @@ optimize_ranges_1(Rs) -> hex_to_int([]) -> []; hex_to_int(HexStr) -> - list_to_integer(string:strip(HexStr, both), 16). + list_to_integer(string:trim(HexStr, both), 16). to_atom(Str) -> - list_to_atom(string:to_lower(string:strip(Str, both))). + list_to_atom(string:lowercase(string:trim(Str, both))). foldl(Fun, Acc, Fd) -> Get = fun() -> file:read_line(Fd) end, @@ -892,7 +892,7 @@ foldl_1(Fun, Acc, Get) -> -%% Differs from string:tokens, it returns empty string as token between two delimiters +%% Differs from string:lexemes, it returns empty string as token between two delimiters tokens(S, [C]) -> tokens(lists:reverse(S), C, []). diff --git a/lib/stdlib/vsn.mk b/lib/stdlib/vsn.mk index 8a83cdec1e..48db5dc900 100644 --- a/lib/stdlib/vsn.mk +++ b/lib/stdlib/vsn.mk @@ -1 +1 @@ -STDLIB_VSN = 3.4.1 +STDLIB_VSN = 3.4.2 |