8 files changed, 988 insertions, 107 deletions
diff --git a/lib/stdlib/doc/src/beam_lib.xml b/lib/stdlib/doc/src/beam_lib.xml
index d5ec90b060..031d79d0e2 100644
--- a/lib/stdlib/doc/src/beam_lib.xml
+++ b/lib/stdlib/doc/src/beam_lib.xml
@@ -42,10 +42,10 @@
       and the corresponding identifiers are as follows:</p>
 
     <list type="bulleted">
-      <item><c>abstract_code ("Abst")</c></item>
       <item><c>atoms ("Atom")</c></item>
       <item><c>attributes ("Attr")</c></item>
       <item><c>compile_info ("CInf")</c></item>
+      <item><c>debug_info ("Dbgi")</c></item>
       <item><c>exports ("ExpT")</c></item>
       <item><c>imports ("ImpT")</c></item>
       <item><c>indexed_imports ("ImpT")</c></item>
@@ -60,9 +60,8 @@
     <title>Debug Information/Abstract Code</title>
     <p>Option <c>debug_info</c> can be specified to the Compiler (see
       <seealso marker="compiler:compile#debug_info"><c>compile(3)</c></seealso>)
-      to have debug information in the form of abstract code (see section
-      <seealso marker="erts:absform">The Abstract Format</seealso> in the
-      ERTS User's Guide) stored in the <c>abstract_code</c> chunk.
+      to have debug information, such as <seealso marker="erts:absform">Erlang
+      Abstract Format</seealso>, stored in the <c>debug_info</c> chunk.
       Tools such as Debugger and Xref require the debug information to
       be included.</p>
 
@@ -79,7 +78,7 @@
 
   <section>
     <title>Reconstruct Source Code</title>
-    <p>The following example shows how to reconstruct source code from
+    <p>The following example shows how to reconstruct Erlang source code from
       the debug information in a BEAM file <c>Beam</c>:</p>
 
     <code type="none">
@@ -117,7 +116,7 @@ io:fwrite("~s~n", [erl_prettypr:format(erl_syntax:form_list(AC))]).</code>
 
     <list type="ordered">
       <item>
-        <p>Use Compiler option <c>{debug_info,Key}</c>, see
+        <p>Use Compiler option <c>{debug_info_key,Key}</c>, see
           <seealso marker="compiler:compile#debug_info_key"><c>compile(3)</c></seealso>
           and function
           <seealso marker="#crypto_key_fun/1"><c>crypto_key_fun/1</c></seealso>
@@ -198,18 +197,40 @@ io:fwrite("~s~n", [erl_prettypr:format(erl_syntax:form_list(AC))]).</code>
     <datatype>
       <name name="chunkid"/>
       <desc>
-        <p>"Abst" | "Attr" | "CInf" | "ExpT" | "ImpT" | "LocT" | "Atom"</p>
+        <p>"Attr" | "CInf" | "Dbgi" | "ExpT" | "ImpT" | "LocT" | "AtU8"</p>
       </desc>
     </datatype>
     <datatype>
       <name name="dataB"/>
     </datatype>
     <datatype>
+      <name name="debug_info"/>
+      <desc>
+        <p>The format stored in the <c>debug_info</c> chunk.
+          To retrieve a particular code representation from the backend,
+          <c>Backend:debug_info(Format, Module, Data, Opts)</c> must be
+          invoked. <c>Format</c> is an atom, such as <c>erlang_v1</c> for
+          the Erlang Abstract Format or <c>core_v1</c> for Core Erlang.
+          <c>Module</c> is the module represented by the beam file and
+          <c>Data</c> is the value stored in the debug info chunk.
+          <c>Opts</c> is any list of values supported by the <c>Backend</c>.
+          <c>Backend:debug_info/4</c> must return <c>{ok, Code}</c> or
+          <c>{error, Term}</c>.</p>
+
+        <p>Developers must always invoke the <c>debug_info/4</c> function
+          and never rely on the <c>Data</c> stored in the <c>debug_info</c>
+          chunk, as it is opaque and may change at any moment. <c>no_debug_info</c>
+          means that chunk <c>"Dbgi"</c> is present, but empty.</p>
+      </desc>
+    </datatype>
+    <datatype>
       <name name="abst_code"/>
       <desc>
         <p>It is not checked that the forms conform to the abstract format
           indicated by <c><anno>AbstVersion</anno></c>. <c>no_abstract_code</c>
           means that chunk <c>"Abst"</c> is present, but empty.</p>
+        <p>For modules compiled with OTP 20 onwards, the <c>abstract_code</c> chunk
+          is automatically computed from the <c>debug_info</c> chunk.</p>
       </desc>
     </datatype>
     <datatype>
@@ -346,7 +367,7 @@ io:fwrite("~s~n", [erl_prettypr:format(erl_syntax:form_list(AC))]).</code>
       <desc>
         <p>Registers an unary fun
           that is called if <c>beam_lib</c> must read an
-          <c>abstract_code</c> chunk that has been encrypted. The fun
+          <c>debug_info</c> chunk that has been encrypted. The fun
           is held in a process that is started by the function.</p>
         <p>If a fun is already registered when attempting to
           register a fun, <c>{error, exists}</c> is returned.</p>
@@ -443,7 +464,8 @@ CryptoKeyFun(clear) -> term()</code>
       <desc>
         <p>Removes all chunks from a BEAM
           file except those needed by the loader. In particular,
-          the debug information (chunk <c>abstract_code</c>) is removed.</p>
+          the debug information (chunks <c>debug_info</c> and <c>abstract_code</c>)
+          is removed.</p>
       </desc>
     </func>
 
@@ -454,9 +476,9 @@ CryptoKeyFun(clear) -> term()</code>
       <desc>
         <p>Removes all chunks except
           those needed by the loader from BEAM files. In particular,
-          the debug information (chunk <c>abstract_code</c>) is removed.
-          The returned list contains one element for each specified filename,
-          in the same order as in <c>Files</c>.</p>
+          the debug information (chunks <c>debug_info</c> and <c>abstract_code</c>)
+          is removed. The returned list contains one element for each
+          specified filename, in the same order as in <c>Files</c>.</p>
       </desc>
     </func>
 
diff --git a/lib/stdlib/doc/src/gen_event.xml b/lib/stdlib/doc/src/gen_event.xml
index 42e952fd46..56cb7974a2 100644
--- a/lib/stdlib/doc/src/gen_event.xml
+++ b/lib/stdlib/doc/src/gen_event.xml
@@ -579,6 +579,13 @@ gen_event:stop             ----->  Module:terminate/2
         <v>Extra = term()</v>
       </type>
       <desc>
+        <note>
+          <p>This callback is optional, so callback modules need not export it.
+            If a release upgrade/downgrade with <c>Change={advanced,Extra}</c>
+            specified in the <c>.appup</c> file is made when <c>code_change/3</c>
+            isn't implemented, the event handler will crash with an <c>undef</c> error
+            reason.</p>
+        </note>
         <p>This function is called for an installed event handler that
           is to update its internal state during a release
           upgrade/downgrade, that is, when the instruction
@@ -759,6 +766,12 @@ gen_event:stop             ----->  Module:terminate/2
         <v>&nbsp;&nbsp;Id = term()</v>
       </type>
       <desc>
+        <note>
+          <p>This callback is optional, so callback modules need not
+            export it. The <c>gen_event</c> module provides a default
+            implementation of this function that logs about the unexpected
+            <c>Info</c> message, drops it and returns <c>{noreply, State}</c>.</p>
+        </note>
         <p>This function is called for each installed event handler when
           an event manager receives any other message than an event or
           a synchronous request (or a system message).</p>
@@ -815,6 +828,11 @@ gen_event:stop             ----->  Module:terminate/2
         <v>&nbsp;Args = Reason = Term = term()</v>
       </type>
       <desc>
+        <note>
+          <p>This callback is optional, so callback modules need not
+            export it. The <c>gen_event</c> module provides a default
+            implementation without cleanup.</p>
+        </note>
         <p>Whenever an event handler is deleted from an event manager,
           this function is called. It is to be the opposite of
           <seealso marker="#Module:init/1"><c>Module:init/1</c></seealso>
diff --git a/lib/stdlib/doc/src/gen_fsm.xml b/lib/stdlib/doc/src/gen_fsm.xml
index 719ab2b558..691a039e34 100644
--- a/lib/stdlib/doc/src/gen_fsm.xml
+++ b/lib/stdlib/doc/src/gen_fsm.xml
@@ -562,6 +562,13 @@ gen_fsm:sync_send_all_state_event -----> Module:handle_sync_event/4
         <v>Extra = term()</v>
       </type>
       <desc>
+        <note>
+          <p>This callback is optional, so callback modules need not export it.
+            If a release upgrade/downgrade with <c>Change={advanced,Extra}</c>
+            specified in the <c>appup</c> file is made when <c>code_change/4</c>
+            isn't implemented, the process will crash with an <c>undef</c> exit
+            reason.</p>
+        </note>
         <p>This function is called by a <c>gen_fsm</c> process when it is to
           update its internal state data during a release upgrade/downgrade,
           that is, when instruction <c>{update,Module,Change,...}</c>,
@@ -686,6 +693,13 @@ gen_fsm:sync_send_all_state_event -----> Module:handle_sync_event/4
         <v>&nbsp;Reason = normal | term()</v>
       </type>
       <desc>
+        <note>
+          <p>This callback is optional, so callback modules need not
+            export it. The <c>gen_fsm</c> module provides a default
+            implementation of this function that logs about the unexpected
+            <c>Info</c> message, drops it and returns
+            <c>{next_state, StateName, StateData}</c>.</p>
+        </note>
         <p>This function is called by a <c>gen_fsm</c> process when it receives
           any other message than a synchronous or asynchronous event (or a
           system message).</p>
@@ -899,6 +913,11 @@ gen_fsm:sync_send_all_state_event -----> Module:handle_sync_event/4
         <v>StateData = term()</v>
       </type>
       <desc>
+        <note>
+          <p>This callback is optional, so callback modules need not
+            export it. The <c>gen_fsm</c> module provides a default
+            implementation without cleanup.</p>
+        </note>
         <p>This function is called by a <c>gen_fsm</c> process when it is about
           to terminate. It is to be the opposite of
           <seealso marker="#Module:init/1"><c>Module:init/1</c></seealso>
diff --git a/lib/stdlib/doc/src/gen_server.xml b/lib/stdlib/doc/src/gen_server.xml
index 662076b5f0..4540449792 100644
--- a/lib/stdlib/doc/src/gen_server.xml
+++ b/lib/stdlib/doc/src/gen_server.xml
@@ -504,6 +504,13 @@ gen_server:abcast     -----> Module:handle_cast/2
         <v>Reason = term()</v>
       </type>
       <desc>
+        <note>
+          <p>This callback is optional, so callback modules need not export it.
+            If a release upgrade/downgrade with <c>Change={advanced,Extra}</c>
+            specified in the <c>appup</c> file is made when <c>code_change/3</c>
+            isn't implemented, the process will crash with an <c>undef</c> exit
+            reason.</p>
+        </note>
         <p>This function is called by a <c>gen_server</c> process when it is
           to update its internal state during a release upgrade/downgrade,
           that is, when the instruction <c>{update,Module,Change,...}</c>,
@@ -690,6 +697,12 @@ gen_server:abcast     -----> Module:handle_cast/2
         <v>&nbsp;Reason = normal | term()</v>
       </type>
       <desc>
+        <note>
+          <p>This callback is optional, so callback modules need not
+            export it. The <c>gen_server</c> module provides a default
+            implementation of this function that logs about the unexpected
+            <c>Info</c> message, drops it and returns <c>{noreply, State}</c>.</p>
+        </note>
         <p>This function is called by a <c>gen_server</c> process when a
           time-out occurs or when it receives any other message than a
           synchronous or asynchronous request (or a system message).</p>
@@ -750,6 +763,11 @@ gen_server:abcast     -----> Module:handle_cast/2
         <v>State = term()</v>
       </type>
       <desc>
+        <note>
+          <p>This callback is optional, so callback modules need not
+            export it. The <c>gen_server</c> module provides a default
+            implementation without cleanup.</p>
+        </note>
         <p>This function is called by a <c>gen_server</c> process when it is
           about to terminate. It is to be the opposite of
           <seealso marker="#Module:init/1"><c>Module:init/1</c></seealso>
diff --git a/lib/stdlib/doc/src/sets.xml b/lib/stdlib/doc/src/sets.xml
index 44dc104645..4dc5d68151 100644
--- a/lib/stdlib/doc/src/sets.xml
+++ b/lib/stdlib/doc/src/sets.xml
@@ -40,7 +40,7 @@
 
     <p>This module provides the same interface as the
       <seealso marker="ordsets"><c>ordsets(3)</c></seealso> module
-      but with a defined representation. One difference is
+      but with an undefined representation. One difference is
       that while this module considers two elements as different if they
       do not match (<c>=:=</c>), <c>ordsets</c> considers two elements as
       different if and only if they do not compare equal (<c>==</c>).</p>
diff --git a/lib/stdlib/doc/src/string.xml b/lib/stdlib/doc/src/string.xml
index dddedf1132..dc83c40a9a 100644
--- a/lib/stdlib/doc/src/string.xml
+++ b/lib/stdlib/doc/src/string.xml
@@ -36,8 +36,613 @@
   <modulesummary>String processing functions.</modulesummary>
   <description>
     <p>This module provides functions for string processing.</p>
+    <p>A string in this module is represented by <seealso marker="unicode#type-chardata">
+    <c>unicode:chardata()</c></seealso>, that is, a list of codepoints,
+    binaries with UTF-8-encoded codepoints
+    (<em>UTF-8 binaries</em>), or a mix of the two.</p>
+    <code>
+"abcd"               is a valid string
+&lt;&lt;"abcd">>           is a valid string
+["abcd"]             is a valid string
+&lt;&lt;"abc..åäö"/utf8>>  is a valid string
+&lt;&lt;"abc..åäö">>       is NOT a valid string,
+                     but a binary with Latin-1-encoded codepoints
+[&lt;&lt;"abc">>, "..åäö"] is a valid string
+[atom]               is NOT a valid string</code>
+    <p>
+      This module operates on grapheme clusters. A <em>grapheme cluster</em>
+      is a user-perceived character, which can be represented by several
+      codepoints.
+    </p>
+    <code>
+"å"  [229] or [97, 778]
+"e̊"  [101, 778]</code>
+    <p>
+      The string length of "ß↑e̊" is 3, even though it is represented by the
+      codepoints <c>[223,8593,101,778]</c> or the UTF-8 binary
+      <c>&lt;&lt;195,159,226,134,145,101,204,138>></c>.
+    </p>
+    <p>
+      Grapheme clusters for codepoints of class <c>prepend</c>
+      and non-modern (or decomposed) Hangul are not handled for performance
+      reasons in
+      <seealso marker="#find/3"><c>find/3</c></seealso>,
+      <seealso marker="#replace/3"><c>replace/3</c></seealso>,
+      <seealso marker="#split/2"><c>split/2</c></seealso>,
+      <seealso marker="#lexemes/2"><c>lexemes/2</c></seealso> and
+      <seealso marker="#trim/3"><c>trim/3</c></seealso>.
+    </p>
+    <p>
+      Splitting and appending strings is to be done on grapheme cluster
+      borders.
+      There is no verification that the results of appending strings are
+      valid or normalized.
+    </p>
+    <p>
+      Most of the functions expect all input to be normalized to one form,
+      see for example <seealso marker="unicode#characters_to_nfc_list/1">
+      <c>unicode:characters_to_nfc_list/1</c></seealso>.
+    </p>
+    <p>
+      Language or locale specific handling of input is not considered
+      in any function.
+    </p>
+    <p>
+      The functions can crash for non-valid input strings. For example,
+      the functions expect UTF-8 binaries but not all functions
+      verify that all binaries are encoded correctly.
+    </p>
+    <p>
+      Unless otherwise specified the return value type is the same as
+      the input type. That is, binary input returns binary output,
+      list input returns a list output, and mixed input can return a
+      mixed output.</p>
+      <code>
+1> string:trim("  sarah  ").
+"sarah"
+2> string:trim(&lt;&lt;"  sarah  ">>).
+&lt;&lt;"sarah">>
+3> string:lexemes("foo bar", " ").
+["foo","bar"]
+4> string:lexemes(&lt;&lt;"foo bar">>, " ").
+[&lt;&lt;"foo">>,&lt;&lt;"bar">>]</code>
+    <p>This module has been reworked in Erlang/OTP 20 to
+    handle <seealso marker="unicode#type-chardata">
+    <c>unicode:chardata()</c></seealso> and operate on grapheme
+    clusters. The <seealso marker="#oldapi"> <c>old
+    functions</c></seealso> that only work on Latin-1 lists as input
+    are still available but should not be
+    used. They will be deprecated in Erlang/OTP 21.
+    </p>
   </description>
 
+  <datatypes>
+    <datatype>
+      <name name="direction"/>
+      <name name="grapheme_cluster"/>
+      <desc>
+        <p>A user-perceived character, consisting of one or more
+        codepoints.</p>
+      </desc>
+    </datatype>
+  </datatypes>
+
+  <funcs>
+
+    <func>
+      <name name="casefold" arity="1"/>
+      <fsummary>Convert a string to a comparable string.</fsummary>
+      <desc>
+        <p>
+	  Converts <c><anno>String</anno></c> to a case-agnostic
+	  comparable string. Function <c>casefold/1</c> is preferred
+	  over <c>lowercase/1</c> when two strings are to be compared
+	  for equality. See also <seealso marker="#equal/4"><c>equal/4</c></seealso>.
+	</p>
+	<p><em>Example:</em></p>
+	<pre>
+1> <input>string:casefold("Ω and ẞ SHARP S").</input>
+"ω and ss sharp s"</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="chomp" arity="1"/>
+      <fsummary>Remove trailing end of line control characters.</fsummary>
+      <desc>
+        <p>
+	  Returns a string where any trailing <c>\n</c> or
+	  <c>\r\n</c> have been removed from <c><anno>String</anno></c>.
+	</p>
+	<p><em>Example:</em></p>
+	<pre>
+182> <input>string:chomp(&lt;&lt;"\nHello\n\n">>).</input>
+&lt;&lt;"\nHello">>
+183> <input>string:chomp("\nHello\r\r\n").</input>
+"\nHello\r"</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="equal" arity="2"/>
+      <name name="equal" arity="3"/>
+      <name name="equal" arity="4"/>
+      <fsummary>Test string equality.</fsummary>
+      <desc>
+        <p>
+	  Returns <c>true</c> if <c><anno>A</anno></c> and
+          <c><anno>B</anno></c> are equal, otherwise <c>false</c>.
+	</p>
+	<p>
+	  If <c><anno>IgnoreCase</anno></c> is <c>true</c>
+	  the function does <seealso marker="#casefold/1">
+	  <c>casefold</c>ing</seealso> on the fly before the equality test.
+	</p>
+	<p>If <c><anno>Norm</anno></c> is not <c>none</c>
+	the function applies normalization on the fly before the equality test.
+	There are four available normalization forms:
+	<seealso marker="unicode#characters_to_nfc_list/1"> <c>nfc</c></seealso>,
+	<seealso marker="unicode#characters_to_nfd_list/1"> <c>nfd</c></seealso>,
+	<seealso marker="unicode#characters_to_nfkc_list/1"> <c>nfkc</c></seealso>, and
+	<seealso marker="unicode#characters_to_nfkd_list/1"> <c>nfkd</c></seealso>.
+	</p>
+	<p>By default,
+	<c><anno>IgnoreCase</anno></c> is <c>false</c> and
+	<c><anno>Norm</anno></c> is <c>none</c>.</p>
+	<p><em>Example:</em></p>
+	<pre>
+1> <input>string:equal("åäö", &lt;&lt;"åäö"/utf8>>).</input>
+true
+2> <input>string:equal("åäö", unicode:characters_to_nfd_binary("åäö")).</input>
+false
+3> <input>string:equal("åäö", unicode:characters_to_nfd_binary("ÅÄÖ"), true, nfc).</input>
+true</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="find" arity="2"/>
+      <name name="find" arity="3"/>
+      <fsummary>Find start of substring.</fsummary>
+      <desc>
+        <p>
+	  Removes anything before <c><anno>SearchPattern</anno></c> in <c><anno>String</anno></c>
+	  and returns the remainder of the string or <c>nomatch</c> if <c><anno>SearchPattern</anno></c> is not
+	  found.
+          <c><anno>Dir</anno></c>, which can be <c>leading</c> or
+	  <c>trailing</c>, indicates from which direction characters
+	  are to be searched.
+        </p>
+	<p>
+          By default, <c><anno>Dir</anno></c> is <c>leading</c>.
+	</p>
+	<p><em>Example:</em></p>
+	<pre>
+1> <input>string:find("ab..cd..ef", ".").</input>
+"..cd..ef"
+2> <input>string:find(&lt;&lt;"ab..cd..ef">>, "..", trailing).</input>
+&lt;&lt;"..ef">>
+3> <input>string:find(&lt;&lt;"ab..cd..ef">>, "x", leading).</input>
+nomatch
+4> <input>string:find("ab..cd..ef", "x", trailing).</input>
+nomatch</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="is_empty" arity="1"/>
+      <fsummary>Check if the string is empty.</fsummary>
+      <desc>
+        <p>Returns <c>true</c> if <c><anno>String</anno></c> is the
+        empty string, otherwise <c>false</c>.</p>
+	<p><em>Example:</em></p>
+	<pre>
+1> <input>string:is_empty("foo").</input>
+false
+2> <input>string:is_empty(["",&lt;&lt;>>]).</input>
+true</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="length" arity="1"/>
+      <fsummary>Calculate length of the string.</fsummary>
+      <desc>
+        <p>
+	  Returns the number of grapheme clusters in <c><anno>String</anno></c>.
+	</p>
+	<p><em>Example:</em></p>
+	<pre>
+1> <input>string:length("ß↑e̊").</input>
+3
+2> <input>string:length(&lt;&lt;195,159,226,134,145,101,204,138>>).</input>
+3</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="lexemes" arity="2"/>
+      <fsummary>Split string into lexemes.</fsummary>
+      <desc>
+	<p>
+	  Returns a list of lexemes in <c><anno>String</anno></c>, separated
+          by the grapheme clusters in <c><anno>SeparatorList</anno></c>.
+	</p>
+	<p>
+	  Notice that, as shown in this example, two or more
+          adjacent separator grapheme clusters in <c><anno>String</anno></c>
+          are treated as one. That is, there are no empty
+          strings in the resulting list of lexemes.
+	  See also <seealso marker="#split/3"><c>split/3</c></seealso> which returns
+	  empty strings.
+	</p>
+	<p>Notice that <c>[$\r,$\n]</c> is one grapheme cluster.</p>
+	<p><em>Example:</em></p>
+	<pre>
+1> <input>string:lexemes("abc de̊fxxghix jkl\r\nfoo", "x e" ++ [[$\r,$\n]]).</input>
+["abc","de̊f","ghi","jkl","foo"]
+2> <input>string:lexemes(&lt;&lt;"abc de̊fxxghix jkl\r\nfoo"/utf8>>, "x e" ++ [$\r,$\n]).</input>
+[&lt;&lt;"abc">>,&lt;&lt;"de̊f"/utf8>>,&lt;&lt;"ghi">>,&lt;&lt;"jkl\r\nfoo">>]</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="lowercase" arity="1"/>
+      <fsummary>Convert a string to lowercase</fsummary>
+      <desc>
+        <p>
+	  Converts <c><anno>String</anno></c> to lowercase.
+	</p>
+	<p>
+	  Notice that function <seealso marker="#casefold/1"><c>casefold/1</c></seealso>
+	  should be used when converting a string to
+	  be tested for equality.
+	</p>
+	<p><em>Example:</em></p>
+	<pre>
+2> <input>string:lowercase(string:uppercase("Michał")).</input>
+"michał"</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="next_codepoint" arity="1"/>
+      <fsummary>Pick the first codepoint.</fsummary>
+      <desc>
+        <p>
+	  Returns the first codepoint in <c><anno>String</anno></c>
+	  and the rest of <c><anno>String</anno></c> in the tail.
+	</p>
+	<p><em>Example:</em></p>
+	<pre>
+1> <input>string:next_codepoint(unicode:characters_to_binary("e̊fg")).</input>
+[101|&lt;&lt;"̊fg"/utf8>>]</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="next_grapheme" arity="1"/>
+      <fsummary>Pick the first grapheme cluster.</fsummary>
+      <desc>
+        <p>
+	  Returns the first grapheme cluster in <c><anno>String</anno></c>
+	  and the rest of <c><anno>String</anno></c> in the tail.
+	</p>
+	<p><em>Example:</em></p>
+	<pre>
+1> <input>string:next_grapheme(unicode:characters_to_binary("e̊fg")).</input>
+["e̊"|&lt;&lt;"fg">>]</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="nth_lexeme" arity="3"/>
+      <fsummary>Pick the nth lexeme.</fsummary>
+      <desc>
+	<p>Returns lexeme number <c><anno>N</anno></c> in
+	<c><anno>String</anno></c>, where lexemes are separated by
+	the grapheme clusters in <c><anno>SeparatorList</anno></c>.
+	</p>
+	<p><em>Example:</em></p>
+	<pre>
+1> <input>string:nth_lexeme("abc.de̊f.ghiejkl", 3, ".e").</input>
+"ghi"</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="pad" arity="2"/>
+      <name name="pad" arity="3"/>
+      <name name="pad" arity="4"/>
+      <fsummary>Pad a string to given length.</fsummary>
+      <desc>
+        <p>
+	  Pads <c><anno>String</anno></c> to <c><anno>Length</anno></c> with
+	  grapheme cluster <c><anno>Char</anno></c>.
+	  <c><anno>Dir</anno></c>, which can be <c>leading</c>, <c>trailing</c>,
+	  or <c>both</c>, indicates where the padding should be added.
+	</p>
+	<p>By default, <c><anno>Char</anno></c> is <c>$\s</c> and
+	<c><anno>Dir</anno></c> is <c>trailing</c>.
+	</p>
+	<p><em>Example:</em></p>
+	<pre>
+1> <input>string:pad(&lt;&lt;"He̊llö"/utf8>>, 8).</input>
+[&lt;&lt;72,101,204,138,108,108,195,182>>,32,32,32]
+2> <input>io:format("'~ts'~n",[string:pad("He̊llö", 8, leading)]).</input>
+'   He̊llö'
+3> <input>io:format("'~ts'~n",[string:pad("He̊llö", 8, both)]).</input>
+' He̊llö  '</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="prefix" arity="2"/>
+      <fsummary>Remove prefix from string.</fsummary>
+      <desc>
+        <p>
+	  If <c><anno>Prefix</anno></c> is the prefix of
+	  <c><anno>String</anno></c>, removes it and returns the
+	  remainder of <c><anno>String</anno></c>, otherwise returns
+	  <c>nomatch</c>.
+	</p>
+	<p><em>Example:</em></p>
+	<pre>
+1> <input>string:prefix(&lt;&lt;"prefix of string">>, "pre").</input>
+&lt;&lt;"fix of string">>
+2> <input>string:prefix("pre", "prefix").</input>
+nomatch</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="replace" arity="3"/>
+      <name name="replace" arity="4"/>
+      <fsummary>Replace a pattern in string.</fsummary>
+      <desc>
+        <p>
+	  Replaces <c><anno>SearchPattern</anno></c> in <c><anno>String</anno></c>
+	  with <c><anno>Replacement</anno></c>.
+	  <c><anno>Where</anno></c>, default <c>leading</c>, indicates whether
+	  the <c>leading</c>, the <c>trailing</c> or <c>all</c> encounters of
+	  <c><anno>SearchPattern</anno></c> are to be replaced.
+	</p>
+	<p>Can be implemented as:</p>
+	<pre>lists:join(Replacement, split(String, SearchPattern, Where)).</pre>
+	<p><em>Example:</em></p>
+	<pre>
+1> <input>string:replace(&lt;&lt;"ab..cd..ef">>, "..", "*").</input>
+[&lt;&lt;"ab">>,"*",&lt;&lt;"cd..ef">>]
+2> <input>string:replace(&lt;&lt;"ab..cd..ef">>, "..", "*", all).</input>
+[&lt;&lt;"ab">>,"*",&lt;&lt;"cd">>,"*",&lt;&lt;"ef">>]</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="reverse" arity="1"/>
+      <fsummary>Reverse a string.</fsummary>
+      <desc>
+        <p>
+	  Returns the reverse list of the grapheme clusters in <c><anno>String</anno></c>.
+	</p>
+	<p><em>Example:</em></p>
+	<pre>
+1> <input>Reverse = string:reverse(unicode:characters_to_nfd_binary("ÅÄÖ")).</input>
+[[79,776],[65,776],[65,778]]
+2> <input>io:format("~ts~n",[Reverse]).</input>
+ÖÄÅ</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="slice" arity="2"/>
+      <name name="slice" arity="3"/>
+      <fsummary>Extract a part of string</fsummary>
+      <desc>
+	<p>Returns a substring of <c><anno>String</anno></c> of
+	at most <c><anno>Length</anno></c> grapheme clusters, starting at position
+	<c><anno>Start</anno></c>.</p>
+	<p>By default, <c><anno>Length</anno></c> is <c>infinity</c>.</p>
+	<p><em>Example:</em></p>
+	<pre>
+1> <input>string:slice(&lt;&lt;"He̊llö Wörld"/utf8>>, 4).</input>
+&lt;&lt;"ö Wörld"/utf8>>
+2> <input>string:slice(["He̊llö ", &lt;&lt;"Wörld"/utf8>>], 4,4).</input>
+"ö Wö"
+3> <input>string:slice(["He̊llö ", &lt;&lt;"Wörld"/utf8>>], 4,50).</input>
+"ö Wörld"</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="split" arity="2"/>
+      <name name="split" arity="3"/>
+      <fsummary>Split a string into substrings.</fsummary>
+      <desc>
+        <p>
+	  Splits <c><anno>String</anno></c> where <c><anno>SearchPattern</anno></c>
+	  is encountered and returns the remaining parts.
+	  <c><anno>Where</anno></c>, default <c>leading</c>, indicates whether
+	  the <c>leading</c>, the <c>trailing</c> or <c>all</c> encounters of
+	  <c><anno>SearchPattern</anno></c> will split <c><anno>String</anno></c>.
+	</p>
+	<p><em>Example:</em></p>
+	<pre>
+0> <input>string:split("ab..bc..cd", "..").</input>
+["ab","bc..cd"]
+1> <input>string:split(&lt;&lt;"ab..bc..cd">>, "..", trailing).</input>
+[&lt;&lt;"ab..bc">>,&lt;&lt;"cd">>]
+2> <input>string:split(&lt;&lt;"ab..bc....cd">>, "..", all).</input>
+[&lt;&lt;"ab">>,&lt;&lt;"bc">>,&lt;&lt;>>,&lt;&lt;"cd">>]</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="take" arity="2"/>
+      <name name="take" arity="3"/>
+      <name name="take" arity="4"/>
+      <fsummary>Take leading or trailing parts.</fsummary>
+      <desc>
+        <p>Takes characters from <c><anno>String</anno></c> as long as
+        the characters are members of set <c><anno>Characters</anno></c>
+	or the complement of set <c><anno>Characters</anno></c>.
+        <c><anno>Dir</anno></c>,
+        which can be <c>leading</c> or <c>trailing</c>, indicates from
+        which direction characters are to be taken.
+	</p>
+	<p><em>Example:</em></p>
+	<pre>
+5> <input>string:take("abc0z123", lists:seq($a,$z)).</input>
+{"abc","0z123"}
+6> <input>string:take(&lt;&lt;"abc0z123">>, lists:seq($0,$9), true, leading).</input>
+{&lt;&lt;"abc">>,&lt;&lt;"0z123">>}
+7> <input>string:take("abc0z123", lists:seq($0,$9), false, trailing).</input>
+{"abc0z","123"}
+8> <input>string:take(&lt;&lt;"abc0z123">>, lists:seq($a,$z), true, trailing).</input>
+{&lt;&lt;"abc0z">>,&lt;&lt;"123">>}</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="titlecase" arity="1"/>
+      <fsummary>Convert a string to titlecase.</fsummary>
+      <desc>
+        <p>
+	  Converts <c><anno>String</anno></c> to titlecase.
+	</p>
+	<p><em>Example:</em></p>
+	<pre>
+1> <input>string:titlecase("ß is a SHARP s").</input>
+"Ss is a SHARP s"</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="to_float" arity="1"/>
+      <fsummary>Return a float whose text representation is the integers
+        (ASCII values) of a string.</fsummary>
+      <desc>
+        <p>Argument <c><anno>String</anno></c> is expected to start with a
+          valid text represented float (the digits are ASCII values).
+          Remaining characters in the string after the float are returned in
+          <c><anno>Rest</anno></c>.</p>
+        <p><em>Example:</em></p>
+        <pre>
+> <input>{F1,Fs} = string:to_float("1.0-1.0e-1"),</input>
+> <input>{F2,[]} = string:to_float(Fs),</input>
+> <input>F1+F2.</input>
+0.9
+> <input>string:to_float("3/2=1.5").</input>
+{error,no_float}
+> <input>string:to_float("-1.5eX").</input>
+{-1.5,"eX"}</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="to_integer" arity="1"/>
+      <fsummary>Return an integer whose text representation is the integers
+        (ASCII values) of a string.</fsummary>
+      <desc>
+        <p>Argument <c><anno>String</anno></c> is expected to start with a
+          valid text represented integer (the digits are ASCII values).
+          Remaining characters in the string after the integer are returned in
+          <c><anno>Rest</anno></c>.</p>
+        <p><em>Example:</em></p>
+        <pre>
+> <input>{I1,Is} = string:to_integer("33+22"),</input>
+> <input>{I2,[]} = string:to_integer(Is),</input>
+> <input>I1-I2.</input>
+11
+> <input>string:to_integer("0.5").</input>
+{0,".5"}
+> <input>string:to_integer("x=2").</input>
+{error,no_integer}</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="to_graphemes" arity="1"/>
+      <fsummary>Convert a string to a list of grapheme clusters.</fsummary>
+      <desc>
+        <p>
+	  Converts <c><anno>String</anno></c> to a list of grapheme clusters.
+	</p>
+	<p><em>Example:</em></p>
+	<pre>
+1> <input>string:to_graphemes("ß↑e̊").</input>
+[223,8593,[101,778]]
+2> <input>string:to_graphemes(&lt;&lt;"ß↑e̊"/utf8>>).</input>
+[223,8593,[101,778]]</pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="trim" arity="1"/>
+      <name name="trim" arity="2"/>
+      <name name="trim" arity="3"/>
+      <fsummary>Trim leading or trailing, or both, characters.</fsummary>
+      <desc>
+	<p>
+	  Returns a string, where leading or trailing, or both,
+	  <c><anno>Characters</anno></c> have been removed.
+	  <c><anno>Dir</anno></c> which can be <c>leading</c>, <c>trailing</c>,
+	  or <c>both</c>, indicates from which direction characters
+	  are to be removed.
+	</p>
+	<p> Default <c><anno>Characters</anno></c> are the set of
+	nonbreakable whitespace codepoints, defined as
+	Pattern_White_Space in
+	<url href="http://unicode.org/reports/tr31/">Unicode Standard Annex #31</url>.
+	By default, <c><anno>Dir</anno></c> is <c>both</c>.
+	</p>
+	<p>
+	  Notice that <c>[$\r,$\n]</c> is one grapheme cluster according
+	  to the Unicode Standard.
+	</p>
+	<p><em>Example:</em></p>
+	<pre>
+1> <input>string:trim("\t  Hello  \n").</input>
+"Hello"
+2> <input>string:trim(&lt;&lt;"\t  Hello  \n">>, leading).</input>
+&lt;&lt;"Hello  \n">>
+3> <input>string:trim(&lt;&lt;".Hello.\n">>, trailing, "\n.").</input>
+&lt;&lt;".Hello">></pre>
+      </desc>
+    </func>
+
+    <func>
+      <name name="uppercase" arity="1"/>
+      <fsummary>Convert a string to uppercase.</fsummary>
+      <desc>
+        <p>
+	  Converts <c><anno>String</anno></c> to uppercase.
+	</p>
+	<p>See also <seealso marker="#titlecase/1"><c>titlecase/1</c></seealso>.</p>
+	<p><em>Example:</em></p>
+	<pre>
+1> <input>string:uppercase("Michał").</input>
+"MICHAŁ"</pre>
+      </desc>
+    </func>
+
+  </funcs>
+
+  <section>
+    <marker id="oldapi"/>
+    <title>Obsolete API functions</title>
+    <p>Here follow the functions of the old API.
+    These functions only work on a list of Latin-1 characters.
+    </p>
+    <note><p>
+      The functions are kept for backward compatibility, but are
+      not recommended.
+      They will be deprecated in Erlang/OTP 21.
+    </p>
+    <p>Any undocumented functions in <c>string</c> are not to be used.</p>
+    </note>
+  </section>
+
   <funcs>
     <func>
       <name name="centre" arity="2"/>
@@ -47,17 +652,24 @@
         <p>Returns a string, where <c><anno>String</anno></c> is centered in the
           string and surrounded by blanks or <c><anno>Character</anno></c>.
 	  The resulting string has length <c><anno>Number</anno></c>.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="#pad/3"><c>pad/3</c></seealso>.
+	</p>
       </desc>
     </func>
 
     <func>
       <name name="chars" arity="2"/>
       <name name="chars" arity="3"/>
-      <fsummary>Returns a string consisting of numbers of characters.</fsummary>
+      <fsummary>Return a string consisting of a number of characters.</fsummary>
       <desc>
         <p>Returns a string consisting of <c><anno>Number</anno></c> characters
           <c><anno>Character</anno></c>. Optionally, the string can end with
           string <c><anno>Tail</anno></c>.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="lists#duplicate/2"><c>lists:duplicate/2</c></seealso>.</p>
       </desc>
     </func>
 
@@ -69,6 +681,9 @@
         <p>Returns the index of the first occurrence of
           <c><anno>Character</anno></c> in <c><anno>String</anno></c>. Returns
           <c>0</c> if <c><anno>Character</anno></c> does not occur.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="#find/2"><c>find/2</c></seealso>.</p>
       </desc>
     </func>
 
@@ -79,6 +694,16 @@
         <p>Concatenates <c><anno>String1</anno></c> and
           <c><anno>String2</anno></c> to form a new string
           <c><anno>String3</anno></c>, which is returned.</p>
+	<p>
+	  This function is <seealso marker="#oldapi">obsolete</seealso>.
+	  Use <c>[<anno>String1</anno>, <anno>String2</anno>]</c> as
+	  <c>Data</c> argument, and call
+	  <seealso marker="unicode#characters_to_list/2">
+	  <c>unicode:characters_to_list/2</c></seealso> or
+	  <seealso marker="unicode#characters_to_binary/2">
+	  <c>unicode:characters_to_binary/2</c></seealso>
+	  to flatten the output.
+	</p>
       </desc>
     </func>
 
@@ -88,6 +713,9 @@
       <desc>
         <p>Returns a string containing <c><anno>String</anno></c> repeated
           <c><anno>Number</anno></c> times.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="lists#duplicate/2"><c>lists:duplicate/2</c></seealso>.</p>
       </desc>
     </func>
 
@@ -98,6 +726,9 @@
         <p>Returns the length of the maximum initial segment of
           <c><anno>String</anno></c>, which consists entirely of characters
           not from <c><anno>Chars</anno></c>.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="#take/3"><c>take/3</c></seealso>.</p>
         <p><em>Example:</em></p>
         <code type="none">
 > string:cspan("\t    abcdef", " \t").
@@ -106,20 +737,14 @@
     </func>
 
     <func>
-      <name name="equal" arity="2"/>
-      <fsummary>Test string equality.</fsummary>
-      <desc>
-        <p>Returns <c>true</c> if <c><anno>String1</anno></c> and
-          <c><anno>String2</anno></c> are equal, otherwise <c>false</c>.</p>
-      </desc>
-    </func>
-
-    <func>
       <name name="join" arity="2"/>
       <fsummary>Join a list of strings with separator.</fsummary>
       <desc>
         <p>Returns a string with the elements of <c><anno>StringList</anno></c>
           separated by the string in <c><anno>Separator</anno></c>.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="lists#join/2"><c>lists:join/2</c></seealso>.</p>
         <p><em>Example:</em></p>
         <code type="none">
 > join(["one", "two", "three"], ", ").
@@ -137,6 +762,10 @@
           fixed. If <c>length(<anno>String</anno>)</c> &lt;
           <c><anno>Number</anno></c>, then <c><anno>String</anno></c> is padded
           with blanks or <c><anno>Character</anno></c>s.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="#pad/2"><c>pad/2</c></seealso> or
+	<seealso marker="#pad/3"><c>pad/3</c></seealso>.</p>
         <p><em>Example:</em></p>
         <code type="none">
 > string:left("Hello",10,$.).
@@ -149,6 +778,9 @@
       <fsummary>Return the length of a string.</fsummary>
       <desc>
         <p>Returns the number of characters in <c><anno>String</anno></c>.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="#length/1"><c>length/1</c></seealso>.</p>
       </desc>
     </func>
 
@@ -160,6 +792,9 @@
         <p>Returns the index of the last occurrence of
           <c><anno>Character</anno></c> in <c><anno>String</anno></c>. Returns
           <c>0</c> if <c><anno>Character</anno></c> does not occur.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="#find/3"><c>find/3</c></seealso>.</p>
       </desc>
     </func>
 
@@ -173,6 +808,9 @@
           fixed. If the length of <c>(<anno>String</anno>)</c> &lt;
           <c><anno>Number</anno></c>, then <c><anno>String</anno></c> is padded
           with blanks or <c><anno>Character</anno></c>s.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="#pad/3"><c>pad/3</c></seealso>.</p>
         <p><em>Example:</em></p>
         <code type="none">
 > string:right("Hello", 10, $.).
@@ -188,6 +826,9 @@
           <c><anno>SubString</anno></c> begins in <c><anno>String</anno></c>.
           Returns <c>0</c> if <c><anno>SubString</anno></c>
           does not exist in <c><anno>String</anno></c>.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="#find/3"><c>find/3</c></seealso>.</p>
         <p><em>Example:</em></p>
         <code type="none">
 > string:rstr(" Hello Hello World World ", "Hello World").
@@ -202,6 +843,9 @@
         <p>Returns the length of the maximum initial segment of
           <c><anno>String</anno></c>, which consists entirely of characters
           from <c><anno>Chars</anno></c>.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="#take/2"><c>take/2</c></seealso>.</p>
         <p><em>Example:</em></p>
         <code type="none">
 > string:span("\t    abcdef", " \t").
@@ -217,6 +861,9 @@
           <c><anno>SubString</anno></c> begins in <c><anno>String</anno></c>.
           Returns <c>0</c> if <c><anno>SubString</anno></c>
           does not exist in <c><anno>String</anno></c>.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="#find/2"><c>find/2</c></seealso>.</p>
         <p><em>Example:</em></p>
         <code type="none">
 > string:str(" Hello Hello World World ", "Hello World").
@@ -230,12 +877,15 @@
       <name name="strip" arity="3"/>
       <fsummary>Strip leading or trailing characters.</fsummary>
       <desc>
-        <p>Returns a string, where leading and/or trailing blanks or a
+        <p>Returns a string, where leading or trailing, or both, blanks or a
           number of <c><anno>Character</anno></c> have been removed.
           <c><anno>Direction</anno></c>, which can be <c>left</c>, <c>right</c>,
           or <c>both</c>, indicates from which direction blanks are to be
           removed. <c>strip/1</c> is equivalent to
           <c>strip(String, both)</c>.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="#trim/3"><c>trim/3</c></seealso>.</p>
         <p><em>Example:</em></p>
         <code type="none">
 > string:strip("...Hello.....", both, $.).
@@ -251,6 +901,9 @@
         <p>Returns a substring of <c><anno>String</anno></c>, starting at
           position <c><anno>Start</anno></c> to the end of the string, or to
           and including position <c><anno>Stop</anno></c>.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="#slice/3"><c>slice/3</c></seealso>.</p>
         <p><em>Example:</em></p>
         <code type="none">
 sub_string("Hello World", 4, 8).
@@ -266,6 +919,9 @@ sub_string("Hello World", 4, 8).
         <p>Returns a substring of <c><anno>String</anno></c>, starting at
           position <c><anno>Start</anno></c>, and ending at the end of the
           string or at length <c><anno>Length</anno></c>.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="#slice/3"><c>slice/3</c></seealso>.</p>
         <p><em>Example:</em></p>
         <code type="none">
 > substr("Hello World", 4, 5).
@@ -281,6 +937,9 @@ sub_string("Hello World", 4, 8).
         <p>Returns the word in position <c><anno>Number</anno></c> of
           <c><anno>String</anno></c>. Words are separated by blanks or
           <c><anno>Character</anno></c>s.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="#nth_lexeme/3"><c>nth_lexeme/3</c></seealso>.</p>
         <p><em>Example:</em></p>
         <code type="none">
 > string:sub_word(" Hello old boy !",3,$o).
@@ -289,50 +948,6 @@ sub_string("Hello World", 4, 8).
     </func>
 
     <func>
-      <name name="to_float" arity="1"/>
-      <fsummary>Returns a float whose text representation is the integers
-        (ASCII values) in a string.</fsummary>
-      <desc>
-        <p>Argument <c><anno>String</anno></c> is expected to start with a
-          valid text represented float (the digits are ASCII values).
-          Remaining characters in the string after the float are returned in
-          <c><anno>Rest</anno></c>.</p>
-        <p><em>Example:</em></p>
-        <code type="none">
-> {F1,Fs} = string:to_float("1.0-1.0e-1"),
-> {F2,[]} = string:to_float(Fs),
-> F1+F2.
-0.9
-> string:to_float("3/2=1.5").
-{error,no_float}
-> string:to_float("-1.5eX").
-{-1.5,"eX"}</code>
-      </desc>
-    </func>
-
-    <func>
-      <name name="to_integer" arity="1"/>
-      <fsummary>Returns an integer whose text representation is the integers
-        (ASCII values) in a string.</fsummary>
-      <desc>
-        <p>Argument <c><anno>String</anno></c> is expected to start with a
-          valid text represented integer (the digits are ASCII values).
-          Remaining characters in the string after the integer are returned in
-          <c><anno>Rest</anno></c>.</p>
-        <p><em>Example:</em></p>
-        <code type="none">
-> {I1,Is} = string:to_integer("33+22"),
-> {I2,[]} = string:to_integer(Is),
-> I1-I2.
-11
-> string:to_integer("0.5").
-{0,".5"}
-> string:to_integer("x=2").
-{error,no_integer}</code>
-      </desc>
-    </func>
-
-    <func>
       <name name="to_lower" arity="1" clause_i="1"/>
       <name name="to_lower" arity="1" clause_i="2"/>
       <name name="to_upper" arity="1" clause_i="1"/>
@@ -346,6 +961,11 @@ sub_string("Hello World", 4, 8).
         <p>The specified string or character is case-converted. Notice that
           the supported character set is ISO/IEC 8859-1 (also called Latin 1);
           all values outside this set are unchanged</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>. Use
+	<seealso marker="#lowercase/1"><c>lowercase/1</c></seealso>,
+	<seealso marker="#uppercase/1"><c>uppercase/1</c></seealso>,
+	<seealso marker="#titlecase/1"><c>titlecase/1</c></seealso> or
+	<seealso marker="#casefold/1"><c>casefold/1</c></seealso>.</p>
       </desc>
     </func>
 
@@ -363,6 +983,9 @@ sub_string("Hello World", 4, 8).
           adjacent separator characters in <c><anno>String</anno></c>
           are treated as one. That is, there are no empty
           strings in the resulting list of tokens.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="#lexemes/2"><c>lexemes/2</c></seealso>.</p>
       </desc>
     </func>
 
@@ -373,6 +996,9 @@ sub_string("Hello World", 4, 8).
       <desc>
         <p>Returns the number of words in <c><anno>String</anno></c>, separated
           by blanks or <c><anno>Character</anno></c>.</p>
+	<p>This function is <seealso marker="#oldapi">obsolete</seealso>.
+	Use
+	<seealso marker="#lexemes/2"><c>lexemes/2</c></seealso>.</p>
         <p><em>Example:</em></p>
         <code type="none">
 > words(" Hello old boy!", $o).
@@ -387,10 +1013,7 @@ sub_string("Hello World", 4, 8).
       other. The reason is that this string package is the
       combination of two earlier packages and all functions of
       both packages have been retained.</p>
-
-    <note>
-      <p>Any undocumented functions in <c>string</c> are not to be used.</p>
-    </note>
   </section>
+
 </erlref>
 
diff --git a/lib/stdlib/doc/src/unicode.xml b/lib/stdlib/doc/src/unicode.xml
index 93d0d37456..382b253ba1 100644
--- a/lib/stdlib/doc/src/unicode.xml
+++ b/lib/stdlib/doc/src/unicode.xml
@@ -50,8 +50,35 @@
     external entities where this is required. When working inside the
     Erlang/OTP environment, it is recommended to keep binaries in UTF-8 when
     representing Unicode characters. ISO Latin-1 encoding is supported both
-    for backward compatibility and for communication 
-    with external entities not supporting Unicode character sets.</p>
+    for backward compatibility and for communication
+  with external entities not supporting Unicode character sets.</p>
+  <p>Programs should always operate on a normalized form and compare
+  canonical-equivalent Unicode characters as equal. All characters
+  should thus be normalized to one form once on the system borders.
+  One of the following functions can convert characters to their
+  normalized forms: <seealso marker="#characters_to_nfc_list/1">
+  <c>characters_to_nfc_list/1</c></seealso>,
+  <seealso marker="#characters_to_nfc_binary/1">
+    <c>characters_to_nfc_binary/1</c></seealso>,
+    <seealso marker="#characters_to_nfd_list/1">
+    <c>characters_to_nfd_list/1</c></seealso> or
+    <seealso marker="#characters_to_nfd_binary/1">
+      <c>characters_to_nfd_binary/1</c></seealso>.
+  For general text
+  <seealso marker="#characters_to_nfc_list/1">
+    <c>characters_to_nfc_list/1</c></seealso> or
+    <seealso marker="#characters_to_nfc_binary/1">
+      <c>characters_to_nfc_binary/1</c></seealso> is preferred, and
+      for identifiers one of the compatibility normalization
+      functions, such as
+      <seealso marker="#characters_to_nfkc_list/1">
+      <c>characters_to_nfkc_list/1</c></seealso>,
+      is preferred for security reasons.
+      The normalization functions were introduced in OTP 20.
+      Additional information on normalization can be found in the
+      <url href="http://unicode.org/faq/normalization.html">Unicode FAQ</url>.
+  </p>
+
   </description>
 
   <datatypes>
@@ -335,6 +362,154 @@ decode_data(Data) ->
     </func>
 
     <func>
+      <name name="characters_to_nfc_list" arity="1"/>
+      <fsummary>Normalize characters to a list of canonical equivalent
+      composed Unicode characters.</fsummary>
+      <desc>
+        <p>Converts a possibly deep list of characters and binaries
+        into a Normalized Form of canonical equivalent Composed
+        characters according to the Unicode standard.</p>
+	<p>Any binaries in the input must be encoded with utf8
+        encoding.
+	</p>
+	<p>The result is a list of characters.</p>
+        <code>
+3> unicode:characters_to_nfc_list([&lt;&lt;"abc..a">>,[778],$a,[776],$o,[776]]).
+"abc..åäö"
+</code>
+      </desc>
+    </func>
+
+    <func>
+      <name name="characters_to_nfc_binary" arity="1"/>
+      <fsummary>Normalize characters to a utf8 binary of canonical equivalent
+      composed Unicode characters.</fsummary>
+      <desc>
+        <p>Converts a possibly deep list of characters and binaries
+        into a Normalized Form of canonical equivalent Composed
+        characters according to the Unicode standard.</p>
+	<p>Any binaries in the input must be encoded with utf8
+        encoding.</p>
+	<p>The result is a utf8 encoded binary.</p>
+        <code>
+4> unicode:characters_to_nfc_binary([&lt;&lt;"abc..a">>,[778],$a,[776],$o,[776]]).
+&lt;&lt;"abc..åäö"/utf8>>
+</code>
+      </desc>
+    </func>
+
+    <func>
+      <name name="characters_to_nfd_list" arity="1"/>
+      <fsummary>Normalize characters to a list of canonical equivalent
+      decomposed Unicode characters.</fsummary>
+      <desc>
+        <p>Converts a possibly deep list of characters and binaries
+        into a Normalized Form of canonical equivalent Decomposed
+        characters according to the Unicode standard.</p>
+	<p>Any binaries in the input must be encoded with utf8
+        encoding.
+	</p>
+	<p>The result is a list of characters.</p>
+        <code>
+1> unicode:characters_to_nfd_list("abc..åäö").
+[97,98,99,46,46,97,778,97,776,111,776]
+</code>
+      </desc>
+    </func>
+
+    <func>
+      <name name="characters_to_nfd_binary" arity="1"/>
+      <fsummary>Normalize characters to a utf8 binary of canonical equivalent
+      decomposed Unicode characters.</fsummary>
+      <desc>
+        <p>Converts a possibly deep list of characters and binaries
+        into a Normalized Form of canonical equivalent Decomposed
+        characters according to the Unicode standard.</p>
+	<p>Any binaries in the input must be encoded with utf8
+        encoding.</p>
+	<p>The result is a utf8 encoded binary.</p>
+        <code>
+2> unicode:characters_to_nfd_binary("abc..åäö").
+&lt;&lt;97,98,99,46,46,97,204,138,97,204,136,111,204,136>>
+</code>
+      </desc>
+    </func>
+
+    <func>
+      <name name="characters_to_nfkc_list" arity="1"/>
+      <fsummary>Normalize characters to a list of compatibly equivalent
+      composed Unicode characters.</fsummary>
+      <desc>
+        <p>Converts a possibly deep list of characters and binaries
+        into a Normalized Form of compatibly equivalent Composed
+        characters according to the Unicode standard.</p>
+	<p>Any binaries in the input must be encoded with utf8
+        encoding.
+	</p>
+	<p>The result is a list of characters.</p>
+        <code>
+3> unicode:characters_to_nfkc_list([&lt;&lt;"abc..a">>,[778],$a,[776],$o,[776],[65299,65298]]).
+"abc..åäö32"
+</code>
+      </desc>
+    </func>
+
+    <func>
+      <name name="characters_to_nfkc_binary" arity="1"/>
+      <fsummary>Normalize characters to a utf8 binary of compatibly equivalent
+      composed Unicode characters.</fsummary>
+      <desc>
+        <p>Converts a possibly deep list of characters and binaries
+        into a Normalized Form of compatibly equivalent Composed
+        characters according to the Unicode standard.</p>
+	<p>Any binaries in the input must be encoded with utf8
+        encoding.</p>
+	<p>The result is a utf8 encoded binary.</p>
+        <code>
+4> unicode:characters_to_nfkc_binary([&lt;&lt;"abc..a">>,[778],$a,[776],$o,[776],[65299,65298]]).
+&lt;&lt;"abc..åäö32"/utf8>>
+</code>
+      </desc>
+    </func>
+
+    <func>
+      <name name="characters_to_nfkd_list" arity="1"/>
+      <fsummary>Normalize characters to a list of compatibly equivalent
+      decomposed Unicode characters.</fsummary>
+      <desc>
+        <p>Converts a possibly deep list of characters and binaries
+        into a Normalized Form of compatibly equivalent Decomposed
+        characters according to the Unicode standard.</p>
+	<p>Any binaries in the input must be encoded with utf8
+        encoding.
+	</p>
+	<p>The result is a list of characters.</p>
+        <code>
+1> unicode:characters_to_nfkd_list(["abc..åäö",[65299,65298]]).
+[97,98,99,46,46,97,778,97,776,111,776,51,50]
+</code>
+      </desc>
+    </func>
+
+    <func>
+      <name name="characters_to_nfkd_binary" arity="1"/>
+      <fsummary>Normalize characters to a utf8 binary of compatibly equivalent
+      decomposed Unicode characters.</fsummary>
+      <desc>
+        <p>Converts a possibly deep list of characters and binaries
+        into a Normalized Form of compatibly equivalent Decomposed
+        characters according to the Unicode standard.</p>
+	<p>Any binaries in the input must be encoded with utf8
+        encoding.</p>
+	<p>The result is a utf8 encoded binary.</p>
+        <code>
+2> unicode:characters_to_nfkd_binary(["abc..åäö",[65299,65298]]).
+&lt;&lt;97,98,99,46,46,97,204,138,97,204,136,111,204,136,51,50>>
+</code>
+      </desc>
+    </func>
+
+    <func>
       <name name="encoding_to_bom" arity="1"/>
       <fsummary>Create a binary UTF byte order mark from encoding.</fsummary>
       <type_desc variable="Bin">
diff --git a/lib/stdlib/doc/src/unicode_usage.xml b/lib/stdlib/doc/src/unicode_usage.xml
index 1e7f08db86..6af2fa9fa3 100644
--- a/lib/stdlib/doc/src/unicode_usage.xml
+++ b/lib/stdlib/doc/src/unicode_usage.xml
@@ -63,9 +63,12 @@
 	<item><p>In Erlang/OTP 17.0, the encoding default for Erlang
 	source files was switched to UTF-8.</p></item>
 
-	<item><p>In Erlang/OTP 20.0, atoms and function names can contain
+	<item><p>In Erlang/OTP 20.0, atoms and function names can contain
 	Unicode characters. Module names, application names, and node
-	names are still restricted to the ISO Latin-1 range.</p></item>
+	names are still restricted to the ISO Latin-1 range.</p>
+	<p>Support was added for normalization forms in
+	<c>unicode</c> and the <c>string</c> module now handles
+	utf8-encoded binaries.</p></item>
       </list>
 
     <p>This section outlines the current Unicode support and gives some
@@ -110,23 +113,27 @@
       </item>
     </list>
 
-    <p>So, a conversion function must know not only one character at a time,
-      but possibly the whole sentence, the natural language to translate to,
-      the differences in input and output string length, and so on.
-      Erlang/OTP has currently no Unicode <c>to_upper</c>/<c>to_lower</c>
-      functionality, but publicly available libraries address these issues.</p>
-
-    <p>Another example is the accented characters, where the same glyph has two
-      different representations. The Swedish letter "ö" is one example.
-      The Unicode standard has a code point for it, but you can also write it
-      as "o" followed by "U+0308" (Combining Diaeresis, with the simplified
-      meaning that the last letter is to have "¨" above). They have the same
-      glyph. They are for most purposes the same, but have different
-      representations. For example, MacOS X converts all filenames to use
-      Combining Diaeresis, while most other programs (including Erlang) try to
-      hide that by doing the opposite when, for example, listing directories.
-      However it is done, it is usually important to normalize such
-      characters to avoid confusion.</p>
+    <p>So, a conversion function must know not only one character at a
+    time, but possibly the whole sentence, the natural language to
+    translate to, the differences in input and output string length,
+    and so on.  Erlang/OTP has currently no Unicode
+    <c>uppercase</c>/<c>lowercase</c> functionality with language
+    specific handling, but publicly available libraries address these
+    issues.</p>
+
+    <p>Another example is the accented characters, where the same
+    glyph has two different representations. The Swedish letter "ö" is
+    one example.  The Unicode standard has a code point for it, but
+    you can also write it as "o" followed by "U+0308" (Combining
+    Diaeresis, with the simplified meaning that the last letter is to
+    have "¨" above). They have the same glyph, that is, the same
+    user-perceived character. They are for most purposes the same, but have different
+    representations. For example, MacOS X converts all filenames to
+    use Combining Diaeresis, while most other programs (including
+    Erlang) try to hide that by doing the opposite when, for example,
+    listing directories.  However it is done, it is usually important
+    to normalize such characters to avoid confusion.
+    </p>
 
     <p>The list of examples can be made long. One need a kind of knowledge that
       was not needed when programs only considered one or two languages. The
@@ -273,7 +280,7 @@
         them. In some cases functionality has been added to already
         existing interfaces (as the <seealso
         marker="stdlib:string"><c>string</c></seealso> module now can
-        handle lists with any code points). In some cases new
+        handle strings with any code points). In some cases new
         functionality or options have been added (as in the <seealso
         marker="stdlib:io"><c>io</c></seealso> module, the file
         handling, the <seealso
@@ -978,7 +985,7 @@ Eshell V5.10.1  (abort with ^G)
 
     <p>Fortunately, most textual data has been stored in lists and range
       checking has been sparse, so modules like <c>string</c> work well for
-      Unicode lists with little need for conversion or extension.</p>
+      Unicode strings with little need for conversion or extension.</p>
 
     <p>Some modules are, however, changed to be explicitly Unicode-aware. These
       modules include:</p>
@@ -1029,18 +1036,17 @@ Eshell V5.10.1  (abort with ^G)
           has extensive support for Unicode text.</p></item>
     </taglist>
 
-    <p>The <seealso marker="stdlib:string"><c>string</c></seealso> module works
-      perfectly for Unicode strings and ISO Latin-1 strings, except the
-      language-dependent functions
-      <seealso marker="stdlib:string#to_upper/1"><c>string:to_upper/1</c></seealso>
-      and
-      <seealso marker="stdlib:string#to_lower/1"><c>string:to_lower/1</c></seealso>,
-      which are only correct for the ISO Latin-1 character set. These two
-      functions can never function correctly for Unicode characters in their
-      current form, as there are language and locale issues as well as
-      multi-character mappings to consider when converting text between cases.
-      Converting case in an international environment is a large subject not
-      yet addressed in OTP.</p>
+    <p>The <seealso marker="stdlib:string"><c>string</c></seealso>
+    module works perfectly for Unicode strings and ISO Latin-1
+    strings, except the language-dependent functions <seealso
+    marker="stdlib:string#uppercase/1"><c>string:uppercase/1</c></seealso>
+    and <seealso
+    marker="stdlib:string#lowercase/1"><c>string:lowercase/1</c></seealso>.
+    These two functions can never function correctly for Unicode
+    characters in their current form, as there are language and locale
+    issues to consider when converting text between cases.  Converting
+    case in an international environment is a large subject not yet
+    addressed in OTP.</p>
   </section>
 
   <section>