aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib/doc
diff options
context:
space:
mode:
Diffstat (limited to 'lib/stdlib/doc')
-rw-r--r--lib/stdlib/doc/src/Makefile1
-rw-r--r--lib/stdlib/doc/src/c.xml9
-rw-r--r--lib/stdlib/doc/src/erl_tar.xml47
-rw-r--r--lib/stdlib/doc/src/ets.xml33
-rw-r--r--lib/stdlib/doc/src/filelib.xml29
-rw-r--r--lib/stdlib/doc/src/filename.xml30
-rw-r--r--lib/stdlib/doc/src/gb_sets.xml2
-rw-r--r--lib/stdlib/doc/src/gen_server.xml52
-rw-r--r--lib/stdlib/doc/src/gen_statem.xml14
-rw-r--r--lib/stdlib/doc/src/io.xml109
-rw-r--r--lib/stdlib/doc/src/lists.xml14
-rw-r--r--lib/stdlib/doc/src/maps.xml94
-rw-r--r--lib/stdlib/doc/src/ordsets.xml9
-rw-r--r--lib/stdlib/doc/src/rand.xml118
-rw-r--r--lib/stdlib/doc/src/ref_man.xml1
-rw-r--r--lib/stdlib/doc/src/sets.xml9
-rw-r--r--lib/stdlib/doc/src/specs.xml1
-rw-r--r--lib/stdlib/doc/src/string.xml6
-rw-r--r--lib/stdlib/doc/src/unicode_usage.xml8
-rw-r--r--lib/stdlib/doc/src/uri_string.xml359
20 files changed, 878 insertions, 67 deletions
diff --git a/lib/stdlib/doc/src/Makefile b/lib/stdlib/doc/src/Makefile
index e7ea38c5c3..af27efa6c1 100644
--- a/lib/stdlib/doc/src/Makefile
+++ b/lib/stdlib/doc/src/Makefile
@@ -98,6 +98,7 @@ XML_REF3_FILES = \
sys.xml \
timer.xml \
unicode.xml \
+ uri_string.xml \
win32reg.xml \
zip.xml
diff --git a/lib/stdlib/doc/src/c.xml b/lib/stdlib/doc/src/c.xml
index 7666699183..697e1715e7 100644
--- a/lib/stdlib/doc/src/c.xml
+++ b/lib/stdlib/doc/src/c.xml
@@ -94,6 +94,15 @@
</func>
<func>
+ <name name="erlangrc" arity="1"/>
+ <fsummary>Load an erlang resource file.</fsummary>
+ <desc>
+ <p>Search <c>PathList</c> and load <c>.erlang</c> resource file if
+ found.</p>
+ </desc>
+ </func>
+
+ <func>
<name name="flush" arity="0"/>
<fsummary>Flush any messages sent to the shell.</fsummary>
<desc>
diff --git a/lib/stdlib/doc/src/erl_tar.xml b/lib/stdlib/doc/src/erl_tar.xml
index 337028568a..68fa071090 100644
--- a/lib/stdlib/doc/src/erl_tar.xml
+++ b/lib/stdlib/doc/src/erl_tar.xml
@@ -4,7 +4,7 @@
<erlref>
<header>
<copyright>
- <year>2003</year><year>2017</year>
+ <year>2003</year><year>2018</year>
<holder>Ericsson AB. All Rights Reserved.</holder>
</copyright>
<legalnotice>
@@ -90,8 +90,8 @@
<section>
<title>Other Storage Media</title>
- <p>The <seealso marker="inets:ftp"><c>ftp</c></seealso>
- module (Inets) normally accesses the tar file on disk using
+ <p>The <seealso marker="ftp:ftp"><c>ftp</c></seealso>
+ module normally accesses the tar file on disk using
the <seealso marker="kernel:file"><c>file</c></seealso> module.
When other needs arise, you can define your own low-level Erlang
functions to perform the writing and reading on the storage media;
@@ -136,6 +136,9 @@
<v>Filename = filename()|{NameInArchive,FilenameOrBin}</v>
<v>Options = [Option]</v>
<v>Option = dereference|verbose|{chunks,ChunkSize}</v>
+ <v>|{atime,non_neg_integer()}|{mtime,non_neg_integer()}</v>
+ <v>|{ctime,non_neg_integer()}|{uid,non_neg_integer()}</v>
+ <v>|{gid,non_neg_integer()}</v>
<v>ChunkSize = positive_integer()</v>
<v>RetValue = ok|{error,{Filename,Reason}}</v>
<v>Reason = term()</v>
@@ -167,6 +170,42 @@
<seealso marker="ssh:ssh_sftp#open_tar/3">
<c>ssh_sftp:open_tar/3</c></seealso>.</p>
</item>
+ <tag><c>{atime,non_neg_integer()}</c></tag>
+ <item>
+ <p>Sets the last time, as
+ <seealso marker="erts:time_correction#POSIX_Time">
+ POSIX time</seealso>, when the file was read. See also
+ <seealso marker="kernel:file#read_file_info/1">
+ <c>file:read_file_info/1</c></seealso>.</p>
+ </item>
+ <tag><c>{mtime,non_neg_integer()}</c></tag>
+ <item>
+ <p>Sets the last time, as
+ <seealso marker="erts:time_correction#POSIX_Time">
+ POSIX time</seealso>, when the file was written. See also
+ <seealso marker="kernel:file#read_file_info/1">
+ <c>file:read_file_info/1</c></seealso>.</p>
+ </item>
+ <tag><c>{ctime,non_neg_integer()}</c></tag>
+ <item>
+ <p>Sets the time, as
+ <seealso marker="erts:time_correction#POSIX_Time">
+ POSIX time</seealso>, when the file was created. See also
+ <seealso marker="kernel:file#read_file_info/1">
+ <c>file:read_file_info/1</c></seealso>.</p>
+ </item>
+ <tag><c>{uid,non_neg_integer()}</c></tag>
+ <item>
+ <p>Sets the file owner.
+ <seealso marker="kernel:file#read_file_info/1">
+ <c>file:read_file_info/1</c></seealso>.</p>
+ </item>
+ <tag><c>{gid,non_neg_integer()}</c></tag>
+ <item>
+ <p>Sets the group that the file owner belongs to.
+ <seealso marker="kernel:file#read_file_info/1">
+ <c>file:read_file_info/1</c></seealso>.</p>
+ </item>
</taglist>
</desc>
</func>
@@ -378,7 +417,7 @@
<v>Reason = term()</v>
</type>
<desc>
- <p>Cconverts an error reason term to a human-readable error message
+ <p>Converts an error reason term to a human-readable error message
string.</p>
</desc>
</func>
diff --git a/lib/stdlib/doc/src/ets.xml b/lib/stdlib/doc/src/ets.xml
index 51e35cd2df..305376a425 100644
--- a/lib/stdlib/doc/src/ets.xml
+++ b/lib/stdlib/doc/src/ets.xml
@@ -487,6 +487,11 @@ Error: fun containing local Erlang function calls
<p>The pid of the heir of the table, or <c>none</c> if no heir
is set.</p>
</item>
+ <tag><c>{id,</c><seealso marker="#type-tid">
+ <c>tid()</c></seealso><c>}</c></tag>
+ <item>
+ <p>The table identifier.</p>
+ </item>
<tag><c>{keypos, integer() >= 1}</c></tag>
<item>
<p>The key position.</p>
@@ -1074,10 +1079,13 @@ ets:select(Table, MatchSpec),</code>
</item>
<tag><c>named_table</c></tag>
<item>
- <p>If this option is present, name <c><anno>Name</anno></c> is
- associated with the table identifier. The name can then
- be used instead of the table identifier in subsequent
- operations.</p>
+ <p>If this option is present, the table is registered under its
+ <c><anno>Name</anno></c> which can then be used instead of the
+ table identifier in subsequent operations.</p>
+ <p>The function will also return the <c><anno>Name</anno></c>
+ instead of the table identifier. To get the table identifier of a
+ named table, use
+ <seealso marker="#whereis/1"><c>whereis/1</c></seealso>.</p>
</item>
<tag><c>{keypos,<anno>Pos</anno>}</c></tag>
<item>
@@ -1961,7 +1969,7 @@ true</pre>
The return value is a list of the new counter values from each
update operation in the same order as in the operation list. If an
empty list is specified, nothing is updated and an empty list is
- returned. If the function fails, no updates is done.</p>
+ returned. If the function fails, no updates are done.</p>
<p>The specified <c><anno>Key</anno></c> is used to identify the object
by either <em>matching</em> the key of an object in a <c>set</c>
table, or <em>compare equal</em> to the key of an object in an
@@ -2037,6 +2045,21 @@ true</pre>
</list>
</desc>
</func>
+
+ <func>
+ <name name="whereis" arity="1"/>
+ <fsummary>Retrieves the tid() of a named table.</fsummary>
+ <desc>
+ <p>This function returns the
+ <seealso marker="#type-tid"><c>tid()</c></seealso> of the named table
+ identified by <c><anno>TableName</anno></c>, or <c>undefined</c> if
+ no such table exists. The <c>tid()</c> can be used in place of the
+ table name in all operations, which is slightly faster since the name
+ does not have to be resolved on each call.</p>
+ <p>If the table is deleted, the <c>tid()</c> will be invalid even if
+ another named table is created with the same name.</p>
+ </desc>
+ </func>
</funcs>
</erlref>
diff --git a/lib/stdlib/doc/src/filelib.xml b/lib/stdlib/doc/src/filelib.xml
index 11762a3c5a..3b5be75bc0 100644
--- a/lib/stdlib/doc/src/filelib.xml
+++ b/lib/stdlib/doc/src/filelib.xml
@@ -45,6 +45,30 @@
<p>For more information about raw filenames, see the
<seealso marker="kernel:file"><c>file</c></seealso> module.</p>
+
+ <note>
+ <p>
+ Functionality in this module generally assumes valid input and
+ does not necessarily fail on input that does not use a valid
+ encoding, but may instead very likely produce invalid output.
+ </p>
+ <p>
+ File operations used to accept filenames containing
+ null characters (integer value zero). This caused
+ the name to be truncated and in some cases arguments
+ to primitive operations to be mixed up. Filenames
+ containing null characters inside the filename
+ are now <em>rejected</em> and will cause primitive
+ file operations to fail.
+ </p>
+ </note>
+ <warning><p>
+ Currently null characters at the end of the filename
+ will be accepted by primitive file operations. Such
+ filenames are however still documented as invalid. The
+ implementation will also change in the future and
+ reject such filenames.
+ </p></warning>
</description>
<datatypes>
@@ -193,6 +217,11 @@
<p>Other characters represent themselves. Only filenames that
have exactly the same character in the same position match.
Matching is case-sensitive, for example, "a" does not match "A".</p>
+ <p>Directory separators must always be written as <c>/</c>, even on
+ Windows.</p>
+ <p>A character preceded by <c>\</c> loses its special meaning. Note
+ that <c>\</c> must be written as <c>\\</c> in a string literal.
+ For example, "\\?*" will match any filename starting with <c>?</c>.</p>
<p>Notice that multiple "*" characters are allowed
(as in Unix wildcards, but opposed to Windows/DOS wildcards).</p>
<p><em>Examples:</em></p>
diff --git a/lib/stdlib/doc/src/filename.xml b/lib/stdlib/doc/src/filename.xml
index 1135a6dd80..ce19f70df0 100644
--- a/lib/stdlib/doc/src/filename.xml
+++ b/lib/stdlib/doc/src/filename.xml
@@ -46,7 +46,10 @@
filename by removing redundant directory separators, use
<seealso marker="#join/1"><c>join/1</c></seealso>.</p>
- <p>The module supports raw filenames in the way that if a binary is
+ <p>
+ The module supports
+ <seealso marker="unicode_usage#notes-about-raw-filenames">raw
+ filenames</seealso> in the way that if a binary is
present, or the filename cannot be interpreted according to the return
value of <seealso marker="kernel:file#native_name_encoding/0">
<c>file:native_name_encoding/0</c></seealso>, a raw filename is also
@@ -56,6 +59,30 @@
(the join operation is performed of course). For more information
about raw filenames, see the
<seealso marker="kernel:file"><c>file</c></seealso> module.</p>
+
+ <note>
+ <p>
+ Functionality in this module generally assumes valid input and
+ does not necessarily fail on input that does not use a valid
+ encoding, but may instead very likely produce invalid output.
+ </p>
+ <p>
+ File operations used to accept filenames containing
+ null characters (integer value zero). This caused
+ the name to be truncated and in some cases arguments
+ to primitive operations to be mixed up. Filenames
+ containing null characters inside the filename
+ are now <em>rejected</em> and will cause primitive
+ file operations to fail.
+ </p>
+ </note>
+ <warning><p>
+ Currently null characters at the end of the filename
+ will be accepted by primitive file operations. Such
+ filenames are however still documented as invalid. The
+ implementation will also change in the future and
+ reject such filenames.
+ </p></warning>
</description>
<datatypes>
<datatype>
@@ -558,6 +585,7 @@ unsafe</pre>
["a:/","msdev","include"]</pre>
</desc>
</func>
+
</funcs>
</erlref>
diff --git a/lib/stdlib/doc/src/gb_sets.xml b/lib/stdlib/doc/src/gb_sets.xml
index 7bfe477a11..2a3785dc27 100644
--- a/lib/stdlib/doc/src/gb_sets.xml
+++ b/lib/stdlib/doc/src/gb_sets.xml
@@ -83,6 +83,8 @@
</item>
<item><seealso marker="#is_element/2"><c>is_element/2</c></seealso>
</item>
+ <item><seealso marker="#is_empty/1"><c>is_empty/1</c></seealso>
+ </item>
<item><seealso marker="#is_set/1"><c>is_set/1</c></seealso>
</item>
<item><seealso marker="#is_subset/2"><c>is_subset/2</c></seealso>
diff --git a/lib/stdlib/doc/src/gen_server.xml b/lib/stdlib/doc/src/gen_server.xml
index 7d137fc772..da74e793e6 100644
--- a/lib/stdlib/doc/src/gen_server.xml
+++ b/lib/stdlib/doc/src/gen_server.xml
@@ -60,6 +60,8 @@ gen_server:abcast -----> Module:handle_cast/2
- -----> Module:handle_info/2
+- -----> Module:handle_continue/2
+
- -----> Module:terminate/2
- -----> Module:code_change/3</pre>
@@ -88,6 +90,13 @@ gen_server:abcast -----> Module:handle_cast/2
implies at least two garbage collections (when hibernating and
shortly after waking up) and is not something you want to do
between each call to a busy server.</p>
+
+ <p>If the <c>gen_server</c> process needs to perform an action
+ immediately after initialization or to break the execution of a
+ callback into multiple steps, it can return <c>{continue,Continue}</c>
+ in place of the time-out or hibernation value, which will immediately
+ invoke the <c>handle_continue/2</c> callback.</p>
+
</description>
<funcs>
@@ -610,12 +619,15 @@ gen_server:abcast -----> Module:handle_cast/2
<v>State = term()</v>
<v>Result = {reply,Reply,NewState} | {reply,Reply,NewState,Timeout}</v>
<v>&nbsp;&nbsp;| {reply,Reply,NewState,hibernate}</v>
+ <v>&nbsp;&nbsp;| {reply,Reply,NewState,{continue,Continue}}</v>
<v>&nbsp;&nbsp;| {noreply,NewState} | {noreply,NewState,Timeout}</v>
<v>&nbsp;&nbsp;| {noreply,NewState,hibernate}</v>
+ <v>&nbsp;&nbsp;| {noreply,NewState,{continue,Continue}}</v>
<v>&nbsp;&nbsp;| {stop,Reason,Reply,NewState} | {stop,Reason,NewState}</v>
<v>&nbsp;Reply = term()</v>
<v>&nbsp;NewState = term()</v>
<v>&nbsp;Timeout = int()>=0 | infinity</v>
+ <v>&nbsp;Continue = term()</v>
<v>&nbsp;Reason = term()</v>
</type>
<desc>
@@ -673,9 +685,11 @@ gen_server:abcast -----> Module:handle_cast/2
<v>State = term()</v>
<v>Result = {noreply,NewState} | {noreply,NewState,Timeout}</v>
<v>&nbsp;&nbsp;| {noreply,NewState,hibernate}</v>
+ <v>&nbsp;&nbsp;| {noreply,NewState,{continue,Continue}}</v>
<v>&nbsp;&nbsp;| {stop,Reason,NewState}</v>
<v>&nbsp;NewState = term()</v>
<v>&nbsp;Timeout = int()>=0 | infinity</v>
+ <v>&nbsp;Continue = term()</v>
<v>&nbsp;Reason = term()</v>
</type>
<desc>
@@ -690,6 +704,41 @@ gen_server:abcast -----> Module:handle_cast/2
</func>
<func>
+ <name>Module:handle_continue(Continue, State) -> Result</name>
+ <fsummary>Handle a continue instruction.</fsummary>
+ <type>
+ <v>Continue = term()</v>
+ <v>State = term()</v>
+ <v>Result = {noreply,NewState} | {noreply,NewState,Timeout}</v>
+ <v>&nbsp;&nbsp;| {noreply,NewState,hibernate}</v>
+ <v>&nbsp;&nbsp;| {noreply,NewState,{continue,Continue}}</v>
+ <v>&nbsp;&nbsp;| {stop,Reason,NewState}</v>
+ <v>&nbsp;NewState = term()</v>
+ <v>&nbsp;Timeout = int()>=0 | infinity</v>
+ <v>&nbsp;Continue = term()</v>
+ <v>&nbsp;Reason = normal | term()</v>
+ </type>
+ <desc>
+ <note>
+ <p>This callback is optional, so callback modules need to
+ export it only if they return <c>{continue,Continue}</c>
+ from another callback. If continue is used and the callback
+ is not implemented, the process will exit with <c>undef</c>
+ error.</p>
+ </note>
+ <p>This function is called by a <c>gen_server</c> process whenever
+ a previous callback returns <c>{continue, Continue}</c>.
+ <c>handle_continue/2</c> is invoked immediately after the previous
+ callback, which makes it useful for performing work after
+ initialization or for splitting the work in a callback in
+ multiple steps, updating the process state along the way.</p>
+ <p>For a description of the other arguments and possible return values,
+ see <seealso marker="#Module:handle_call/3">
+ <c>Module:handle_call/3</c></seealso>.</p>
+ </desc>
+ </func>
+
+ <func>
<name>Module:handle_info(Info, State) -> Result</name>
<fsummary>Handle an incoming message.</fsummary>
<type>
@@ -697,6 +746,7 @@ gen_server:abcast -----> Module:handle_cast/2
<v>State = term()</v>
<v>Result = {noreply,NewState} | {noreply,NewState,Timeout}</v>
<v>&nbsp;&nbsp;| {noreply,NewState,hibernate}</v>
+ <v>&nbsp;&nbsp;| {noreply,NewState,{continue,Continue}}</v>
<v>&nbsp;&nbsp;| {stop,Reason,NewState}</v>
<v>&nbsp;NewState = term()</v>
<v>&nbsp;Timeout = int()>=0 | infinity</v>
@@ -726,7 +776,7 @@ gen_server:abcast -----> Module:handle_cast/2
<type>
<v>Args = term()</v>
<v>Result = {ok,State} | {ok,State,Timeout} | {ok,State,hibernate}</v>
- <v>&nbsp;| {stop,Reason} | ignore</v>
+ <v>&nbsp;| {ok,State,{continue,Continue}} | {stop,Reason} | ignore</v>
<v>&nbsp;State = term()</v>
<v>&nbsp;Timeout = int()>=0 | infinity</v>
<v>&nbsp;Reason = term()</v>
diff --git a/lib/stdlib/doc/src/gen_statem.xml b/lib/stdlib/doc/src/gen_statem.xml
index 574f488e91..be0d64feba 100644
--- a/lib/stdlib/doc/src/gen_statem.xml
+++ b/lib/stdlib/doc/src/gen_statem.xml
@@ -4,7 +4,7 @@
<erlref>
<header>
<copyright>
- <year>2016</year><year>2017</year>
+ <year>2016</year><year>2018</year>
<holder>Ericsson AB. All Rights Reserved.</holder>
</copyright>
<legalnotice>
@@ -1369,7 +1369,7 @@ handle_event(_, _, State, Data) ->
<c><anno>T</anno></c> is the time-out time.
<c>{clean_timeout,<anno>T</anno>}</c> works like
just <c>T</c> described in the note above
- and uses a proxy process for <c>T &lt; infinity</c>,
+ and uses a proxy process
while <c>{dirty_timeout,<anno>T</anno>}</c>
bypasses the proxy process which is more lightweight.
</p>
@@ -1379,8 +1379,12 @@ handle_event(_, _, State, Data) ->
with <c>{dirty_timeout,<anno>T</anno>}</c>
to avoid that the calling process dies when the call
times out, you will have to be prepared to handle
- a late reply.
- So why not just let the calling process die?
+ a late reply. Note that there is an odd chance
+ to get a late reply even with
+ <c>{dirty_timeout,infinity}</c> or <c>infinity</c>
+ for example in the event of network problems.
+ So why not just let the calling process die
+ by not catching the exception?
</p>
</note>
<p>
@@ -1891,7 +1895,7 @@ handle_event(_, _, State, Data) ->
</p>
<note>
<p>
- Note that if the <c>gen_statem</c> is started trough
+ Note that if the <c>gen_statem</c> is started through
<seealso marker="proc_lib"><c>proc_lib</c></seealso>
and
<seealso marker="#enter_loop/4"><c>enter_loop/4-6</c></seealso>,
diff --git a/lib/stdlib/doc/src/io.xml b/lib/stdlib/doc/src/io.xml
index 64fcf4379f..f1037ec76b 100644
--- a/lib/stdlib/doc/src/io.xml
+++ b/lib/stdlib/doc/src/io.xml
@@ -137,11 +137,11 @@
Hello world!
ok</pre>
<p>The general format of a control sequence is <c>~F.P.PadModC</c>.</p>
- <p>Character <c>C</c> determines the type of control sequence
- to be used, <c>F</c> and <c>P</c> are optional numeric
- arguments. If <c>F</c>, <c>P</c>, or <c>Pad</c> is <c>*</c>,
- the next argument in <c>Data</c> is used as the numeric value
- of <c>F</c> or <c>P</c>.</p>
+ <p>The character <c>C</c> determines the type of control sequence
+ to be used. It is the only required field. All of <c>F</c>,
+ <c>P</c>, <c>Pad</c>, and <c>Mod</c> are optional. For example,
+ to use a <c>#</c> for <c>Pad</c> but use the default values for
+ <c>F</c> and <c>P</c>, you can write <c>~..#C</c>.</p>
<list type="bulleted">
<item>
<p><c>F</c> is the <c>field width</c> of the printed argument. A
@@ -167,13 +167,26 @@ ok</pre>
The default padding character is <c>' '</c> (space).</p>
</item>
<item>
- <p><c>Mod</c> is the control sequence modifier. It is either a
- single character (<c>t</c>, for Unicode
- translation, and <c>l</c>, for stopping <c>p</c> and
- <c>P</c> from detecting printable characters)
- that changes the interpretation of <c>Data</c>.</p>
+ <p><c>Mod</c> is the control sequence modifier. This is
+ one or more characters that change the interpretation of
+ <c>Data</c>. The current modifiers are <c>t</c>, for Unicode
+ translation, and <c>l</c>, for stopping <c>p</c> and <c>P</c>
+ from detecting printable characters.</p>
</item>
</list>
+ <p>If <c>F</c>, <c>P</c>, or <c>Pad</c> is a <c>*</c> character,
+ the next argument in <c>Data</c> is used as the value.
+ For example:</p>
+ <pre>
+1> <input>io:fwrite("~*.*.0f~n",[9, 5, 3.14159265]).</input>
+003.14159
+ok</pre>
+ <p>To use a literal <c>*</c> character as <c>Pad</c>, it must be
+ passed as an argument:</p>
+ <pre>
+2> <input>io:fwrite("~*.*.*f~n",[9, 5, $*, 3.14159265]).</input>
+**3.14159
+ok</pre>
<p><em>Available control sequences:</em></p>
<taglist>
<tag><c>~</c></tag>
@@ -257,8 +270,9 @@ ok</pre>
\x{400}
ok
5> <input>io:fwrite("~s~n",[[1024]]).</input>
-** exception exit: {badarg,[{io,format,[&lt;0.26.0&gt;,"~s~n",[[1024]]]},
- ...</pre>
+** exception error: bad argument
+ in function io:format/3
+ called as io:format(&lt;0.53.0>,"~s~n",[[1024]])</pre>
</item>
<tag><c>w</c></tag>
<item>
@@ -276,10 +290,9 @@ ok
<c>~w</c>, but breaks terms whose printed representation
is longer than one line into many lines and indents each
line sensibly. Left-justification is not supported.
- It also tries to detect lists of
- printable characters and to output these as strings. The
- Unicode translation modifier is used for determining
- what characters are printable, for example:</p>
+ It also tries to detect flat lists of
+ printable characters and output these as strings.
+ For example:</p>
<pre>
1> <input>T = [{attributes,[[{id,age,1.50000},{mode,explicit},</input>
<input>{typename,"INTEGER"}], [{id,cho},{mode,explicit},{typename,'Cho'}]]},</input>
@@ -301,7 +314,7 @@ ok
{mode,implicit}]
ok</pre>
<p>The field width specifies the maximum line length.
- Defaults to 80. The precision specifies the initial
+ It defaults to 80. The precision specifies the initial
indentation of the term. It defaults to the number of
characters printed on this line in the <em>same</em> call to
<seealso marker="#write/1"><c>write/1</c></seealso> or
@@ -331,18 +344,53 @@ ok
[{a,[97]},
{b,[98]}]
ok</pre>
- <p>Binaries that look like UTF-8 encoded strings are
- output with the string syntax if the Unicode translation
- modifier is specified:</p>
+ <p>The Unicode translation modifier <c>t</c> specifies how to treat
+ characters outside the Latin-1 range of codepoints, in
+ atoms, strings, and binaries. For example, printing an atom
+ containing a character &gt; 255:</p>
+ <pre>
+8> <input>io:fwrite("~p~n",[list_to_atom([1024])]).</input>
+'\x{400}'
+ok
+9> <input>io:fwrite("~tp~n",[list_to_atom([1024])]).</input>
+'Ѐ'
+ok</pre>
+ <p>By default, Erlang only detects lists of characters
+ in the Latin-1 range as strings, but the <c>+pc unicode</c>
+ flag can be used to change this (see <seealso
+ marker="#printable_range/0">
+ <c>printable_range/0</c></seealso> for details). For example:</p>
+ <pre>
+10> <input>io:fwrite("~p~n",[[214]]).</input>
+"Ö"
+ok
+11> <input>io:fwrite("~p~n",[[1024]]).</input>
+[1024]
+ok
+12> <input>io:fwrite("~tp~n",[[1024]]).</input>
+[1024]
+ok
+</pre>
+ <p>but if Erlang was started with <c>+pc unicode</c>:</p>
<pre>
-9> <input>io:fwrite("~p~n",[[1024]]).</input>
+13> <input>io:fwrite("~p~n",[[1024]]).</input>
[1024]
-10> <input>io:fwrite("~tp~n",[[1024]]).</input>
-"\x{400}"
-11> <input>io:fwrite("~tp~n", [&lt;&lt;128,128&gt;&gt;]).</input>
+ok
+14> <input>io:fwrite("~tp~n",[[1024]]).</input>
+"Ѐ"
+ok</pre>
+ <p>Similarly, binaries that look like UTF-8 encoded strings
+ are output with the binary string syntax if the <c>t</c>
+ modifier is specified:</p>
+ <pre>
+15> <input>io:fwrite("~p~n", [&lt;&lt;208,128&gt;&gt;]).</input>
+&lt;&lt;208,128&gt;&gt;
+ok
+16> <input>io:fwrite("~tp~n", [&lt;&lt;208,128&gt;&gt;]).</input>
+&lt;&lt;"Ѐ"/utf8&gt;&gt;
+ok
+17> <input>io:fwrite("~tp~n", [&lt;&lt;128,128&gt;&gt;]).</input>
&lt;&lt;128,128&gt;&gt;
-12> <input>io:fwrite("~tp~n", [&lt;&lt;208,128&gt;&gt;]).</input>
-&lt;&lt;"\x{400}"/utf8&gt;&gt;
ok</pre>
</item>
<tag><c>W</c></tag>
@@ -454,12 +502,9 @@ ok</pre>
abc def 'abc def' {foo,1} A
ok
2> <input>io:fwrite("~s", [65]).</input>
-** exception exit: {badarg,[{io,format,[&lt;0.22.0>,"~s","A"]},
- {erl_eval,do_apply,5},
- {shell,exprs,6},
- {shell,eval_exprs,6},
- {shell,eval_loop,3}]}
- in function io:o_request/2</pre>
+** exception error: bad argument
+ in function io:format/3
+ called as io:format(&lt;0.53.0>,"~s","A")</pre>
<p>In this example, an attempt was made to output the single
character 65 with the aid of the string formatting directive
<c>"~s"</c>.</p>
diff --git a/lib/stdlib/doc/src/lists.xml b/lib/stdlib/doc/src/lists.xml
index 7efafedc82..c3d5d7e07a 100644
--- a/lib/stdlib/doc/src/lists.xml
+++ b/lib/stdlib/doc/src/lists.xml
@@ -4,7 +4,7 @@
<erlref>
<header>
<copyright>
- <year>1996</year><year>2017</year>
+ <year>1996</year><year>2018</year>
<holder>Ericsson AB. All Rights Reserved.</holder>
</copyright>
<legalnotice>
@@ -771,6 +771,18 @@ length(lists:seq(From, To, Incr)) =:= (To - From + Incr) div Incr</code>
</func>
<func>
+ <name name="search" arity="2"/>
+ <fsummary>Find the first element that satisfies a predicate.</fsummary>
+ <desc>
+ <p>If there is a <c><anno>Value</anno></c> in <c><anno>List</anno></c>
+ such that <c><anno>Pred</anno>(<anno>Value</anno>)</c> returns
+ <c>true</c>, returns <c>{value, <anno>Value</anno>}</c>
+ for the first such <c><anno>Value</anno></c>,
+ otherwise returns <c>false</c>.</p>
+ </desc>
+ </func>
+
+ <func>
<name name="splitwith" arity="2"/>
<fsummary>Split a list into two lists based on a predicate.</fsummary>
<desc>
diff --git a/lib/stdlib/doc/src/maps.xml b/lib/stdlib/doc/src/maps.xml
index 8c7270816b..987d92989d 100644
--- a/lib/stdlib/doc/src/maps.xml
+++ b/lib/stdlib/doc/src/maps.xml
@@ -33,16 +33,31 @@
<p>This module contains functions for maps processing.</p>
</description>
+ <datatypes>
+ <datatype>
+ <name name="iterator"/>
+ <desc>
+ <p>An iterator representing the key value associations in a map.</p>
+ <p>Created using <seealso marker="#iterator-1"><c>maps:iterator/1</c></seealso>.</p>
+ <p>Consumed by <seealso marker="#next-1"><c>maps:next/1</c></seealso>,
+ <seealso marker="#filter-2"><c>maps:filter/2</c></seealso>,
+ <seealso marker="#fold-3"><c>maps:fold/3</c></seealso> and
+ <seealso marker="#map-2"><c>maps:map/2</c></seealso>.</p>
+ </desc>
+ </datatype>
+ </datatypes>
+
<funcs>
<func>
<name name="filter" arity="2"/>
<fsummary>Select pairs that satisfy a predicate.</fsummary>
<desc>
- <p>Returns a map <c><anno>Map2</anno></c> for which predicate
- <c><anno>Pred</anno></c> holds true in <c><anno>Map1</anno></c>.</p>
+ <p>Returns a map <c><anno>Map</anno></c> for which predicate
+ <c><anno>Pred</anno></c> holds true in <c><anno>MapOrIter</anno></c>.</p>
<p>The call fails with a <c>{badmap,Map}</c> exception if
- <c><anno>Map1</anno></c> is not a map, or with <c>badarg</c> if
- <c><anno>Pred</anno></c> is not a function of arity 2.</p>
+ <c><anno>MapOrIter</anno></c> is not a map or valid iterator,
+ or with <c>badarg</c> if <c><anno>Pred</anno></c> is not a
+ function of arity 2.</p>
<p><em>Example:</em></p>
<code type="none">
> M = #{a => 2, b => 3, c=> 4, "a" => 1, "b" => 2, "c" => 4},
@@ -76,12 +91,16 @@
<fsummary></fsummary>
<desc>
<p>Calls <c>F(K, V, AccIn)</c> for every <c><anno>K</anno></c> to value
- <c><anno>V</anno></c> association in <c><anno>Map</anno></c> in
+ <c><anno>V</anno></c> association in <c><anno>MapOrIter</anno></c> in
any order. Function <c>fun F/3</c> must return a new
accumulator, which is passed to the next successive call.
This function returns the final value of the accumulator. The initial
accumulator value <c><anno>Init</anno></c> is returned if the map is
empty.</p>
+ <p>The call fails with a <c>{badmap,Map}</c> exception if
+ <c><anno>MapOrIter</anno></c> is not a map or valid iterator,
+ or with <c>badarg</c> if <c><anno>Fun</anno></c> is not a
+ function of arity 3.</p>
<p><em>Example:</em></p>
<code type="none">
> Fun = fun(K,V,AccIn) when is_list(K) -> AccIn + V end,
@@ -169,6 +188,32 @@ false</code>
</func>
<func>
+ <name name="iterator" arity="1"/>
+ <fsummary>Create a map iterator.</fsummary>
+ <desc>
+ <p>Returns a map iterator <c><anno>Iterator</anno></c> that can
+ be used by <seealso marker="#next-1"><c>maps:next/1</c></seealso>
+ to traverse the key-value associations in a map. When iterating
+ over a map, the memory usage is guaranteed to be bounded no matter
+ the size of the map.</p>
+ <p>The call fails with a <c>{badmap,Map}</c> exception if
+ <c><anno>Map</anno></c> is not a map.</p>
+ <p><em>Example:</em></p>
+ <code type="none">
+> M = #{ a => 1, b => 2 }.
+#{a => 1,b => 2}
+> I = maps:iterator(M).
+[{a,1},{b,2}]
+> {K1, V1, I2} = maps:next(I).
+{a,1,[{b,2}]}
+> {K2, V2, I3} = maps:next(I2).
+{b,2,[]}
+> maps:next(I3).
+none</code>
+ </desc>
+ </func>
+
+ <func>
<name name="keys" arity="1"/>
<fsummary></fsummary>
<desc>
@@ -188,12 +233,16 @@ false</code>
<name name="map" arity="2"/>
<fsummary></fsummary>
<desc>
- <p>Produces a new map <c><anno>Map2</anno></c> by calling function
+ <p>Produces a new map <c><anno>Map</anno></c> by calling function
<c>fun F(K, V1)</c> for every <c><anno>K</anno></c> to value
- <c><anno>V1</anno></c> association in <c><anno>Map1</anno></c> in
+ <c><anno>V1</anno></c> association in <c><anno>MapOrIter</anno></c> in
any order. Function <c>fun F/2</c> must return value
<c><anno>V2</anno></c> to be associated with key <c><anno>K</anno></c>
- for the new map <c><anno>Map2</anno></c>.</p>
+ for the new map <c><anno>Map</anno></c>.</p>
+ <p>The call fails with a <c>{badmap,Map}</c> exception if
+ <c><anno>MapOrIter</anno></c> is not a map or valid iterator,
+ or with <c>badarg</c> if <c><anno>Fun</anno></c> is not a
+ function of arity 2.</p>
<p><em>Example:</em></p>
<code type="none">
> Fun = fun(K,V1) when is_list(K) -> V1*2 end,
@@ -234,6 +283,35 @@ false</code>
</func>
<func>
+ <name name="next" arity="1"/>
+ <fsummary>Get the next key and value from an iterator.</fsummary>
+ <desc>
+ <p>Returns the next key-value association in
+ <c><anno>Iterator</anno></c> and a new iterator for the
+ remaining associations in the iterator.
+ </p>
+ <p>
+ If there are no more associations in the iterator,
+ <c>none</c> is returned.
+ </p>
+ <p><em>Example:</em></p>
+ <code type="none">
+> Map = #{a => 1, b => 2, c => 3}.
+#{a => 1,b => 2,c => 3}
+> Iter = maps:iterator(Map).
+[{a,1},{b,2},{c,3}]
+> {_, _, Iter1} = maps:next(Iter).
+{a,1,[{b,2},{c,3}]}
+> {_, _, Iter2} = maps:next(Iter1).
+{b,2,[{c,3}]}
+> {_, _, Iter3} = maps:next(Iter2).
+{c,3,[]}
+> maps:next(Iter3).
+none</code>
+ </desc>
+ </func>
+
+ <func>
<name name="put" arity="3"/>
<fsummary></fsummary>
<desc>
diff --git a/lib/stdlib/doc/src/ordsets.xml b/lib/stdlib/doc/src/ordsets.xml
index 7b590932e4..2d891d7a5a 100644
--- a/lib/stdlib/doc/src/ordsets.xml
+++ b/lib/stdlib/doc/src/ordsets.xml
@@ -142,6 +142,15 @@
</func>
<func>
+ <name name="is_empty" arity="1"/>
+ <fsummary>Test for empty set.</fsummary>
+ <desc>
+ <p>Returns <c>true</c> if <c><anno>Ordset</anno></c> is an empty set,
+ otherwise <c>false</c>.</p>
+ </desc>
+ </func>
+
+ <func>
<name name="is_set" arity="1"/>
<fsummary>Test for an <c>Ordset</c>.</fsummary>
<desc>
diff --git a/lib/stdlib/doc/src/rand.xml b/lib/stdlib/doc/src/rand.xml
index 89fb858823..21f680a0ee 100644
--- a/lib/stdlib/doc/src/rand.xml
+++ b/lib/stdlib/doc/src/rand.xml
@@ -133,8 +133,9 @@
variable <c>rand_seed</c> to remember the current state.</p>
<p>If a process calls
- <seealso marker="#uniform-0"><c>uniform/0</c></seealso> or
- <seealso marker="#uniform-1"><c>uniform/1</c></seealso> without
+ <seealso marker="#uniform-0"><c>uniform/0</c></seealso>,
+ <seealso marker="#uniform-1"><c>uniform/1</c></seealso> or
+ <seealso marker="#uniform_real-0"><c>uniform_real/0</c></seealso> without
setting a seed first, <seealso marker="#seed-1"><c>seed/1</c></seealso>
is called automatically with the default algorithm and creates a
non-constant seed.</p>
@@ -168,10 +169,17 @@ R3 = rand:uniform(),</pre>
S0 = rand:seed_s(exrop),
{R4, S1} = rand:uniform_s(S0),</pre>
+ <p>Textbook basic form Box-Muller standard normal deviate</p>
+
+ <pre>
+R5 = rand:uniform_real(),
+R6 = rand:uniform(),
+SND0 = math:sqrt(-2 * math:log(R5)) * math:cos(math:pi() * R6)</pre>
+
<p>Create a standard normal deviate:</p>
<pre>
-{SND0, S2} = rand:normal_s(S1),</pre>
+{SND1, S2} = rand:normal_s(S1),</pre>
<p>Create a normal deviate with mean -3 and variance 0.5:</p>
@@ -414,7 +422,8 @@ tests. We suggest to use a sign test to extract a random Boolean value.</pre>
This function may return exactly <c>0.0</c> which can be
fatal for certain applications. If that is undesired
you can use <c>(1.0 - rand:uniform())</c> to get the
- interval <c>0.0 &lt; <anno>X</anno> =&lt; 1.0</c>.
+ interval <c>0.0 &lt; <anno>X</anno> =&lt; 1.0</c>, or instead use
+ <seealso marker="#uniform_real-0"><c>uniform_real/0</c></seealso>.
</p>
<p>
If neither endpoint is desired you can test and re-try
@@ -432,6 +441,42 @@ end.</pre>
</func>
<func>
+ <name name="uniform_real" arity="0"/>
+ <fsummary>Return a random float.</fsummary>
+ <desc><marker id="uniform_real-0"/>
+ <p>
+ Returns a random float
+ uniformly distributed in the value range
+ <c>DBL_MIN =&lt; <anno>X</anno> &lt; 1.0</c>
+ and updates the state in the process dictionary.
+ </p>
+ <p>
+ Conceptually, a random real number <c>R</c> is generated
+ from the interval <c>0 =&lt; R &lt; 1</c> and then the
+ closest rounded down normalized number
+ in the IEEE 754 Double precision format
+ is returned.
+ </p>
+ <note>
+ <p>
+ The generated numbers from this function has got better
+ granularity for small numbers than the regular
+ <seealso marker="#uniform-0"><c>uniform/0</c></seealso>
+ because all bits in the mantissa are random.
+ This property, in combination with the fact that exactly zero
+ is never returned is useful for algoritms doing for example
+ <c>1.0 / <anno>X</anno></c> or <c>math:log(<anno>X</anno>)</c>.
+ </p>
+ </note>
+ <p>
+ See
+ <seealso marker="#uniform_real_s-1"><c>uniform_real_s/1</c></seealso>
+ for more explanation.
+ </p>
+ </desc>
+ </func>
+
+ <func>
<name name="uniform" arity="1"/>
<fsummary>Return a random integer.</fsummary>
<desc><marker id="uniform-1"/>
@@ -460,7 +505,8 @@ end.</pre>
This function may return exactly <c>0.0</c> which can be
fatal for certain applications. If that is undesired
you can use <c>(1.0 - rand:uniform(State))</c> to get the
- interval <c>0.0 &lt; <anno>X</anno> =&lt; 1.0</c>.
+ interval <c>0.0 &lt; <anno>X</anno> =&lt; 1.0</c>, or instead use
+ <seealso marker="#uniform_real_s-1"><c>uniform_real_s/1</c></seealso>.
</p>
<p>
If neither endpoint is desired you can test and re-try
@@ -478,6 +524,68 @@ end.</pre>
</func>
<func>
+ <name name="uniform_real_s" arity="1"/>
+ <fsummary>Return a random float.</fsummary>
+ <desc>
+ <p>
+ Returns, for a specified state, a random float
+ uniformly distributed in the value range
+ <c>DBL_MIN =&lt; <anno>X</anno> &lt; 1.0</c>
+ and updates the state in the process dictionary.
+ </p>
+ <p>
+ Conceptually, a random real number <c>R</c> is generated
+ from the interval <c>0 =&lt; R &lt; 1</c> and then the
+ closest rounded down normalized number
+ in the IEEE 754 Double precision format
+ is returned.
+ </p>
+ <note>
+ <p>
+ The generated numbers from this function has got better
+ granularity for small numbers than the regular
+ <seealso marker="#uniform_s-1"><c>uniform_s/1</c></seealso>
+ because all bits in the mantissa are random.
+ This property, in combination with the fact that exactly zero
+ is never returned is useful for algoritms doing for example
+ <c>1.0 / <anno>X</anno></c> or <c>math:log(<anno>X</anno>)</c>.
+ </p>
+ </note>
+ <p>
+ The concept implicates that the probability to get
+ exactly zero is extremely low; so low that this function
+ is in fact guaranteed to never return zero. The smallest
+ number that it might return is <c>DBL_MIN</c>, which is
+ 2.0^(-1022).
+ </p>
+ <p>
+ The value range stated at the top of this function
+ description is technically correct, but
+ <c>0.0 =&lt; <anno>X</anno> &lt; 1.0</c>
+ is a better description of the generated numbers'
+ statistical distribution. Except that exactly 0.0
+ is never returned, which is not possible to observe
+ statistically.
+ </p>
+ <p>
+ For example; for all sub ranges
+ <c>N*2.0^(-53) =&lt; X &lt; (N+1)*2.0^(-53)</c>
+ where
+ <c>0 =&lt; integer(N) &lt; 2.0^53</c>
+ the probability is the same.
+ Compare that with the form of the numbers generated by
+ <seealso marker="#uniform_s-1"><c>uniform_s/1</c></seealso>.
+ </p>
+ <p>
+ Having to generate extra random bits for
+ small numbers costs a little performance.
+ This function is about 20% slower than the regular
+ <seealso marker="#uniform_s-1"><c>uniform_s/1</c></seealso>
+ </p>
+ </desc>
+ </func>
+
+ <func>
<name name="uniform_s" arity="2"/>
<fsummary>Return a random integer.</fsummary>
<desc>
diff --git a/lib/stdlib/doc/src/ref_man.xml b/lib/stdlib/doc/src/ref_man.xml
index 878a3babc5..68bfddbc71 100644
--- a/lib/stdlib/doc/src/ref_man.xml
+++ b/lib/stdlib/doc/src/ref_man.xml
@@ -93,6 +93,7 @@
<xi:include href="sys.xml"/>
<xi:include href="timer.xml"/>
<xi:include href="unicode.xml"/>
+ <xi:include href="uri_string.xml"/>
<xi:include href="win32reg.xml"/>
<xi:include href="zip.xml"/>
</application>
diff --git a/lib/stdlib/doc/src/sets.xml b/lib/stdlib/doc/src/sets.xml
index 4934bed365..1ed96ddc3f 100644
--- a/lib/stdlib/doc/src/sets.xml
+++ b/lib/stdlib/doc/src/sets.xml
@@ -140,6 +140,15 @@
</func>
<func>
+ <name name="is_empty" arity="1"/>
+ <fsummary>Test for empty set.</fsummary>
+ <desc>
+ <p>Returns <c>true</c> if <c><anno>Set</anno></c> is an empty set,
+ otherwise <c>false</c>.</p>
+ </desc>
+ </func>
+
+ <func>
<name name="is_set" arity="1"/>
<fsummary>Test for a <c>Set</c>.</fsummary>
<desc>
diff --git a/lib/stdlib/doc/src/specs.xml b/lib/stdlib/doc/src/specs.xml
index 45b207b13d..d559adf9b6 100644
--- a/lib/stdlib/doc/src/specs.xml
+++ b/lib/stdlib/doc/src/specs.xml
@@ -60,6 +60,7 @@
<xi:include href="../specs/specs_sys.xml"/>
<xi:include href="../specs/specs_timer.xml"/>
<xi:include href="../specs/specs_unicode.xml"/>
+ <xi:include href="../specs/specs_uri_string.xml"/>
<xi:include href="../specs/specs_win32reg.xml"/>
<xi:include href="../specs/specs_zip.xml"/>
</specs>
diff --git a/lib/stdlib/doc/src/string.xml b/lib/stdlib/doc/src/string.xml
index 9d5edd9ecf..c7772d63a3 100644
--- a/lib/stdlib/doc/src/string.xml
+++ b/lib/stdlib/doc/src/string.xml
@@ -111,8 +111,8 @@
<c>unicode:chardata()</c></seealso> and operate on grapheme
clusters. The <seealso marker="#oldapi"> <c>old
functions</c></seealso> that only work on Latin-1 lists as input
- are still available but should not be
- used. They will be deprecated in Erlang/OTP 21.
+ are still available but should not be used, they will be
+ deprecated in a future release.
</p>
</description>
@@ -594,7 +594,7 @@ ÖÄÅ</pre>
or <c>both</c>, indicates from which direction characters
are to be removed.
</p>
- <p> Default <c><anno>Characters</anno></c> are the set of
+ <p> Default <c><anno>Characters</anno></c> is the set of
nonbreakable whitespace codepoints, defined as
Pattern_White_Space in
<url href="http://unicode.org/reports/tr31/">Unicode Standard Annex #31</url>.
diff --git a/lib/stdlib/doc/src/unicode_usage.xml b/lib/stdlib/doc/src/unicode_usage.xml
index 26dc46719e..789e063c12 100644
--- a/lib/stdlib/doc/src/unicode_usage.xml
+++ b/lib/stdlib/doc/src/unicode_usage.xml
@@ -719,8 +719,8 @@ Eshell V5.10.1 (abort with ^G)
</section>
<section>
- <title>Unicode Filenames</title>
<marker id="unicode_file_names"/>
+ <title>Unicode Filenames</title>
<p>Most modern operating systems support Unicode filenames in some way.
There are many different ways to do this and Erlang by default treats the
different approaches differently:</p>
@@ -855,8 +855,12 @@ Eshell V5.10.1 (abort with ^G)
</note>
<section>
- <title>Notes About Raw Filenames</title>
<marker id="notes-about-raw-filenames"/>
+ <title>Notes About Raw Filenames</title>
+ <note><p>
+ Note that raw filenames <em>not</em> necessarily are encoded the
+ same way as on the OS level.
+ </p></note>
<p>Raw filenames were introduced together with Unicode filename support
in ERTS 5.8.2 (Erlang/OTP R14B01). The reason &quot;raw
filenames&quot; were introduced in the system was
diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml
new file mode 100644
index 0000000000..88d4600611
--- /dev/null
+++ b/lib/stdlib/doc/src/uri_string.xml
@@ -0,0 +1,359 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!DOCTYPE erlref SYSTEM "erlref.dtd">
+
+<erlref>
+ <header>
+ <copyright>
+ <year>2017</year><year>2018</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ </legalnotice>
+
+ <title>uri_string</title>
+ <prepared>Péter Dimitrov</prepared>
+ <docno>1</docno>
+ <date>2018-02-07</date>
+ <rev>A</rev>
+ </header>
+ <module>uri_string</module>
+ <modulesummary>URI processing functions.</modulesummary>
+ <description>
+ <p>This module contains functions for parsing and handling URIs
+ (<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>) and
+ form-urlencoded query strings (<url href="https://www.w3.org/TR/html52/">HTML 5.2</url>).
+ </p>
+ <p>
+ Parsing and serializing non-UTF-8 form-urlencoded query strings are also supported
+ (<url href="https://www.w3.org/TR/html50/">HTML 5.0</url>).
+ </p>
+ <p>A URI is an identifier consisting of a sequence of characters matching the syntax
+ rule named <em>URI</em> in <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>.
+ </p>
+ <p> The generic URI syntax consists of a hierarchical sequence of components referred
+ to as the scheme, authority, path, query, and fragment:</p>
+ <pre>
+ URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ hier-part = "//" authority path-abempty
+ / path-absolute
+ / path-rootless
+ / path-empty
+ scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ authority = [ userinfo "@" ] host [ ":" port ]
+ userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+
+ reserved = gen-delims / sub-delims
+ gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")"
+ / "*" / "+" / "," / ";" / "="
+
+ unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ </pre><br></br>
+ <p>The interpretation of a URI depends only on the characters used and not on how those
+ characters are represented in a network protocol.</p>
+ <p>The functions implemented by this module cover the following use cases:</p>
+ <list type="bulleted">
+ <item>Parsing URIs into its components and returing a map<br></br>
+ <seealso marker="#parse/1"><c>parse/1</c></seealso>
+ </item>
+ <item>Recomposing a map of URI components into a URI string<br></br>
+ <seealso marker="#recompose/1"><c>recompose/1</c></seealso>
+ </item>
+ <item>Changing inbound binary and percent-encoding of URIs<br></br>
+ <seealso marker="#transcode/2"><c>transcode/2</c></seealso>
+ </item>
+ <item>Transforming URIs into a normalized form<br></br>
+ <seealso marker="#normalize/1"><c>normalize/1</c></seealso><br></br>
+ <seealso marker="#normalize/2"><c>normalize/2</c></seealso>
+ </item>
+ <item>Composing form-urlencoded query strings from a list of key-value pairs<br></br>
+ <seealso marker="#compose_query/1"><c>compose_query/1</c></seealso><br></br>
+ <seealso marker="#compose_query/2"><c>compose_query/2</c></seealso>
+ </item>
+ <item>Dissecting form-urlencoded query strings into a list of key-value pairs<br></br>
+ <seealso marker="#dissect_query/1"><c>dissect_query/1</c></seealso>
+ </item>
+ </list>
+ <p>There are four different encodings present during the handling of URIs:</p>
+ <list type="bulleted">
+ <item>Inbound binary encoding in binaries</item>
+ <item>Inbound percent-encoding in lists and binaries</item>
+ <item>Outbound binary encoding in binaries</item>
+ <item>Outbound percent-encoding in lists and binaries</item>
+ </list>
+ <p>Functions with <c>uri_string()</c> argument accept lists, binaries and
+ mixed lists (lists with binary elements) as input type. All of the functions but
+ <c>transcode/2</c> expects input as lists of unicode codepoints, UTF-8 encoded binaries
+ and UTF-8 percent-encoded URI parts ("%C3%B6" corresponds to the unicode character "ö").</p>
+ <p>Unless otherwise specified the return value type and encoding are the same as the input
+ type and encoding. That is, binary input returns binary output, list input returns a list
+ output but mixed input returns list output.</p>
+ <p>In case of lists there is only percent-encoding. In binaries, however, both binary encoding
+ and percent-encoding shall be considered. <c>transcode/2</c> provides the means to convert
+ between the supported encodings, it takes a <c>uri_string()</c> and a list of options
+ specifying inbound and outbound encodings.</p>
+ <p><url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> does not mandate any specific
+ character encoding and it is usually defined by the protocol or surrounding text. This library
+ takes the same assumption, binary and percent-encoding are handled as one configuration unit,
+ they cannot be set to different values.</p>
+ </description>
+
+ <datatypes>
+ <datatype>
+ <name name="error"/>
+ <desc>
+ <p>Error tuple indicating the type of error. Possible values of the second component:</p>
+ <list type="bulleted">
+ <item><c>invalid_character</c></item>
+ <item><c>invalid_encoding</c></item>
+ <item><c>invalid_input</c></item>
+ <item><c>invalid_map</c></item>
+ <item><c>invalid_percent_encoding</c></item>
+ <item><c>invalid_scheme</c></item>
+ <item><c>invalid_uri</c></item>
+ <item><c>invalid_utf8</c></item>
+ <item><c>missing_value</c></item>
+ </list>
+ <p>The third component is a term providing additional information about the
+ cause of the error.</p>
+ </desc>
+ </datatype>
+ <datatype>
+ <name name="uri_map"/>
+ <desc>
+ <p>Map holding the main components of a URI.</p>
+ </desc>
+ </datatype>
+ <datatype>
+ <name name="uri_string"/>
+ <desc>
+ <p>List of unicode codepoints, a UTF-8 encoded binary, or a mix of the two,
+ representing an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>
+ compliant URI (<em>percent-encoded form</em>).
+ A URI is a sequence of characters from a very limited set: the letters of
+ the basic Latin alphabet, digits, and a few special characters.</p>
+ </desc>
+ </datatype>
+ </datatypes>
+
+ <funcs>
+
+ <func>
+ <name name="compose_query" arity="1"/>
+ <fsummary>Compose urlencoded query string.</fsummary>
+ <desc>
+ <p>Composes a form-urlencoded <c><anno>QueryString</anno></c> based on a
+ <c><anno>QueryList</anno></c>, a list of non-percent-encoded key-value pairs.
+ Form-urlencoding is defined in section
+ 4.10.21.6 of the <url href="https://www.w3.org/TR/html52/">HTML 5.2</url>
+ specification and in section 4.10.22.6 of the
+ <url href="https://www.w3.org/TR/html50/">HTML 5.0</url> specification for
+ non-UTF-8 encodings.
+ </p>
+ <p>See also the opposite operation <seealso marker="#dissect_query/1">
+ <c>dissect_query/1</c></seealso>.
+ </p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input>uri_string:compose_query([{"foo bar","1"},{"city","örebro"}]).</input>
+<![CDATA["foo+bar=1&city=%C3%B6rebro"]]>
+2> <![CDATA[uri_string:compose_query([{<<"foo bar">>,<<"1">>},
+2> {<<"city">>,<<"örebro"/utf8>>}]).]]>
+<![CDATA[<<"foo+bar=1&city=%C3%B6rebro">>]]>
+ </pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="compose_query" arity="2"/>
+ <fsummary>Compose urlencoded query string.</fsummary>
+ <desc>
+ <p>Same as <c>compose_query/1</c> but with an additional
+ <c><anno>Options</anno></c> parameter, that controls the encoding ("charset")
+ used by the encoding algorithm. There are two supported encodings: <c>utf8</c>
+ (or <c>unicode</c>) and <c>latin1</c>.
+ </p>
+ <p>Each character in the entry's name and value that cannot be expressed using
+ the selected character encoding, is replaced by a string consisting of a U+0026
+ AMPERSAND character (<![CDATA[&]]>), a "#" (U+0023) character, one or more ASCII
+ digits representing the Unicode code point of the character in base ten, and
+ finally a ";" (U+003B) character.
+ </p>
+ <p>Bytes that are out of the range 0x2A, 0x2D, 0x2E, 0x30 to 0x39, 0x41 to 0x5A, 0x5F,
+ 0x61 to 0x7A, are percent-encoded (U+0025 PERCENT SIGN character (%) followed by
+ uppercase ASCII hex digits representing the hexadecimal value of the byte).
+ </p>
+ <p>See also the opposite operation <seealso marker="#dissect_query/1">
+ <c>dissect_query/1</c></seealso>.
+ </p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input>uri_string:compose_query([{"foo bar","1"},{"city","örebro"}],</input>
+1> [{encoding, latin1}]).
+<![CDATA["foo+bar=1&city=%F6rebro"
+2> uri_string:compose_query([{<<"foo bar">>,<<"1">>},
+2> {<<"city">>,<<"東京"/utf8>>}], [{encoding, latin1}]).]]>
+<![CDATA[<<"foo+bar=1&city=%26%2326481%3B%26%2320140%3B">>]]>
+ </pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="dissect_query" arity="1"/>
+ <fsummary>Dissect query string.</fsummary>
+ <desc>
+ <p>Dissects an urlencoded <c><anno>QueryString</anno></c> and returns a
+ <c><anno>QueryList</anno></c>, a list of non-percent-encoded key-value pairs.
+ Form-urlencoding is defined in section
+ 4.10.21.6 of the <url href="https://www.w3.org/TR/html52/">HTML 5.2</url>
+ specification and in section 4.10.22.6 of the
+ <url href="https://www.w3.org/TR/html50/">HTML 5.0</url> specification for
+ non-UTF-8 encodings.
+ </p>
+ <p>See also the opposite operation <seealso marker="#compose_query/1">
+ <c>compose_query/1</c></seealso>.
+ </p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input><![CDATA[uri_string:dissect_query("foo+bar=1&city=%C3%B6rebro").]]></input>
+[{"foo bar","1"},{"city","örebro"}]
+2> <![CDATA[uri_string:dissect_query(<<"foo+bar=1&city=%26%2326481%3B%26%2320140%3B">>).]]>
+<![CDATA[[{<<"foo bar">>,<<"1">>},
+ {<<"city">>,<<230,157,177,228,186,172>>}] ]]>
+ </pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="normalize" arity="1"/>
+ <fsummary>Syntax-based normalization.</fsummary>
+ <desc>
+ <p>Transforms an <c><anno>URI</anno></c> into a normalized form
+ using Syntax-Based Normalization as defined by
+ <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>.</p>
+ <p>This function implements case normalization, percent-encoding
+ normalization, path segment normalization and scheme based normalization
+ for HTTP(S) with basic support for FTP, SSH, SFTP and TFTP.</p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input>uri_string:normalize("/a/b/c/./../../g").</input>
+"/a/g"
+2> <![CDATA[uri_string:normalize(<<"mid/content=5/../6">>).]]>
+<![CDATA[<<"mid/6">>]]>
+3> uri_string:normalize("http://localhost:80").
+"https://localhost/"
+4> <input>uri_string:normalize(#{scheme => "http",port => 80,path => "/a/b/c/./../../g",</input>
+4> host => "localhost-örebro"}).
+"http://localhost-%C3%B6rebro/a/g"
+ </pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="normalize" arity="2"/>
+ <fsummary>Syntax-based normalization.</fsummary>
+ <desc>
+ <p>Same as <c>normalize/1</c> but with an additional
+ <c><anno>Options</anno></c> parameter, that controls if the normalized URI
+ shall be returned as an uri_map().
+ There is one supported option: <c>return_map</c>.
+ </p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input>uri_string:normalize("/a/b/c/./../../g", [return_map]).</input>
+#{path => "/a/g"}
+2> <![CDATA[uri_string:normalize(<<"mid/content=5/../6">>, [return_map]).]]>
+<![CDATA[#{path => <<"mid/6">>}]]>
+3> uri_string:normalize("http://localhost:80", [return_map]).
+#{scheme => "http",path => "/",host => "localhost"}
+4> <input>uri_string:normalize(#{scheme => "http",port => 80,path => "/a/b/c/./../../g",</input>
+4> host => "localhost-örebro"}, [return_map]).
+#{scheme => "http",path => "/a/g",host => "localhost-örebro"}
+ </pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="parse" arity="1"/>
+ <fsummary>Parse URI into a map.</fsummary>
+ <desc>
+ <p>Parses an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>
+ compliant <c>uri_string()</c> into a <c>uri_map()</c>, that holds the parsed
+ components of the <c>URI</c>.
+ If parsing fails, an error tuple is returned.</p>
+ <p>See also the opposite operation <seealso marker="#recompose/1">
+ <c>recompose/1</c></seealso>.</p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input>uri_string:parse("foo://[email protected]:8042/over/there?name=ferret#nose").</input>
+#{fragment => "nose",host => "example.com",
+ path => "/over/there",port => 8042,query => "name=ferret",
+ scheme => foo,userinfo => "user"}
+2> <![CDATA[uri_string:parse(<<"foo://[email protected]:8042/over/there?name=ferret">>).]]>
+<![CDATA[#{host => <<"example.com">>,path => <<"/over/there">>,
+ port => 8042,query => <<"name=ferret">>,scheme => <<"foo">>,
+ userinfo => <<"user">>}]]>
+ </pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="recompose" arity="1"/>
+ <fsummary>Recompose URI.</fsummary>
+ <desc>
+ <p>Creates an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> compliant
+ <c><anno>URIString</anno></c> (percent-encoded), based on the components of
+ <c><anno>URIMap</anno></c>.
+ If the <c><anno>URIMap</anno></c> is invalid, an error tuple is returned.</p>
+ <p>See also the opposite operation <seealso marker="#parse/1">
+ <c>parse/1</c></seealso>.</p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input>URIMap = #{fragment => "nose", host => "example.com", path => "/over/there",</input>
+1> port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}.
+#{fragment => "top",host => "example.com",
+ path => "/over/there",port => 8042,query => "?name=ferret",
+ scheme => foo,userinfo => "user"}
+
+2> <input>uri_string:recompose(URIMap).</input>
+"foo://example.com:8042/over/there?name=ferret#nose"</pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="transcode" arity="2"/>
+ <fsummary>Transcode URI.</fsummary>
+ <desc>
+ <p>Transcodes an <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>
+ compliant <c><anno>URIString</anno></c>,
+ where <c><anno>Options</anno></c> is a list of tagged tuples, specifying the inbound
+ (<c>in_encoding</c>) and outbound (<c>out_encoding</c>) encodings. <c>in_encoding</c>
+ and <c>out_encoding</c> specifies both binary encoding and percent-encoding for the
+ input and output data. Mixed encoding, where binary encoding is not the same as
+ percent-encoding, is not supported.
+ If an argument is invalid, an error tuple is returned.</p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input><![CDATA[uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>,]]></input>
+1> [{in_encoding, utf32},{out_encoding, utf8}]).
+<![CDATA[<<"foo%C3%B6bar"/utf8>>]]>
+2> uri_string:transcode("foo%F6bar", [{in_encoding, latin1},
+2> {out_encoding, utf8}]).
+"foo%C3%B6bar"
+ </pre>
+ </desc>
+ </func>
+
+ </funcs>
+</erlref>