Add documentation for binary module

Correct behaviour of copy/2 with 0 copies.
author: Patrik Nyblom <[email protected]> 2010-05-10 16:27:58 +0200
committer: Björn Gustavsson <[email protected]> 2010-05-17 15:51:50 +0200
commit: 97ab480df55cf574ab42a87b6927ef5bba83000e (patch)
tree: a19bfd73bfb45d772c4fd54cad13c1050a6b59a3
parent: dce00e268eb36048a729db9a2a9aebd4df0e7395 (diff)
download: otp-97ab480df55cf574ab42a87b6927ef5bba83000e.tar.gz
otp-97ab480df55cf574ab42a87b6927ef5bba83000e.tar.bz2
otp-97ab480df55cf574ab42a87b6927ef5bba83000e.zip
7 files changed, 790 insertions, 4 deletions
diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml
index cd9bb85f5c..e90160dfd7 100644
--- a/erts/doc/src/erlang.xml
+++ b/erts/doc/src/erlang.xml
@@ -253,6 +253,54 @@ iolist() = [char() | binary() | iolist()]
       </desc>
     </func>
     <func>
+      <name>binary_part(Subject, PosLen) -> binary()</name>
+      <fsummary>Extracts a part of a binary</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+	<v>PosLen = {Start,Length}</v>
+	<v>Start = int()</v>
+	<v>Length = int()</v>
+      </type>
+      <desc>
+      <p>Extracts the part of the binary described by <c>PosLen</c>.</p>
+
+      <p>Negative length can be used to extract bytes at the end of a binary:</p>
+
+<code>
+1> Bin = &lt;&lt;1,2,3,4,5,6,7,8,9,10&gt;&gt;.
+2> binary_part(Bin,{byte_size(Bin), -5}).
+&lt;&lt;6,7,8,9,10&gt;&gt;
+</code>
+
+      <p>If <c>PosLen</c> in any way references outside the binary, a <c>badarg</c> exception is raised.</p>
+
+      <p><c>Start</c> is zero-based, i.e:</p>
+<code>
+1> Bin = &lt;&lt;1,2,3&gt;&gt;.
+2> binary_part(Bin,{0,2}).
+&lt;&lt;1,2&gt;&gt;
+</code>
+
+      <p>See the STDLIB module <c>binary</c> for details about the <c>PosLen</c> semantics.</p>
+
+      <p>Allowed in guard tests.</p>
+      </desc>
+    </func>
+    <func>
+      <name>binary_part(Subject, Start, Length) -> binary()</name>
+      <fsummary>Extracts a part of a binary</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+	<v>Start = int()</v>
+	<v>Length = int()</v>
+      </type>
+      <desc>
+      <p>The same as <c>binary_part(Subject, {Start, Length})</c>.</p>
+
+      <p>Allowed in guard tests.</p>
+      </desc>
+    </func>
+    <func>
       <name>binary_to_atom(Binary, Encoding) -> atom()</name>
       <fsummary>Convert from text representation to an atom</fsummary>
       <type>
@@ -318,6 +366,11 @@ iolist() = [char() | binary() | iolist()]
           corresponding to the bytes from position <c>Start</c> to
           position <c>Stop</c> in <c>Binary</c>. Positions in the
           binary are numbered starting from 1.</p>
+
+	  <note><p>This function's indexing style of using one-based indices for
+	  binaries is deprecated. New code should use the functions in
+	  the STDLIB module <c>binary</c> instead. They consistently
+	  use the same (zero-based) style of indexing.</p></note>
       </desc>
     </func>
     <func>
diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c
index 0a40e28474..3e8480324c 100644
--- a/erts/emulator/beam/erl_bif_binary.c
+++ b/erts/emulator/beam/erl_bif_binary.c
@@ -2369,7 +2369,8 @@ static BIF_RETTYPE do_binary_copy(Process *p, Eterm bin, Eterm en)
 	goto badarg;
     }
     if (!n) {
-	goto badarg;
+	Eterm res_term = erts_new_heap_binary(p,NULL,0,&bytes);
+	BIF_RET(res_term);
     }
     ERTS_GET_BINARY_BYTES(bin,bytes,bit_offs,bit_size);
     if (bit_size != 0) {
diff --git a/lib/stdlib/doc/src/Makefile b/lib/stdlib/doc/src/Makefile
index 13b9b2ff18..353c1b90b9 100644
--- a/lib/stdlib/doc/src/Makefile
+++ b/lib/stdlib/doc/src/Makefile
@@ -40,6 +40,7 @@ XML_REF3_FILES = \
 	array.xml \
 	base64.xml \
 	beam_lib.xml \
+	binary.xml \
 	c.xml \
 	calendar.xml \
 	dets.xml \
diff --git a/lib/stdlib/doc/src/binary.xml b/lib/stdlib/doc/src/binary.xml
new file mode 100644
index 0000000000..05ec4406c6
--- /dev/null
+++ b/lib/stdlib/doc/src/binary.xml
@@ -0,0 +1,729 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE erlref SYSTEM "erlref.dtd">
+
+<erlref>
+  <header>
+    <copyright>
+      <year>2009</year>
+      <year>2010</year>
+      <holder>Ericsson AB, All Rights Reserved</holder>
+    </copyright>
+    <legalnotice>
+  The contents of this file are subject to the Erlang Public License,
+  Version 1.1, (the "License"); you may not use this file except in
+  compliance with the License. You should have received a copy of the
+  Erlang Public License along with this software. If not, it can be
+  retrieved on line at http://www.erlang.org/.
+
+  Software distributed under the License is distributed on an "AS IS"
+  basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+  the License for the specific language governing rights and limitations
+  under the License.
+
+  The Initial Developer of the Original Code is Ericsson AB.
+    </legalnotice>
+
+    <title>binary</title>
+    <prepared>Patrik Nyblom</prepared>
+    <responsible>Kenneth Lundin</responsible>
+    <docno>1</docno>
+    <approved></approved>
+    <checked></checked>
+    <date>2010-05-05</date>
+    <rev>A</rev>
+    <file>binary.xml</file>
+  </header>
+  <module>binary</module>
+  <modulesummary>Library for handling binary data</modulesummary>
+  <description>
+
+    <p>This module contains functions for manipulating byte-oriented
+    binaries. Although the majority of functions could be implemented
+    using bit-syntax, the functions in this library are highly
+    optimized and are expected to either execute faster or consume
+    less memory (or both) than a counterpart written in pure Erlang.</p>
+
+    <p>The module is implemented according to the EEP (Erlang Enhancement Proposal) 31.</p>
+
+    <note>
+      <p>
+      The library handles byte-oriented data. Bitstrings that are not
+      binaries (do not contain whole octets of bits) will result in a <c>badarg</c>
+      exception being raised from any of the functions in this
+      module.
+      </p>
+    </note>
+
+
+  </description>
+  <section>
+      <title>DATA TYPES</title>
+      <code type="none">
+    cp()
+     - Opaque data-type representing a compiled search-pattern. Guaranteed to be a tuple()
+       to allow programs to distinguish it from non precompiled search patterns.
+      </code>
+      <code type="none">
+    part() = {Start,Length}
+    Start = int()
+    Length = int()
+      - A representation of a part (or range) in a binary. Start is a
+        zero-based offset into a binary() and Length is the length of
+        that part. As input to functions in this module, a reverse
+        part specification is allowed, constructed with a negative
+        Length, so that the part of the binary begins at Start +
+        Length and is -Length long. This is useful for referencing the
+        last N bytes of a binary as {size(Binary), -N}. The functions
+        in this module always return part()'s with positive Length.
+      </code>
+  </section>
+  <funcs>
+    <func>
+      <name>at(Subject, Pos) -> int()</name>
+      <fsummary>Returns the byte at a specific position in a binary</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+        <v>Pos = int() >= 0</v>
+      </type>
+      <desc>
+
+      <p>Returns the byte at position <c>Pos</c> (zero-based) in the binary
+      <c>Subject</c> as an integer. If <c>Pos</c> &gt;= <c>byte_size(Subject)</c>,
+      a <c>badarg</c>
+      exception is raised.</p>
+
+      </desc>
+    </func>
+    <func>
+      <name>bin_to_list(Subject) -> list()</name>
+      <fsummary>Convert a binary to a list of integers</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+      </type>
+      <desc>
+      <p>The same as <c>bin_to_list(Subject,{0,byte_size(Subject)})</c>.</p>
+      </desc>
+    </func>
+    <func>
+      <name>bin_to_list(Subject, PosLen) -> list()</name>
+      <fsummary>Convert a binary to a list of integers</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+        <v>PosLen = part()</v>
+      </type>
+      <desc>
+
+      <p>Converts <c>Subject</c> to a list of <c>int()</c>s, each representing
+      the value of one byte. The <c>part()</c> denotes which part of the
+      <c>binary()</c> to convert. Example:</p>
+
+<code>
+1> binary:bin_to_list(&lt;&lt;"erlang"&gt;&gt;,{1,3}).
+"rla"
+%% or [114,108,97] in list notation.
+</code>
+      <p>If <c>PosLen</c> in any way references outside the binary, a <c>badarg</c> exception is raised.</p>
+      </desc>
+    </func>
+    <func>
+      <name>bin_to_list(Subject, Pos, Len) -> list()</name>
+      <fsummary>Convert a binary to a list of integers</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+        <v>Pos = int()</v>
+        <v>Len = int()</v>
+      </type>
+      <desc>
+      <p>The same as <c>bin_to_list(Subject,{Pos,Len})</c>.</p>
+      </desc>
+    </func>
+    <func>
+      <name>compile_pattern(Pattern) -> cp()</name>
+      <fsummary>Pre-compiles a binary search pattern</fsummary>
+      <type>
+        <v>Pattern = binary() | [ binary() ]</v>
+      </type>
+      <desc>
+
+      <p>Builds an internal structure representing a compilation of a
+      search-pattern, later to be used in the <seealso marker="#match-3">match/3</seealso>,
+      <seealso marker="#matches-3">matches/3</seealso>,
+      <seealso marker="#split-3">split/3</seealso> or
+      <seealso marker="#replace-4">replace/4</seealso>
+      functions. The <c>cp()</c> returned is guaranteed to be a
+      <c>tuple()</c> to allow programs to distinguish it from non
+      pre-compiled search patterns.</p>
+
+      <p>When a list of binaries is given, it denotes a set of
+      alternative binaries to search for. I.e if
+      <c>[&lt;&lt;"functional"&gt;&gt;,&lt;&lt;"programming"&gt;&gt;]</c>
+      is given as <c>Pattern</c>, this
+      means "either <c>&lt;&lt;"functional"&gt;&gt;</c> or
+      <c>&lt;&lt;"programming"&gt;&gt;</c>". The pattern is a set of
+      alternatives; when only a single binary is given, the set has
+      only one element. The order of alternatives in a pattern is not significant.</p>
+
+      <p>The list of binaries used for search alternatives shall be flat and proper.</p>
+
+      <p>If <c>Pattern</c> is not a binary or a flat proper list of binaries with length &gt; 0,
+      a <c>badarg</c> exception will be raised.</p>
+
+      </desc>
+    </func>
+    <func>
+      <name>copy(Subject) -> binary()</name>
+      <fsummary>Creates a duplicate of a binary</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+      </type>
+      <desc>
+      <p>The same as <c>copy(Subject, 1)</c>.</p>
+      </desc>
+    </func>
+    <func>
+      <name>copy(Subject,N) -> binary()</name>
+      <fsummary>Duplicates a binary N times and creates a new binary</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+        <v>N = int() >= 0</v>
+      </type>
+      <desc>
+      <p>Creates a binary with the content of <c>Subject</c> duplicated <c>N</c> times.</p>
+
+      <p>This function will always create a new binary, even if <c>N =
+      1</c>. By using <c>copy/1</c> on a binary referencing a larger binary, one
+      might free up the larger binary for garbage collection.</p>
+
+      <note>
+      <p>By deliberately copying a single binary to avoid referencing
+      a larger binary, one might, instead of freeing up the larger
+      binary for later garbage collection, create much more binary
+      data than needed. Sharing binary data is usually good. Only in
+      special cases, when small parts reference large binaries and the
+      large binaries are no longer used in any process, deliberate
+      copying might be a good idea.</p> </note>
+
+      <p>If <c>N</c> &lt; <c>0</c>, a <c>badarg</c> exception is raised.</p>
+      </desc>
+    </func>
+    <func>
+      <name>decode_unsigned(Subject) -> Unsigned</name>
+      <fsummary>Decode a whole binary into an integer of arbitrary size</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+        <v>Unsigned = int() >= 0</v>
+      </type>
+      <desc>
+      <p>The same as <c>decode_unsigned(Subject,big)</c>.</p>
+      </desc>
+    </func>
+    <func>
+      <name>decode_unsigned(Subject, Endianess) -> Unsigned</name>
+      <fsummary>Decode a whole binary into an integer of arbitrary size</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+        <v>Endianess = big | little</v>
+        <v>Unsigned = int() >= 0</v>
+      </type>
+      <desc>
+
+      <p>Converts the binary digit representation, in big or little
+      endian, of a positive integer in <c>Subject</c> to an Erlang <c>int()</c>.</p>
+
+      <p>Example:</p>
+
+      <code>
+1> binary:decode_unsigned(&lt;&lt;169,138,199&gt;&gt;,big).
+11111111
+      </code>
+      </desc>
+    </func>
+    <func>
+      <name>encode_unsigned(Unsigned) -> binary()</name>
+      <fsummary>Encodes an unsigned integer into the minimal binary</fsummary>
+      <type>
+        <v>Unsigned = int() >= 0</v>
+      </type>
+      <desc>
+      <p>The same as <c>encode_unsigned(Unsigned,big)</c>.</p>
+      </desc>
+    </func>
+    <func>
+      <name>encode_unsigned(Unsigned,Endianess) -> binary()</name>
+      <fsummary>Encodes an unsigned integer into the minimal binary</fsummary>
+      <type>
+        <v>Unsigned = int() >= 0</v>
+        <v>Endianess = big | little</v>
+      </type>
+      <desc>
+
+      <p>Converts a positive integer to the smallest possible
+      representation in a binary digit representation, either big
+      or little endian.</p>
+
+      <p>Example:</p>
+
+      <code>
+1> binary:encode_unsigned(11111111,big).
+&lt;&lt;169,138,199&gt;&gt;
+      </code>
+      </desc>
+    </func>
+    <func>
+      <name>first(Subject) -> int()</name>
+      <fsummary>Returns the first byte of a binary</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+      </type>
+      <desc>
+
+      <p>Returns the first byte of the binary <c>Subject</c> as an integer. If the
+      size of <c>Subject</c> is zero, a <c>badarg</c> exception is raised.</p>
+
+      </desc>
+    </func>
+    <func>
+      <name>last(Subject) -> int()</name>
+      <fsummary>Returns the last byte of a binary</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+      </type>
+      <desc>
+
+      <p>Returns the last byte of the binary <c>Subject</c> as an integer. If the
+      size of <c>Subject</c> is zero, a <c>badarg</c> exception is raised.</p>
+
+      </desc>
+    </func>
+    <func>
+      <name>list_to_bin(ByteList) -> binary()</name>
+      <fsummary>Convert a list of integers and binaries to a binary</fsummary>
+      <type>
+        <v>ByteList = iodata() (see module erlang)</v>
+      </type>
+      <desc>
+      <p>Works exactly as <c>erlang:list_to_binary/1</c>, added for completeness.</p>
+      </desc>
+    </func>
+    <func>
+      <name>longest_common_prefix(Binaries) -> int()</name>
+      <fsummary>Returns length of longest common prefix for a set of binaries</fsummary>
+      <type>
+        <v>Binaries = [ binary() ]</v>
+      </type>
+      <desc>
+
+      <p>Returns the length of the longest common prefix of the
+      binaries in the list <c>Binaries</c>. Example:</p>
+
+<code>
+1> binary:longest_common_prefix([&lt;&lt;"erlang"&gt;&gt;,&lt;&lt;"ergonomy"&gt;&gt;]).
+2
+2> binary:longest_common_prefix([&lt;&lt;"erlang"&gt;&gt;,&lt;&lt;"perl"&gt;&gt;]).
+0
+</code>
+
+      <p>If <c>Binaries</c> is not a flat list of binaries, a <c>badarg</c> exception is raised.</p>
+      </desc>
+    </func>
+    <func>
+      <name>longest_common_suffix(Binaries) -> int()</name>
+      <fsummary>Returns length of longest common suffix for a set of binaries</fsummary>
+      <type>
+        <v>Binaries = [ binary() ]</v>
+      </type>
+      <desc>
+
+      <p>Returns the length of the longest common suffix of the
+      binaries in the list <c>Binaries</c>. Example:</p>
+
+<code>
+1> binary:longest_common_suffix([&lt;&lt;"erlang"&gt;&gt;,&lt;&lt;"fang"&gt;&gt;]).
+3
+2> binary:longest_common_suffix([&lt;&lt;"erlang"&gt;&gt;,&lt;&lt;"perl"&gt;&gt;]).
+0
+</code>
+
+      <p>If <c>Binaries</c> is not a flat list of binaries, a <c>badarg</c> exception is raised.</p>
+
+      </desc>
+    </func>
+    <func>
+      <name>match(Subject, Pattern) -> Found | <c>nomatch</c></name>
+      <fsummary>Searches for the first match of a pattern in a binary</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+	<v>Pattern = binary() | [ binary() ] | cp()</v>
+	<v>Found = part()</v>
+      </type>
+      <desc>
+      <p>The same as <c>match(Subject, Pattern, [])</c>.</p>
+      </desc>
+    </func>
+    <func>
+      <name>match(Subject,Pattern,Options) -> Found | <c>nomatch</c></name>
+      <fsummary>Searches for the first match of a pattern in a binary</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+	<v>Pattern = binary() | [ binary() ] | cp()</v>
+	<v>Found = part()</v>
+        <v>Options = [ Option ]</v>
+	<v>Option = {scope, part()}</v>
+      </type>
+      <desc>
+
+      <p>Searches for the first occurrence of <c>Pattern</c> in <c>Subject</c> and
+      returns the position and length.</p>
+
+      <p>The function will return <c>{Pos,Length}</c> for the binary
+      in <c>Pattern</c> starting at the lowest position in
+      <c>Subject</c>. Example:</p>
+
+<code>
+1> binary:match(&lt;&lt;"abcde"&gt;&gt;, [&lt;&lt;"bcde"&gt;&gt;,&lt;&lt;"cd"&gt;&gt;],[]).
+{1,4}
+</code>
+
+      <p>Even though <c>&lt;&lt;"cd"&gt;&gt;</c> ends before
+      <c>&lt;&lt;"bcde"&gt;&gt;</c>, <c>&lt;&lt;"bcde"&gt;&gt;</c>
+      begins first and is therefore the first match. If two
+      overlapping matches begin at the same position, the longest is
+      returned.</p>
+
+      <p>Summary of the options:</p>
+
+      <taglist>
+      <tag>{scope, {Start, Length}}</tag>
+      <item><p>Only the given part is searched. Return values still have
+      offsets from the beginning of <c>Subject</c>. A negative <c>Length</c> is
+      allowed as described in the <c>DATA TYPES</c> section of this manual.</p></item>
+      </taglist>
+
+      <p>If none of the strings in
+      <c>Pattern</c> is found, the atom <c>nomatch</c> is returned.</p>
+
+      <p>For a description of <c>Pattern</c>, see
+      <seealso marker="#compile_pattern-1">compile_pattern/1</seealso>.</p>
+
+      <p>If <c>{scope, {Start,Length}}</c> is given in the options
+      such that <c>Start</c> is larger than the size of
+      <c>Subject</c>, <c>Start + Length</c> is less than zero or
+      <c>Start + Length</c> is larger than the size of
+      <c>Subject</c>, a <c>badarg</c> exception is raised.</p>
+
+      </desc>
+    </func>
+    <func>
+      <name>matches(Subject, Pattern) -> Found</name>
+      <fsummary>Searches for all matches of a pattern in a binary</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+        <v>Pattern = binary() | [ binary() ] | cp()</v>
+        <v>Found = [ part() ] | []</v>
+      </type>
+      <desc>
+      <p>The same as <c>matches(Subject, Pattern, [])</c>.</p>
+      </desc>
+    </func>
+    <func>
+      <name>matches(Subject,Pattern,Options) -> Found</name>
+      <fsummary>Searches for all matches of a pattern in a binary</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+        <v>Pattern = binary() | [ binary() ] | cp()</v>
+        <v>Found = [ part() ] | []</v>
+        <v>Options = [ Option ]</v>
+        <v>Option = {scope, part()}</v>
+      </type>
+      <desc>
+
+      <p>Works like match, but the <c>Subject</c> is searched until
+      exhausted and a list of all non-overlapping parts matching
+      <c>Pattern</c> is returned (in order). </p>
+
+      <p>The first and longest match is preferred to a shorter,
+      which is illustrated by the following example:</p>
+
+<code>
+1> binary:matches(&lt;&lt;"abcde"&gt;&gt;,
+                  [&lt;&lt;"bcde"&gt;&gt;,&lt;&lt;"bc"&gt;&gt;,&lt;&lt;"de"&gt;&gt;],[]).
+[{1,4}]
+</code>
+
+       <p>The result shows that &lt;&lt;"bcde"&gt;&gt; is selected instead of the
+       shorter match &lt;&lt;"bc"&gt;&gt; (which would have given rise to one
+       more match, &lt;&lt;"de"&gt;&gt;). This corresponds to the behavior of POSIX
+       regular expressions (and programs like awk), but is not
+       consistent with alternative matches in re (and Perl), where
+       instead lexical ordering in the search pattern selects which
+       string matches.</p>
+
+       <p>If none of the strings in pattern is found, an empty list is returned.</p>
+
+       <p>For a description of <c>Pattern</c>, see <seealso marker="#compile_pattern-1">compile_pattern/1</seealso> and for a
+       description of available options, see <seealso marker="#match-3">match/3</seealso>.</p>
+
+       <p>If <c>{scope, {Start,Length}}</c> is given in the options such that
+       <c>Start</c> is larger than the size of <c>Subject</c>, <c>Start + Length</c> is
+       less than zero or <c>Start + Length</c> is larger than the size of
+       <c>Subject</c>, a <c>badarg</c> exception is raised.</p>
+
+     </desc>
+    </func>
+    <func>
+      <name>part(Subject, PosLen) -> binary()</name>
+      <fsummary>Extracts a part of a binary</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+        <v>PosLen = part()</v>
+      </type>
+      <desc>
+
+      <p>Extracts the part of the binary <c>Subject</c> described by <c>PosLen</c>.</p>
+
+      <p>Negative length can be used to extract bytes at the end of a binary:</p>
+
+<code>
+1> Bin = &lt;&lt;1,2,3,4,5,6,7,8,9,10&gt;&gt;.
+2> binary:part(Bin,{byte_size(Bin), -5}).
+&lt;&lt;6,7,8,9,10&gt;&gt;
+</code>
+
+      <note>
+      <p><seealso marker="#part-2">part/2</seealso> and <seealso
+      marker="#part-3">part/3</seealso> are also available in the
+      <c>erlang</c> module under the names <c>binary_part/2</c> and
+      <c>binary_part/3</c>. Those BIFs are allowed in guard tests.</p>
+      </note>
+
+      <p>If <c>PosLen</c> in any way references outside the binary, a <c>badarg</c> exception
+      is raised.</p>
+
+      </desc>
+    </func>
+    <func>
+      <name>part(Subject, Pos, Len) -> binary()</name>
+      <fsummary>Extracts a part of a binary</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+        <v>Pos = int()</v>
+        <v>Len = int()</v>
+      </type>
+      <desc>
+      <p>The same as <c>part(Subject, {Pos, Len})</c>.</p>
+      </desc>
+    </func>
+    <func>
+      <name>referenced_byte_size(binary()) -> int()</name>
+      <fsummary>Determines the size of the actual binary pointed out by a sub-binary</fsummary>
+      <desc>
+
+     <p>If a binary references a larger binary (often described as
+     being a sub-binary), it can be useful to get the size of the
+     actual referenced binary. This function can be used in a program
+     to trigger the use of <c>copy/1</c>. By copying a binary, one might
+     dereference the original, possibly large, binary which a smaller
+     binary is a reference to.</p>
+
+      <p>Example:</p>
+
+      <code>
+store(Binary, GBSet) ->
+  NewBin =
+      case binary:referenced_byte_size(Binary) of
+          Large when Large > 2 * byte_size(Binary) ->
+             binary:copy(Binary);
+          _ ->
+             Binary
+      end,
+  gb_sets:insert(NewBin,GBSet).
+      </code>
+
+      <p>In this example, we chose to copy the binary content before
+      inserting it in the <c>gb_set()</c> if it references a binary more than
+      twice the size of the data we're going to keep. Of course,
+      different rules for when to copy will apply to different
+      programs.</p>
+
+      <p>Binary sharing will occur whenever binaries are taken apart.
+      This is the fundamental reason why binaries are fast:
+      decomposition can always be done with O(1) complexity. In rare
+      circumstances this data sharing is however undesirable, which is why this
+      function together with <c>copy/1</c> might be useful when optimizing
+      for memory use.</p>
+
+      <p>Example of binary sharing:</p>
+
+      <code>
+1> A = binary:copy(&lt;&lt;1&gt;&gt;,100).
+&lt;&lt;1,1,1,1,1 ...
+2> byte_size(A).
+100
+3> binary:referenced_byte_size(A).
+100
+4> &lt;&lt;_:10/binary,B:10/binary,_/binary&gt;&gt; = A.
+&lt;&lt;1,1,1,1,1 ...
+5> byte_size(B).
+10
+6> binary:referenced_byte_size(B).
+100
+      </code>
+
+      <note>
+      <p>Binary data is shared among processes. If another process
+      still references the larger binary, copying the part this
+      process uses only consumes more memory and will not free up the
+      larger binary for garbage collection. Use this kind of intrusive
+      functions with extreme care, and only if a real problem is
+      detected.</p>
+      </note>
+
+      </desc>
+    </func>
+    <func>
+      <name>replace(Subject,Pattern,Replacement) -> Result</name>
+      <fsummary>Replaces bytes in a binary according to a pattern</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+        <v>Pattern = binary() | [ binary() ] | cp()</v>
+        <v>Replacement = binary()</v>
+        <v>Result = binary()</v>
+      </type>
+      <desc>
+      <p>The same as <c>replace(Subject,Pattern,Replacement,[])</c>.</p>
+      </desc>
+    </func>
+    <func>
+      <name>replace(Subject,Pattern,Replacement,Options) -> Result</name>
+      <fsummary>Replaces bytes in a binary according to a pattern</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+        <v>Pattern = binary() | [ binary() ] | cp()</v>
+        <v>Replacement = binary()</v>
+        <v>Result = binary()</v>
+        <v>Options = [ Option ]</v>
+        <v>Option = global | {scope, part()} | {insert_replaced, InsPos}</v>
+        <v>InsPos = OnePos | [ OnePos ]</v>
+        <v>OnePos = int() =&lt; byte_size(Replacement)</v>
+      </type>
+      <desc>
+
+      <p>Constructs a new binary by replacing the parts in
+      <c>Subject</c> matching <c>Pattern</c> with the content of
+      <c>Replacement</c>.</p>
+
+      <p>If the matching sub-part of <c>Subject</c> giving rise to the
+      replacement is to be inserted in the result, the option
+      <c>{insert_replaced, InsPos}</c> will insert the matching part into
+      <c>Replacement</c> at the given position (or positions) before actually
+      inserting <c>Replacement</c> into the <c>Subject</c>. Example:</p>
+
+<code>
+1> binary:replace(&lt;&lt;"abcde"&gt;&gt;,&lt;&lt;"b"&gt;&gt;,&lt;&lt;"[]"&gt;&gt;,[{insert_replaced,1}]).
+&lt;&lt;"a[b]cde"&gt;&gt;
+2> binary:replace(&lt;&lt;"abcde"&gt;&gt;,[&lt;&lt;"b"&gt;&gt;,&lt;&lt;"d"&gt;&gt;],&lt;&lt;"[]"&gt;&gt;,
+                 [global,{insert_replaced,1}]).
+&lt;&lt;"a[b]c[d]e"&gt;&gt;
+3> binary:replace(&lt;&lt;"abcde"&gt;&gt;,[&lt;&lt;"b"&gt;&gt;,&lt;&lt;"d"&gt;&gt;],&lt;&lt;"[]"&gt;&gt;,
+                 [global,{insert_replaced,[1,1]}]).
+&lt;&lt;"a[bb]c[dd]e"&gt;&gt;
+4> binary:replace(&lt;&lt;"abcde"&gt;&gt;,[&lt;&lt;"b"&gt;&gt;,&lt;&lt;"d"&gt;&gt;],&lt;&lt;"[-]"&gt;&gt;,
+                 [global,{insert_replaced,[1,2]}]).
+&lt;&lt;"a[b-b]c[d-d]e"&gt;&gt;
+</code>
+
+        <p>If any position given in <c>InsPos</c> is greater than the size of the replacement binary, a <c>badarg</c> exception is raised.</p>
+
+	<p>The options <c>global</c> and <c>{scope, part()}</c> work as for <seealso marker="#split-3">split/3</seealso>. The return type is always a <c>binary()</c>.</p>
+
+	<p>For a description of <c>Pattern</c>, see <seealso marker="#compile_pattern-1">compile_pattern/1</seealso>.</p>
+      </desc>
+    </func>
+    <func>
+      <name>split(Subject,Pattern) -> Parts</name>
+      <fsummary>Splits a binary according to a pattern</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+        <v>Pattern = binary() | [ binary() ] | cp()</v>
+        <v>Parts = [ binary() ]</v>
+      </type>
+      <desc>
+      <p>The same as <c>split(Subject, Pattern, [])</c>.</p>
+      </desc>
+    </func>
+    <func>
+      <name>split(Subject,Pattern,Options) -> Parts</name>
+      <fsummary>Splits a binary according to a pattern</fsummary>
+      <type>
+        <v>Subject = binary()</v>
+        <v>Pattern = binary() | [ binary() ] | cp()</v>
+        <v>Parts = [ binary() ]</v>
+        <v>Options = [ Option ]</v>
+        <v>Option = {scope, part()} | trim | global</v>
+      </type>
+      <desc>
+
+      <p>Splits <c>Subject</c> into a list of binaries based on <c>Pattern</c>. If
+      the option <c>global</c> is not given, only the first occurrence of
+      <c>Pattern</c> in <c>Subject</c> will give rise to a split.</p>
+
+      <p>The parts of Pattern actually found in Subject are not included in the result.</p>
+
+      <p>Example:</p>
+
+<code>
+1> binary:split(&lt;&lt;1,255,4,0,0,0,2,3&gt;&gt;, [&lt;&lt;0,0,0&gt;&gt;,&lt;&lt;2&gt;&gt;],[]).
+[&lt;&lt;1,255,4&gt;&gt;, &lt;&lt;2,3&gt;&gt;]
+2> binary:split(&lt;&lt;0,1,0,0,4,255,255,9&gt;&gt;, [&lt;&lt;0,0&gt;&gt;, &lt;&lt;255,255&gt;&gt;],[global]).
+[&lt;&lt;0,1&gt;&gt;,&lt;&lt;4&gt;&gt;,&lt;&lt;9&gt;&gt;]
+</code>
+
+      <p>Summary of options:</p>
+      <taglist>
+
+      <tag>{scope, part()}</tag>
+
+      <item><p>Works as in <seealso marker="#match-3">match/3</seealso> and
+      <seealso marker="#matches-3">matches/3</seealso>. Note that
+      this only defines the scope of the search for matching strings,
+      it does not cut the binary before splitting. The bytes before
+      and after the scope will be kept in the result. See example
+      below.</p></item>
+
+      <tag>trim</tag>
+
+      <item><p>Removes trailing empty parts of the result (as does trim in <c>re:split/3</c>).</p></item>
+
+      <tag>global</tag>
+
+      <item><p>Repeats the split until the <c>Subject</c> is
+      exhausted. Conceptually the global option makes split work on
+      the positions returned by <seealso marker="#matches-3">matches/3</seealso>,
+      while it normally
+      works on the position returned by
+      <seealso marker="#match-3">match/3</seealso>.</p></item>
+
+      </taglist>
+
+     <p>Example of the difference between a scope and taking the
+     binary apart before splitting:</p>
+
+<code>
+1> binary:split(&lt;&lt;"banana"&gt;&gt;,[&lt;&lt;"a"&gt;&gt;],[{scope,{2,3}}]).
+[&lt;&lt;"ban"&gt;&gt;,&lt;&lt;"na"&gt;&gt;]
+2> binary:split(binary:part(&lt;&lt;"banana"&gt;&gt;,{2,3}),[&lt;&lt;"a"&gt;&gt;],[]).
+[&lt;&lt;"n"&gt;&gt;,&lt;&lt;"n"&gt;&gt;]
+</code>
+
+      <p>The return type is always a list of binaries that are all
+      referencing <c>Subject</c>. This means that the data in <c>Subject</c> is not
+      actually copied to new binaries and that <c>Subject</c> cannot be
+      garbage collected until the results of the split are no longer
+      referenced.</p>
+
+      <p>For a description of <c>Pattern</c>, see <seealso marker="#compile_pattern-1">compile_pattern/1</seealso>.</p>
+
+      </desc>
+    </func>
+  </funcs>
+</erlref>
diff --git a/lib/stdlib/doc/src/ref_man.xml b/lib/stdlib/doc/src/ref_man.xml
index f6ae368e92..de7aeb2274 100644
--- a/lib/stdlib/doc/src/ref_man.xml
+++ b/lib/stdlib/doc/src/ref_man.xml
@@ -37,6 +37,7 @@
   <xi:include href="array.xml"/>
   <xi:include href="base64.xml"/>
   <xi:include href="beam_lib.xml"/>
+  <xi:include href="binary.xml"/>
   <xi:include href="c.xml"/>
   <xi:include href="calendar.xml"/>
   <xi:include href="dets.xml"/>
diff --git a/lib/stdlib/test/binary_module_SUITE.erl b/lib/stdlib/test/binary_module_SUITE.erl
index 028b7f0f17..16ed9a2c26 100644
--- a/lib/stdlib/test/binary_module_SUITE.erl
+++ b/lib/stdlib/test/binary_module_SUITE.erl
@@ -727,9 +727,10 @@ copy(Config) when is_list(Config) ->
     ?line RS = random_string({1,10000}),
     ?line RS = RS2 = binary:copy(RS),
     ?line false = erts_debug:same(RS,RS2),
-    ?line badarg = ?MASK_ERROR(binary:copy(<<1,2,3>>,0)),
+    ?line <<>> = ?MASK_ERROR(binary:copy(<<1,2,3>>,0)),
     ?line badarg = ?MASK_ERROR(binary:copy(<<1,2,3:3>>,2)),
-    ?line badarg = ?MASK_ERROR(binary:copy(<<>>,0)),
+    ?line badarg = ?MASK_ERROR(binary:copy([],0)),
+    ?line <<>> = ?MASK_ERROR(binary:copy(<<>>,0)),
     ?line badarg = ?MASK_ERROR(binary:copy(<<1,2,3>>,1.0)),
     ?line badarg = ?MASK_ERROR(binary:copy(<<1,2,3>>,
 					   16#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)),
diff --git a/lib/stdlib/test/binref.erl b/lib/stdlib/test/binref.erl
index af79c8fa09..6d96736ef3 100644
--- a/lib/stdlib/test/binref.erl
+++ b/lib/stdlib/test/binref.erl
@@ -465,7 +465,7 @@ copy(Subject) ->
     copy(Subject,1).
 copy(Subject,N) ->
     try
-	true = is_integer(N) and (N > 0) and is_binary(Subject), % Badarg, not function clause
+	true = is_integer(N) and (N >= 0) and is_binary(Subject), % Badarg, not function clause
 	erlang:list_to_binary(lists:duplicate(N,Subject))
     catch
 	_:_ ->
author	Patrik Nyblom <[email protected]>	2010-05-10 16:27:58 +0200
committer	Björn Gustavsson <[email protected]>	2010-05-17 15:51:50 +0200
commit	97ab480df55cf574ab42a87b6927ef5bba83000e (patch)
tree	a19bfd73bfb45d772c4fd54cad13c1050a6b59a3
parent	dce00e268eb36048a729db9a2a9aebd4df0e7395 (diff)
download	otp-97ab480df55cf574ab42a87b6927ef5bba83000e.tar.gz otp-97ab480df55cf574ab42a87b6927ef5bba83000e.tar.bz2 otp-97ab480df55cf574ab42a87b6927ef5bba83000e.zip