aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--erts/doc/src/erlang.xml53
-rw-r--r--erts/emulator/beam/erl_bif_binary.c3
-rw-r--r--lib/stdlib/doc/src/Makefile1
-rw-r--r--lib/stdlib/doc/src/binary.xml729
-rw-r--r--lib/stdlib/doc/src/ref_man.xml1
-rw-r--r--lib/stdlib/test/binary_module_SUITE.erl5
-rw-r--r--lib/stdlib/test/binref.erl2
7 files changed, 790 insertions, 4 deletions
diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml
index cd9bb85f5c..e90160dfd7 100644
--- a/erts/doc/src/erlang.xml
+++ b/erts/doc/src/erlang.xml
@@ -253,6 +253,54 @@ iolist() = [char() | binary() | iolist()]
</desc>
</func>
<func>
+ <name>binary_part(Subject, PosLen) -> binary()</name>
+ <fsummary>Extracts a part of a binary</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ <v>PosLen = {Start,Length}</v>
+ <v>Start = int()</v>
+ <v>Length = int()</v>
+ </type>
+ <desc>
+ <p>Extracts the part of the binary described by <c>PosLen</c>.</p>
+
+ <p>Negative length can be used to extract bytes at the end of a binary:</p>
+
+<code>
+1> Bin = &lt;&lt;1,2,3,4,5,6,7,8,9,10&gt;&gt;.
+2> binary_part(Bin,{byte_size(Bin), -5}).
+&lt;&lt;6,7,8,9,10&gt;&gt;
+</code>
+
+ <p>If <c>PosLen</c> in any way references outside the binary, a <c>badarg</c> exception is raised.</p>
+
+ <p><c>Start</c> is zero-based, i.e:</p>
+<code>
+1> Bin = &lt;&lt;1,2,3&gt;&gt;.
+2> binary_part(Bin,{0,2}).
+&lt;&lt;1,2&gt;&gt;
+</code>
+
+ <p>See the STDLIB module <c>binary</c> for details about the <c>PosLen</c> semantics.</p>
+
+ <p>Allowed in guard tests.</p>
+ </desc>
+ </func>
+ <func>
+ <name>binary_part(Subject, Start, Length) -> binary()</name>
+ <fsummary>Extracts a part of a binary</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ <v>Start = int()</v>
+ <v>Length = int()</v>
+ </type>
+ <desc>
+ <p>The same as <c>binary_part(Subject, {Start, Length})</c>.</p>
+
+ <p>Allowed in guard tests.</p>
+ </desc>
+ </func>
+ <func>
<name>binary_to_atom(Binary, Encoding) -> atom()</name>
<fsummary>Convert from text representation to an atom</fsummary>
<type>
@@ -318,6 +366,11 @@ iolist() = [char() | binary() | iolist()]
corresponding to the bytes from position <c>Start</c> to
position <c>Stop</c> in <c>Binary</c>. Positions in the
binary are numbered starting from 1.</p>
+
+ <note><p>This function's indexing style of using one-based indices for
+ binaries is deprecated. New code should use the functions in
+ the STDLIB module <c>binary</c> instead. They consistently
+ use the same (zero-based) style of indexing.</p></note>
</desc>
</func>
<func>
diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c
index 0a40e28474..3e8480324c 100644
--- a/erts/emulator/beam/erl_bif_binary.c
+++ b/erts/emulator/beam/erl_bif_binary.c
@@ -2369,7 +2369,8 @@ static BIF_RETTYPE do_binary_copy(Process *p, Eterm bin, Eterm en)
goto badarg;
}
if (!n) {
- goto badarg;
+ Eterm res_term = erts_new_heap_binary(p,NULL,0,&bytes);
+ BIF_RET(res_term);
}
ERTS_GET_BINARY_BYTES(bin,bytes,bit_offs,bit_size);
if (bit_size != 0) {
diff --git a/lib/stdlib/doc/src/Makefile b/lib/stdlib/doc/src/Makefile
index 13b9b2ff18..353c1b90b9 100644
--- a/lib/stdlib/doc/src/Makefile
+++ b/lib/stdlib/doc/src/Makefile
@@ -40,6 +40,7 @@ XML_REF3_FILES = \
array.xml \
base64.xml \
beam_lib.xml \
+ binary.xml \
c.xml \
calendar.xml \
dets.xml \
diff --git a/lib/stdlib/doc/src/binary.xml b/lib/stdlib/doc/src/binary.xml
new file mode 100644
index 0000000000..05ec4406c6
--- /dev/null
+++ b/lib/stdlib/doc/src/binary.xml
@@ -0,0 +1,729 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE erlref SYSTEM "erlref.dtd">
+
+<erlref>
+ <header>
+ <copyright>
+ <year>2009</year>
+ <year>2010</year>
+ <holder>Ericsson AB, All Rights Reserved</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved on line at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ The Initial Developer of the Original Code is Ericsson AB.
+ </legalnotice>
+
+ <title>binary</title>
+ <prepared>Patrik Nyblom</prepared>
+ <responsible>Kenneth Lundin</responsible>
+ <docno>1</docno>
+ <approved></approved>
+ <checked></checked>
+ <date>2010-05-05</date>
+ <rev>A</rev>
+ <file>binary.xml</file>
+ </header>
+ <module>binary</module>
+ <modulesummary>Library for handling binary data</modulesummary>
+ <description>
+
+ <p>This module contains functions for manipulating byte-oriented
+ binaries. Although the majority of functions could be implemented
+ using bit-syntax, the functions in this library are highly
+ optimized and are expected to either execute faster or consume
+ less memory (or both) than a counterpart written in pure Erlang.</p>
+
+ <p>The module is implemented according to the EEP (Erlang Enhancement Proposal) 31.</p>
+
+ <note>
+ <p>
+ The library handles byte-oriented data. Bitstrings that are not
+ binaries (do not contain whole octets of bits) will result in a <c>badarg</c>
+ exception being thrown from any of the functions in this
+ module.
+ </p>
+ </note>
+
+
+ </description>
+ <section>
+ <title>DATA TYPES</title>
+ <code type="none">
+ cp()
+ - Opaque data-type representing a compiled search-pattern. Guaranteed to be a tuple()
+ to allow programs to distinguish it from non precompiled search patterns.
+ </code>
+ <code type="none">
+ part() = {Start,Length}
+ Start = int()
+ Length = int()
+ - A representation of a part (or range) in a binary. Start is a
+ zero-based offset into a binary() and Length is the length of
+ that part. As input to functions in this module, a reverse
+ part specification is allowed, constructed with a negative
+ Length, so that the part of the binary begins at Start +
+ Length and is -Length long. This is useful for referencing the
+ last N bytes of a binary as {size(Binary), -N}. The functions
+ in this module always return part()'s with positive Length.
+ </code>
+ </section>
+ <funcs>
+ <func>
+ <name>at(Subject, Pos) -> int()</name>
+ <fsummary>Returns the byte at a specific position in a binary</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ <v>Pos = int() >= 0</v>
+ </type>
+ <desc>
+
+ <p>Returns the byte at position <c>Pos</c> (zero-based) in the binary
+ <c>Subject</c> as an integer. If <c>Pos</c> &gt;= <c>byte_size(Subject)</c>,
+ a <c>badarg</c>
+ exception is raised.</p>
+
+ </desc>
+ </func>
+ <func>
+ <name>bin_to_list(Subject) -> list()</name>
+ <fsummary>Convert a binary to a list of integers</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ </type>
+ <desc>
+ <p>The same as <c>bin_to_list(Subject,{0,byte_size(Subject)})</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>bin_to_list(Subject, PosLen) -> list()</name>
+ <fsummary>Convert a binary to a list of integers</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ <v>PosLen = part()</v>
+ </type>
+ <desc>
+
+ <p>Converts <c>Subject</c> to a list of <c>int()</c>s, each representing
+ the value of one byte. The <c>part()</c> denotes which part of the
+ <c>binary()</c> to convert. Example:</p>
+
+<code>
+1> binary:bin_to_list(&lt;&lt;"erlang"&gt;&gt;,{1,3}).
+"rla"
+%% or [114,108,97] in list notation.
+</code>
+ <p>If <c>PosLen</c> in any way references outside the binary, a <c>badarg</c> exception is raised.</p>
+ </desc>
+ </func>
+ <func>
+ <name>bin_to_list(Subject, Pos, Len) -> list()</name>
+ <fsummary>Convert a binary to a list of integers</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ <v>Pos = int()</v>
+ <v>Len = int()</v>
+ </type>
+ <desc>
+ <p>The same as <c>bin_to_list(Subject,{Pos,Len})</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>compile_pattern(Pattern) -> cp()</name>
+ <fsummary>Pre-compiles a binary search pattern</fsummary>
+ <type>
+ <v>Pattern = binary() | [ binary() ]</v>
+ </type>
+ <desc>
+
+ <p>Builds an internal structure representing a compilation of a
+ search-pattern, later to be used in the <seealso marker="#match-3">match/3</seealso>,
+ <seealso marker="#matches-3">matches/3</seealso>,
+ <seealso marker="#split-3">split/3</seealso> or
+ <seealso marker="#replace-4">replace/4</seealso>
+ functions. The <c>cp()</c> returned is guaranteed to be a
+ <c>tuple()</c> to allow programs to distinguish it from non
+ pre-compiled search patterns.</p>
+
+ <p>When a list of binaries is given, it denotes a set of
+ alternative binaries to search for. I.e. if
+ <c>[&lt;&lt;"functional"&gt;&gt;,&lt;&lt;"programming"&gt;&gt;]</c>
+ is given as <c>Pattern</c>, this
+ means "either <c>&lt;&lt;"functional"&gt;&gt;</c> or
+ <c>&lt;&lt;"programming"&gt;&gt;</c>". The pattern is a set of
+ alternatives; when only a single binary is given, the set has
+ only one element. The order of alternatives in a pattern is not significant.</p>
+
+ <p>The list of binaries used for search alternatives shall be flat and proper.</p>
+
+ <p>If <c>Pattern</c> is not a binary or a flat proper list of binaries with length &gt; 0,
+ a <c>badarg</c> exception will be raised.</p>
+
+ </desc>
+ </func>
+ <func>
+ <name>copy(Subject) -> binary()</name>
+ <fsummary>Creates a duplicate of a binary</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ </type>
+ <desc>
+ <p>The same as <c>copy(Subject, 1)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>copy(Subject,N) -> binary()</name>
+ <fsummary>Duplicates a binary N times and creates a new binary</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ <v>N = int() >= 0</v>
+ </type>
+ <desc>
+ <p>Creates a binary with the content of <c>Subject</c> duplicated <c>N</c> times.</p>
+
+ <p>This function will always create a new binary, even if <c>N =
+ 1</c>. By using <c>copy/1</c> on a binary referencing a larger binary, one
+ might free up the larger binary for garbage collection.</p>
+
+ <note>
+ <p>By deliberately copying a single binary to avoid referencing
+ a larger binary, one might, instead of freeing up the larger
+ binary for later garbage collection, create much more binary
+ data than needed. Sharing binary data is usually good. Only in
+ special cases, when small parts reference large binaries and the
+ large binaries are no longer used in any process, deliberate
+ copying might be a good idea.</p> </note>
+
+ <p>If <c>N</c> &lt; <c>0</c>, a <c>badarg</c> exception is raised.</p>
+ </desc>
+ </func>
+ <func>
+ <name>decode_unsigned(Subject) -> Unsigned</name>
+ <fsummary>Decode a whole binary into an integer of arbitrary size</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ <v>Unsigned = int() >= 0</v>
+ </type>
+ <desc>
+ <p>The same as <c>decode_unsigned(Subject,big)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>decode_unsigned(Subject, Endianness) -> Unsigned</name>
+ <fsummary>Decode a whole binary into an integer of arbitrary size</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ <v>Endianness = big | little</v>
+ <v>Unsigned = int() >= 0</v>
+ </type>
+ <desc>
+
+ <p>Converts the binary digit representation, in big or little
+ endian, of a positive integer in <c>Subject</c> to an Erlang <c>int()</c>.</p>
+
+ <p>Example:</p>
+
+ <code>
+1> binary:decode_unsigned(&lt;&lt;169,138,199&gt;&gt;,big).
+11111111
+ </code>
+ </desc>
+ </func>
+ <func>
+ <name>encode_unsigned(Unsigned) -> binary()</name>
+ <fsummary>Encodes an unsigned integer into the minimal binary</fsummary>
+ <type>
+ <v>Unsigned = int() >= 0</v>
+ </type>
+ <desc>
+ <p>The same as <c>encode_unsigned(Unsigned,big)</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>encode_unsigned(Unsigned,Endianness) -> binary()</name>
+ <fsummary>Encodes an unsigned integer into the minimal binary</fsummary>
+ <type>
+ <v>Unsigned = int() >= 0</v>
+ <v>Endianness = big | little</v>
+ </type>
+ <desc>
+
+ <p>Converts a positive integer to the smallest possible
+ representation in a binary digit representation, either big
+ or little endian.</p>
+
+ <p>Example:</p>
+
+ <code>
+1> binary:encode_unsigned(11111111,big).
+&lt;&lt;169,138,199&gt;&gt;
+ </code>
+ </desc>
+ </func>
+ <func>
+ <name>first(Subject) -> int()</name>
+ <fsummary>Returns the first byte of a binary</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ </type>
+ <desc>
+
+ <p>Returns the first byte of the binary <c>Subject</c> as an integer. If the
+ size of <c>Subject</c> is zero, a <c>badarg</c> exception is raised.</p>
+
+ </desc>
+ </func>
+ <func>
+ <name>last(Subject) -> int()</name>
+ <fsummary>Returns the last byte of a binary</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ </type>
+ <desc>
+
+ <p>Returns the last byte of the binary <c>Subject</c> as an integer. If the
+ size of <c>Subject</c> is zero, a <c>badarg</c> exception is raised.</p>
+
+ </desc>
+ </func>
+ <func>
+ <name>list_to_bin(ByteList) -> binary()</name>
+ <fsummary>Convert a list of integers and binaries to a binary</fsummary>
+ <type>
+ <v>ByteList = iodata() (see module erlang)</v>
+ </type>
+ <desc>
+ <p>Works exactly as <c>erlang:list_to_binary/1</c>, added for completeness.</p>
+ </desc>
+ </func>
+ <func>
+ <name>longest_common_prefix(Binaries) -> int()</name>
+ <fsummary>Returns length of longest common prefix for a set of binaries</fsummary>
+ <type>
+ <v>Binaries = [ binary() ]</v>
+ </type>
+ <desc>
+
+ <p>Returns the length of the longest common prefix of the
+ binaries in the list <c>Binaries</c>. Example:</p>
+
+<code>
+1> binary:longest_common_prefix([&lt;&lt;"erlang"&gt;&gt;,&lt;&lt;"ergonomy"&gt;&gt;]).
+2
+2> binary:longest_common_prefix([&lt;&lt;"erlang"&gt;&gt;,&lt;&lt;"perl"&gt;&gt;]).
+0
+</code>
+
+ <p>If <c>Binaries</c> is not a flat list of binaries, a <c>badarg</c> exception is raised.</p>
+ </desc>
+ </func>
+ <func>
+ <name>longest_common_suffix(Binaries) -> int()</name>
+ <fsummary>Returns length of longest common suffix for a set of binaries</fsummary>
+ <type>
+ <v>Binaries = [ binary() ]</v>
+ </type>
+ <desc>
+
+ <p>Returns the length of the longest common suffix of the
+ binaries in the list <c>Binaries</c>. Example:</p>
+
+<code>
+1> binary:longest_common_suffix([&lt;&lt;"erlang"&gt;&gt;,&lt;&lt;"fang"&gt;&gt;]).
+3
+2> binary:longest_common_suffix([&lt;&lt;"erlang"&gt;&gt;,&lt;&lt;"perl"&gt;&gt;]).
+0
+</code>
+
+ <p>If <c>Binaries</c> is not a flat list of binaries, a <c>badarg</c> exception is raised.</p>
+
+ </desc>
+ </func>
+ <func>
+ <name>match(Subject, Pattern) -> Found | <c>nomatch</c></name>
+ <fsummary>Searches for the first match of a pattern in a binary</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ <v>Pattern = binary() | [ binary() ] | cp()</v>
+ <v>Found = part()</v>
+ </type>
+ <desc>
+ <p>The same as <c>match(Subject, Pattern, [])</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>match(Subject,Pattern,Options) -> Found | <c>nomatch</c></name>
+ <fsummary>Searches for the first match of a pattern in a binary</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ <v>Pattern = binary() | [ binary() ] | cp()</v>
+ <v>Found = part()</v>
+ <v>Options = [ Option ]</v>
+ <v>Option = {scope, part()}</v>
+ </type>
+ <desc>
+
+ <p>Searches for the first occurrence of <c>Pattern</c> in <c>Subject</c> and
+ returns the position and length.</p>
+
+ <p>The function will return <c>{Pos,Length}</c> for the binary
+ in <c>Pattern</c> starting at the lowest position in
+ <c>Subject</c>. Example:</p>
+
+<code>
+1> binary:match(&lt;&lt;"abcde"&gt;&gt;, [&lt;&lt;"bcde"&gt;&gt;,&lt;&lt;"cd"&gt;&gt;],[]).
+{1,4}
+</code>
+
+ <p>Even though <c>&lt;&lt;"cd"&gt;&gt;</c> ends before
+ <c>&lt;&lt;"bcde"&gt;&gt;</c>, <c>&lt;&lt;"bcde"&gt;&gt;</c>
+ begins first and is therefore the first match. If two
+ overlapping matches begin at the same position, the longest is
+ returned.</p>
+
+ <p>Summary of the options:</p>
+
+ <taglist>
+ <tag>{scope, {Start, Length}}</tag>
+ <item><p>Only the given part is searched. Return values still have
+ offsets from the beginning of <c>Subject</c>. A negative <c>Length</c> is
+ allowed as described in the <c>DATA TYPES</c> section of this manual.</p></item>
+ </taglist>
+
+ <p>If none of the strings in
+ <c>Pattern</c> is found, the atom <c>nomatch</c> is returned.</p>
+
+ <p>For a description of <c>Pattern</c>, see
+ <seealso marker="#compile_pattern-1">compile_pattern/1</seealso>.</p>
+
+ <p>If <c>{scope, {Start,Length}}</c> is given in the options
+ such that <c>Start</c> is larger than the size of
+ <c>Subject</c>, <c>Start + Length</c> is less than zero or
+ <c>Start + Length</c> is larger than the size of
+ <c>Subject</c>, a <c>badarg</c> exception is raised.</p>
+
+ </desc>
+ </func>
+ <func>
+ <name>matches(Subject, Pattern) -> Found</name>
+ <fsummary>Searches for all matches of a pattern in a binary</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ <v>Pattern = binary() | [ binary() ] | cp()</v>
+ <v>Found = [ part() ] | []</v>
+ </type>
+ <desc>
+ <p>The same as <c>matches(Subject, Pattern, [])</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>matches(Subject,Pattern,Options) -> Found</name>
+ <fsummary>Searches for all matches of a pattern in a binary</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ <v>Pattern = binary() | [ binary() ] | cp()</v>
+ <v>Found = [ part() ] | []</v>
+ <v>Options = [ Option ]</v>
+ <v>Option = {scope, part()}</v>
+ </type>
+ <desc>
+
+ <p>Works like match, but the <c>Subject</c> is searched until
+ exhausted and a list of all non-overlapping parts matching
+ <c>Pattern</c> is returned (in order). </p>
+
+ <p>The first and longest match is preferred to a shorter,
+ which is illustrated by the following example:</p>
+
+<code>
+1> binary:matches(&lt;&lt;"abcde"&gt;&gt;,
+ [&lt;&lt;"bcde"&gt;&gt;,&lt;&lt;"bc"&gt;&gt;,&lt;&lt;"de"&gt;&gt;],[]).
+[{1,4}]
+</code>
+
+ <p>The result shows that &lt;&lt;"bcde"&gt;&gt; is selected instead of the
+ shorter match &lt;&lt;"bc"&gt;&gt; (which would have given rise to one
+ more match, &lt;&lt;"de"&gt;&gt;). This corresponds to the behavior of POSIX
+ regular expressions (and programs like awk), but is not
+ consistent with alternative matches in re (and Perl), where
+ instead lexical ordering in the search pattern selects which
+ string matches.</p>
+
+ <p>If none of the strings in pattern is found, an empty list is returned.</p>
+
+ <p>For a description of <c>Pattern</c>, see <seealso marker="#compile_pattern-1">compile_pattern/1</seealso> and for a
+ description of available options, see <seealso marker="#match-3">match/3</seealso>.</p>
+
+ <p>If <c>{scope, {Start,Length}}</c> is given in the options such that
+ <c>Start</c> is larger than the size of <c>Subject</c>, <c>Start + Length</c> is
+ less than zero or <c>Start + Length</c> is larger than the size of
+ <c>Subject</c>, a <c>badarg</c> exception is raised.</p>
+
+ </desc>
+ </func>
+ <func>
+ <name>part(Subject, PosLen) -> binary()</name>
+ <fsummary>Extracts a part of a binary</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ <v>PosLen = part()</v>
+ </type>
+ <desc>
+
+ <p>Extracts the part of the binary <c>Subject</c> described by <c>PosLen</c>.</p>
+
+ <p>Negative length can be used to extract bytes at the end of a binary:</p>
+
+<code>
+1> Bin = &lt;&lt;1,2,3,4,5,6,7,8,9,10&gt;&gt;.
+2> binary:part(Bin,{byte_size(Bin), -5}).
+&lt;&lt;6,7,8,9,10&gt;&gt;
+</code>
+
+ <note>
+ <p><seealso marker="#part-2">part/2</seealso> and <seealso
+ marker="#part-3">part/3</seealso> are also available in the
+ <c>erlang</c> module under the names <c>binary_part/2</c> and
+ <c>binary_part/3</c>. Those BIFs are allowed in guard tests.</p>
+ </note>
+
+ <p>If <c>PosLen</c> in any way references outside the binary, a <c>badarg</c> exception
+ is raised.</p>
+
+ </desc>
+ </func>
+ <func>
+ <name>part(Subject, Pos, Len) -> binary()</name>
+ <fsummary>Extracts a part of a binary</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ <v>Pos = int()</v>
+ <v>Len = int()</v>
+ </type>
+ <desc>
+ <p>The same as <c>part(Subject, {Pos, Len})</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>referenced_byte_size(binary()) -> int()</name>
+ <fsummary>Determines the size of the actual binary pointed out by a sub-binary</fsummary>
+ <desc>
+
+ <p>If a binary references a larger binary (often described as
+ being a sub-binary), it can be useful to get the size of the
+ actual referenced binary. This function can be used in a program
+ to trigger the use of <c>copy/1</c>. By copying a binary, one might
+ dereference the original, possibly large, binary which a smaller
+ binary is a reference to.</p>
+
+ <p>Example:</p>
+
+ <code>
+store(Binary, GBSet) ->
+ NewBin =
+ case binary:referenced_byte_size(Binary) of
+ Large when Large > 2 * byte_size(Binary) ->
+ binary:copy(Binary);
+ _ ->
+ Binary
+ end,
+ gb_sets:insert(NewBin,GBSet).
+ </code>
+
+ <p>In this example, we chose to copy the binary content before
+ inserting it in the <c>gb_set()</c> if it references a binary more than
+ twice the size of the data we're going to keep. Of course,
+ different rules for when to copy will apply to different
+ programs.</p>
+
+ <p>Binary sharing will occur whenever binaries are taken apart.
+ This is the fundamental reason why binaries are fast;
+ decomposition can always be done with O(1) complexity. In rare
+ circumstances this data sharing is however undesirable, which is why this
+ function together with <c>copy/1</c> might be useful when optimizing
+ for memory use.</p>
+
+ <p>Example of binary sharing:</p>
+
+ <code>
+1> A = binary:copy(&lt;&lt;1&gt;&gt;,100).
+&lt;&lt;1,1,1,1,1 ...
+2> byte_size(A).
+100
+3> binary:referenced_byte_size(A).
+100
+4> &lt;&lt;_:10/binary,B:10/binary,_/binary&gt;&gt; = A.
+&lt;&lt;1,1,1,1,1 ...
+5> byte_size(B).
+10
+6> binary:referenced_byte_size(B).
+100
+ </code>
+
+ <note>
+ <p>Binary data is shared among processes. If another process
+ still references the larger binary, copying the part this
+ process uses only consumes more memory and will not free up the
+ larger binary for garbage collection. Use this kind of intrusive
+ functions with extreme care, and only if a real problem is
+ detected.</p>
+ </note>
+
+ </desc>
+ </func>
+ <func>
+ <name>replace(Subject,Pattern,Replacement) -> Result</name>
+ <fsummary>Replaces bytes in a binary according to a pattern</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ <v>Pattern = binary() | [ binary() ] | cp()</v>
+ <v>Replacement = binary()</v>
+ <v>Result = binary()</v>
+ </type>
+ <desc>
+ <p>The same as <c>replace(Subject,Pattern,Replacement,[])</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>replace(Subject,Pattern,Replacement,Options) -> Result</name>
+ <fsummary>Replaces bytes in a binary according to a pattern</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ <v>Pattern = binary() | [ binary() ] | cp()</v>
+ <v>Replacement = binary()</v>
+ <v>Result = binary()</v>
+ <v>Options = [ Option ]</v>
+ <v>Option = global | {scope, part()} | {insert_replaced, InsPos}</v>
+ <v>InsPos = OnePos | [ OnePos ]</v>
+ <v>OnePos = int() =&lt; byte_size(Replacement)</v>
+ </type>
+ <desc>
+
+ <p>Constructs a new binary by replacing the parts in
+ <c>Subject</c> matching <c>Pattern</c> with the content of
+ <c>Replacement</c>.</p>
+
+ <p>If the matching sub-part of <c>Subject</c> giving rise to the
+ replacement is to be inserted in the result, the option
+ <c>{insert_replaced, InsPos}</c> will insert the matching part into
+ <c>Replacement</c> at the given position (or positions) before actually
+ inserting <c>Replacement</c> into the <c>Subject</c>. Example:</p>
+
+<code>
+1> binary:replace(&lt;&lt;"abcde"&gt;&gt;,&lt;&lt;"b"&gt;&gt;,&lt;&lt;"[]"&gt;&gt;,[{insert_replaced,1}]).
+&lt;&lt;"a[b]cde"&gt;&gt;
+2> binary:replace(&lt;&lt;"abcde"&gt;&gt;,[&lt;&lt;"b"&gt;&gt;,&lt;&lt;"d"&gt;&gt;],&lt;&lt;"[]"&gt;&gt;,
+ [global,{insert_replaced,1}]).
+&lt;&lt;"a[b]c[d]e"&gt;&gt;
+3> binary:replace(&lt;&lt;"abcde"&gt;&gt;,[&lt;&lt;"b"&gt;&gt;,&lt;&lt;"d"&gt;&gt;],&lt;&lt;"[]"&gt;&gt;,
+ [global,{insert_replaced,[1,1]}]).
+&lt;&lt;"a[bb]c[dd]e"&gt;&gt;
+4> binary:replace(&lt;&lt;"abcde"&gt;&gt;,[&lt;&lt;"b"&gt;&gt;,&lt;&lt;"d"&gt;&gt;],&lt;&lt;"[-]"&gt;&gt;,
+ [global,{insert_replaced,[1,2]}]).
+&lt;&lt;"a[b-b]c[d-d]e"&gt;&gt;
+</code>
+
+ <p>If any position given in <c>InsPos</c> is greater than the size of the replacement binary, a <c>badarg</c> exception is raised.</p>
+
+ <p>The options <c>global</c> and <c>{scope, part()}</c> work as for <seealso marker="#split-3">split/3</seealso>. The return type is always a <c>binary()</c>.</p>
+
+ <p>For a description of <c>Pattern</c>, see <seealso marker="#compile_pattern-1">compile_pattern/1</seealso>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>split(Subject,Pattern) -> Parts</name>
+ <fsummary>Splits a binary according to a pattern</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ <v>Pattern = binary() | [ binary() ] | cp()</v>
+ <v>Parts = [ binary() ]</v>
+ </type>
+ <desc>
+ <p>The same as <c>split(Subject, Pattern, [])</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name>split(Subject,Pattern,Options) -> Parts</name>
+ <fsummary>Splits a binary according to a pattern</fsummary>
+ <type>
+ <v>Subject = binary()</v>
+ <v>Pattern = binary() | [ binary() ] | cp()</v>
+ <v>Parts = [ binary() ]</v>
+ <v>Options = [ Option ]</v>
+ <v>Option = {scope, part()} | trim | global</v>
+ </type>
+ <desc>
+
+ <p>Splits <c>Subject</c> into a list of binaries based on <c>Pattern</c>. If
+ the option <c>global</c> is not given, only the first occurrence of
+ <c>Pattern</c> in <c>Subject</c> will give rise to a split.</p>
+
+ <p>The parts of Pattern actually found in Subject are not included in the result.</p>
+
+ <p>Example:</p>
+
+<code>
+1> binary:split(&lt;&lt;1,255,4,0,0,0,2,3&gt;&gt;, [&lt;&lt;0,0,0&gt;&gt;,&lt;&lt;2&gt;&gt;],[]).
+[&lt;&lt;1,255,4&gt;&gt;, &lt;&lt;2,3&gt;&gt;]
+2> binary:split(&lt;&lt;0,1,0,0,4,255,255,9&gt;&gt;, [&lt;&lt;0,0&gt;&gt;, &lt;&lt;255,255&gt;&gt;],[global]).
+[&lt;&lt;0,1&gt;&gt;,&lt;&lt;4&gt;&gt;,&lt;&lt;9&gt;&gt;]
+</code>
+
+ <p>Summary of options:</p>
+ <taglist>
+
+ <tag>{scope, part()}</tag>
+
+ <item><p>Works as in <seealso marker="#match-3">match/3</seealso> and
+ <seealso marker="#matches-3">matches/3</seealso>. Note that
+ this only defines the scope of the search for matching strings,
+ it does not cut the binary before splitting. The bytes before
+ and after the scope will be kept in the result. See example
+ below.</p></item>
+
+ <tag>trim</tag>
+
+ <item><p>Removes trailing empty parts of the result (as does trim in <c>re:split/3</c>).</p></item>
+
+ <tag>global</tag>
+
+ <item><p>Repeats the split until the <c>Subject</c> is
+ exhausted. Conceptually the global option makes split work on
+ the positions returned by <seealso marker="#matches-3">matches/3</seealso>,
+ while it normally
+ works on the position returned by
+ <seealso marker="#match-3">match/3</seealso>.</p></item>
+
+ </taglist>
+
+ <p>Example of the difference between a scope and taking the
+ binary apart before splitting:</p>
+
+<code>
+1> binary:split(&lt;&lt;"banana"&gt;&gt;,[&lt;&lt;"a"&gt;&gt;],[{scope,{2,3}}]).
+[&lt;&lt;"ban"&gt;&gt;,&lt;&lt;"na"&gt;&gt;]
+2> binary:split(binary:part(&lt;&lt;"banana"&gt;&gt;,{2,3}),[&lt;&lt;"a"&gt;&gt;],[]).
+[&lt;&lt;"n"&gt;&gt;,&lt;&lt;"n"&gt;&gt;]
+</code>
+
+ <p>The return type is always a list of binaries that are all
+ referencing <c>Subject</c>. This means that the data in <c>Subject</c> is not
+ actually copied to new binaries and that <c>Subject</c> cannot be
+ garbage collected until the results of the split are no longer
+ referenced.</p>
+
+ <p>For a description of <c>Pattern</c>, see <seealso marker="#compile_pattern-1">compile_pattern/1</seealso>.</p>
+
+ </desc>
+ </func>
+ </funcs>
+</erlref>
diff --git a/lib/stdlib/doc/src/ref_man.xml b/lib/stdlib/doc/src/ref_man.xml
index f6ae368e92..de7aeb2274 100644
--- a/lib/stdlib/doc/src/ref_man.xml
+++ b/lib/stdlib/doc/src/ref_man.xml
@@ -37,6 +37,7 @@
<xi:include href="array.xml"/>
<xi:include href="base64.xml"/>
<xi:include href="beam_lib.xml"/>
+ <xi:include href="binary.xml"/>
<xi:include href="c.xml"/>
<xi:include href="calendar.xml"/>
<xi:include href="dets.xml"/>
diff --git a/lib/stdlib/test/binary_module_SUITE.erl b/lib/stdlib/test/binary_module_SUITE.erl
index 028b7f0f17..16ed9a2c26 100644
--- a/lib/stdlib/test/binary_module_SUITE.erl
+++ b/lib/stdlib/test/binary_module_SUITE.erl
@@ -727,9 +727,10 @@ copy(Config) when is_list(Config) ->
?line RS = random_string({1,10000}),
?line RS = RS2 = binary:copy(RS),
?line false = erts_debug:same(RS,RS2),
- ?line badarg = ?MASK_ERROR(binary:copy(<<1,2,3>>,0)),
+ ?line <<>> = ?MASK_ERROR(binary:copy(<<1,2,3>>,0)),
?line badarg = ?MASK_ERROR(binary:copy(<<1,2,3:3>>,2)),
- ?line badarg = ?MASK_ERROR(binary:copy(<<>>,0)),
+ ?line badarg = ?MASK_ERROR(binary:copy([],0)),
+ ?line <<>> = ?MASK_ERROR(binary:copy(<<>>,0)),
?line badarg = ?MASK_ERROR(binary:copy(<<1,2,3>>,1.0)),
?line badarg = ?MASK_ERROR(binary:copy(<<1,2,3>>,
16#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)),
diff --git a/lib/stdlib/test/binref.erl b/lib/stdlib/test/binref.erl
index af79c8fa09..6d96736ef3 100644
--- a/lib/stdlib/test/binref.erl
+++ b/lib/stdlib/test/binref.erl
@@ -465,7 +465,7 @@ copy(Subject) ->
copy(Subject,1).
copy(Subject,N) ->
try
- true = is_integer(N) and (N > 0) and is_binary(Subject), % Badarg, not function clause
+ true = is_integer(N) and (N >= 0) and is_binary(Subject), % Badarg, not function clause
erlang:list_to_binary(lists:duplicate(N,Subject))
catch
_:_ ->