diff options
-rw-r--r-- | erts/doc/src/erlang.xml | 53 | ||||
-rw-r--r-- | erts/emulator/beam/erl_bif_binary.c | 3 | ||||
-rw-r--r-- | lib/stdlib/doc/src/Makefile | 1 | ||||
-rw-r--r-- | lib/stdlib/doc/src/binary.xml | 729 | ||||
-rw-r--r-- | lib/stdlib/doc/src/ref_man.xml | 1 | ||||
-rw-r--r-- | lib/stdlib/test/binary_module_SUITE.erl | 5 | ||||
-rw-r--r-- | lib/stdlib/test/binref.erl | 2 |
7 files changed, 790 insertions, 4 deletions
diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml index cd9bb85f5c..e90160dfd7 100644 --- a/erts/doc/src/erlang.xml +++ b/erts/doc/src/erlang.xml @@ -253,6 +253,54 @@ iolist() = [char() | binary() | iolist()] </desc> </func> <func> + <name>binary_part(Subject, PosLen) -> binary()</name> + <fsummary>Extracts a part of a binary</fsummary> + <type> + <v>Subject = binary()</v> + <v>PosLen = {Start,Length}</v> + <v>Start = int()</v> + <v>Length = int()</v> + </type> + <desc> + <p>Extracts the part of the binary described by <c>PosLen</c>.</p> + + <p>Negative length can be used to extract bytes at the end of a binary:</p> + +<code> +1> Bin = <<1,2,3,4,5,6,7,8,9,10>>. +2> binary_part(Bin,{byte_size(Bin), -5)). +<<6,7,8,9,10>> +</code> + + <p>If <c>PosLen</c> in any way references outside the binary, a <c>badarg</c> exception is raised.</p> + + <p><c>Start</c> is zero-based, i.e:</p> +<code> +1> Bin = <<1,2,3>> +2> binary_part(Bin,{0,2}). +<<1,2>> +</code> + + <p>See the STDLIB module <c>binary</c> for details about the <c>PosLen</c> semantics.</p> + + <p>Allowed in guard tests.</p> + </desc> + </func> + <func> + <name>binary_part(Subject, Start, Length) -> binary()</name> + <fsummary>Extracts a part of a binary</fsummary> + <type> + <v>Subject = binary()</v> + <v>Start = int()</v> + <v>Length = int()</v> + </type> + <desc> + <p>The same as <c>binary_part(Subject, {Pos, Len})</c>.</p> + + <p>Allowed in guard tests.</p> + </desc> + </func> + <func> <name>binary_to_atom(Binary, Encoding) -> atom()</name> <fsummary>Convert from text representation to an atom</fsummary> <type> @@ -318,6 +366,11 @@ iolist() = [char() | binary() | iolist()] corresponding to the bytes from position <c>Start</c> to position <c>Stop</c> in <c>Binary</c>. Positions in the binary are numbered starting from 1.</p> + + <note><p>This functions indexing style of using one-based indices for + binaries is deprecated. New code should use the functions in + the STDLIB module <c>binary</c> instead. They consequently + use the same (zero-based) style of indexing.</p></note> </desc> </func> <func> diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index 0a40e28474..3e8480324c 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -2369,7 +2369,8 @@ static BIF_RETTYPE do_binary_copy(Process *p, Eterm bin, Eterm en) goto badarg; } if (!n) { - goto badarg; + Eterm res_term = erts_new_heap_binary(p,NULL,0,&bytes); + BIF_RET(res_term); } ERTS_GET_BINARY_BYTES(bin,bytes,bit_offs,bit_size); if (bit_size != 0) { diff --git a/lib/stdlib/doc/src/Makefile b/lib/stdlib/doc/src/Makefile index 13b9b2ff18..353c1b90b9 100644 --- a/lib/stdlib/doc/src/Makefile +++ b/lib/stdlib/doc/src/Makefile @@ -40,6 +40,7 @@ XML_REF3_FILES = \ array.xml \ base64.xml \ beam_lib.xml \ + binary.xml \ c.xml \ calendar.xml \ dets.xml \ diff --git a/lib/stdlib/doc/src/binary.xml b/lib/stdlib/doc/src/binary.xml new file mode 100644 index 0000000000..05ec4406c6 --- /dev/null +++ b/lib/stdlib/doc/src/binary.xml @@ -0,0 +1,729 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE erlref SYSTEM "erlref.dtd"> + +<erlref> + <header> + <copyright> + <year>2009</year> + <year>2010</year> + <holder>Ericsson AB, All Rights Reserved</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved on line at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + The Initial Developer of the Original Code is Ericsson AB. + </legalnotice> + + <title>binary</title> + <prepared>Patrik Nyblom</prepared> + <responsible>Kenneth Lundin</responsible> + <docno>1</docno> + <approved></approved> + <checked></checked> + <date>2010-05-05</date> + <rev>A</rev> + <file>binary.xml</file> + </header> + <module>binary</module> + <modulesummary>Library for handling binary data</modulesummary> + <description> + + <p>This module contains functions for manipulating byte-oriented + binaries. Although the majority of functions could be implemented + using bit-syntax, the functions in this library are highly + optimized and are expected to either execute faster or consume + less memory (or both) than a counterpart written in pure Erlang.</p> + + <p>The module is implemented according to the EEP (Erlang Enhancement Proposal) 31.</p> + + <note> + <p> + The library handles byte-oriented data. Bitstrings that are not + binaries (does not contain whole octets of bits) will result in a <c>badarg</c> + exception being thrown from any of the functions in this + module. + </p> + </note> + + + </description> + <section> + <title>DATA TYPES</title> + <code type="none"> + cp() + - Opaque data-type representing a compiled search-pattern. Guaranteed to be a tuple() + to allow programs to distinguish it from non precompiled search patterns. + </code> + <code type="none"> + part() = {Start,Length} + Start = int() + Length = int() + - A representaion of a part (or range) in a binary. Start is a + zero-based offset into a binary() and Length is the length of + that part. As input to functions in this module, a reverse + part specification is allowed, constructed with a negative + Length, so that the part of the binary begins at Start + + Length and is -Length long. This is useful for referencing the + last N bytes of a binary as {size(Binary), -N}. The functions + in this module always return part()'s with positive Length. + </code> + </section> + <funcs> + <func> + <name>at(Subject, Pos) -> int()</name> + <fsummary>Returns the byte at a specific position in a binary</fsummary> + <type> + <v>Subject = binary()</v> + <v>Pos = int() >= 0</v> + </type> + <desc> + + <p>Returns the byte at position <c>Pos</c> (zero-based) in the binary + <c>Subject</c> as an integer. If <c>Pos</c> >= <c>byte_size(Subject)</c>, + a <c>badarg</c> + exception is raised.</p> + + </desc> + </func> + <func> + <name>bin_to_list(Subject) -> list()</name> + <fsummary>Convert a binary to a list of integers</fsummary> + <type> + <v>Subject = binary()</v> + </type> + <desc> + <p>The same as <c>bin_to_list(Subject,{0,byte_size(Subject)})</c>.</p> + </desc> + </func> + <func> + <name>bin_to_list(Subject, PosLen) -> list()</name> + <fsummary>Convert a binary to a list of integers</fsummary> + <type> + <v>Subject = binary()</v> + <v>PosLen = part()</v> + </type> + <desc> + + <p>Converts <c>Subject</c> to a list of <c>int()</c>s, each representing + the value of one byte. The <c>part()</c> denotes which part of the + <c>binary()</c> to convert. Example:</p> + +<code> +1> binary:bin_to_list(<<"erlang">>,{1,3}). +"rla" +%% or [114,108,97] in list notation. +</code> + <p>If <c>PosLen</c> in any way references outside the binary, a <c>badarg</c> exception is raised.</p> + </desc> + </func> + <func> + <name>bin_to_list(Subject, Pos, Len) -> list()</name> + <fsummary>Convert a binary to a list of integers</fsummary> + <type> + <v>Subject = binary()</v> + <v>Pos = int()</v> + <v>Len = int()</v> + </type> + <desc> + <p>The same as<c> bin_to_list(Subject,{Pos,Len})</c>.</p> + </desc> + </func> + <func> + <name>compile_pattern(Pattern) -> cp()</name> + <fsummary>Pre-compiles a binary search pattern</fsummary> + <type> + <v>Pattern = binary() | [ binary() ]</v> + </type> + <desc> + + <p>Builds an internal structure representing a compilation of a + search-pattern, later to be used in the <seealso marker="#match-3">match/3</seealso>, + <seealso marker="#matches-3">matches/3</seealso>, + <seealso marker="#split-3">split/3</seealso> or + <seealso marker="#replace-4">replace/4</seealso> + functions. The <c>cp()</c> returned is guaranteed to be a + <c>tuple()</c> to allow programs to distinguish it from non + pre-compiled search patterns</p> + + <p>When a list of binaries is given, it denotes a set of + alternative binaries to search for. I.e if + <c>[<<"functional">>,<<"programming">>]</c> + is given as <c>Pattern</c>, this + means "either <c><<"functional">></c> or + <c><<"programming">></c>". The pattern is a set of + alternatives; when only a single binary is given, the set has + only one element. The order of alternatives in a pattern is not significant.</p> + + <p>The list of binaries used for search alternatives shall be flat and proper.</p> + + <p>If <c>Pattern</c> is not a binary or a flat proper list of binaries with length > 0, + a <c>badarg</c> exception will be raised.</p> + + </desc> + </func> + <func> + <name>copy(Subject) -> binary()</name> + <fsummary>Creates a duplicate of a binary</fsummary> + <type> + <v>Subject = binary()</v> + </type> + <desc> + <p>The same as <c>copy(Subject, 1)</c>.</p> + </desc> + </func> + <func> + <name>copy(Subject,N) -> binary()</name> + <fsummary>Duplicates a binary N times and creates a new</fsummary> + <type> + <v>Subject = binary()</v> + <v>N = int() >= 0</v> + </type> + <desc> + <p>Creates a binary with the content of <c>Subject</c> duplicated <c>N</c> times.</p> + + <p>This function will always create a new binary, even if <c>N = + 1</c>. By using <c>copy/1</c> on a binary referencing a larger binary, one + might free up the larger binary for garbage collection.</p> + + <note> + <p>By deliberately copying a single binary to avoid referencing + a larger binary, one might, instead of freeing up the larger + binary for later garbage collection, create much more binary + data than needed. Sharing binary data is usually good. Only in + special cases, when small parts reference large binaries and the + large binaries are no longer used in any process, deliberate + copying might be a good idea.</p> </note> + + <p>If <c>N</c> < <c>0</c>, a <c>badarg</c> exception is raised.</p> + </desc> + </func> + <func> + <name>decode_unsigned(Subject) -> Unsigned</name> + <fsummary>Decode a whole binary into an integer of arbitrary size</fsummary> + <type> + <v>Subject = binary()</v> + <v>Unsigned = int() >= 0</v> + </type> + <desc> + <p>The same as <c>decode_unsigned(Subject,big)</c>.</p> + </desc> + </func> + <func> + <name>decode_unsigned(Subject, Endianess) -> Unsigned</name> + <fsummary>Decode a whole binary into an integer of arbitrary size</fsummary> + <type> + <v>Subject = binary()</v> + <v>Endianess = big | little</v> + <v>Unsigned = int() >= 0</v> + </type> + <desc> + + <p>Converts the binary digit representation, in big or little + endian, of a positive integer in <c>Subject</c> to an Erlang <c>int()</c>.</p> + + <p>Example:</p> + + <code> +1> binary:decode_unsigned(<<169,138,199>>,big). +11111111 + </code> + </desc> + </func> + <func> + <name>encode_unsigned(Unsigned) -> binary()</name> + <fsummary>Encodes an unsigned integer into the minimal binary</fsummary> + <type> + <v>Unsigned = int() >= 0</v> + </type> + <desc> + <p>The same as <c>encode_unsigned(Unsigned,big)</c>.</p> + </desc> + </func> + <func> + <name>encode_unsigned(Unsigned,Endianess) -> binary()</name> + <fsummary>Encodes an unsigned integer into the minimal binary</fsummary> + <type> + <v>Unsigned = int() >= 0</v> + <v>Endianess = big | little</v> + </type> + <desc> + + <p>Converts a positive integer to the smallest possible + representation in a binary digit representation, either big + or little endian.</p> + + <p>Example:</p> + + <code> +1> binary:encode_unsigned(11111111,big). +<<169,138,199>> + </code> + </desc> + </func> + <func> + <name>first(Subject) -> int()</name> + <fsummary>Returns the first byte of a binary</fsummary> + <type> + <v>Subject = binary()</v> + </type> + <desc> + + <p>Returns the first byte of the binary <c>Subject</c> as an integer. If the + size of <c>Subject</c> is zero, a <c>badarg</c> exception is raised.</p> + + </desc> + </func> + <func> + <name>last(Subject) -> int()</name> + <fsummary>Returns the last byte of a binary</fsummary> + <type> + <v>Subject = binary()</v> + </type> + <desc> + + <p>Returns the last byte of the binary <c>Subject</c> as an integer. If the + size of <c>Subject</c> is zero, a <c>badarg</c> exception is raised.</p> + + </desc> + </func> + <func> + <name>list_to_bin(ByteList) -> binary()</name> + <fsummary>Convert a list of integers and binaries to a binary</fsummary> + <type> + <v>ByteList = iodata() (see module erlang)</v> + </type> + <desc> + <p>Works exactly as <c>erlang:list_to_binary/1</c>, added for completeness.</p> + </desc> + </func> + <func> + <name>longest_common_prefix(Binaries) -> int()</name> + <fsummary>Returns length of longest common prefix for a set of binaries</fsummary> + <type> + <v>Binaries = [ binary() ]</v> + </type> + <desc> + + <p>Returns the length of the longest common prefix of the + binaries in the list <c>Binaries</c>. Example:</p> + +<code> +1> binary:longest_common_prefix([<<"erlang">>,<<"ergonomy">>]). +2 +2> binary:longest_common_prefix([<<"erlang">>,<<"perl">>]). +0 +</code> + + <p>If <c>Binaries</c> is not a flat list of binaries, a <c>badarg</c> exception is raised.</p> + </desc> + </func> + <func> + <name>longest_common_suffix(Binaries) -> int()</name> + <fsummary>Returns length of longest common suffix for a set of binaries</fsummary> + <type> + <v>Binaries = [ binary() ]</v> + </type> + <desc> + + <p>Returns the length of the longest common suffix of the + binaries in the list <c>Binaries</c>. Example:</p> + +<code> +1> binary:longest_common_suffix([<<"erlang">>,<<"fang">>]). +3 +2> binary:longest_common_suffix([<<"erlang">>,<<"perl">>]). +0 +</code> + + <p>If <c>Binaries</c> is not a flat list of binaries, a <c>badarg</c> exception is raised.</p> + + </desc> + </func> + <func> + <name>match(Subject, Pattern) -> Found | <c>nomatch</c></name> + <fsummary>Searches for the first match of a pattern in a binary</fsummary> + <type> + <v>Subject = binary()</v> + <v>Pattern = binary() | [ binary() ] | cp()</v> + <v>Found = part()</v> + </type> + <desc> + <p>The same as <c>match(Subject, Pattern, [])</c>.</p> + </desc> + </func> + <func> + <name>match(Subject,Pattern,Options) -> Found | <c>nomatch</c></name> + <fsummary>Searches for the first match of a pattern in a binary</fsummary> + <type> + <v>Subject = binary()</v> + <v>Pattern = binary() | [ binary() ] | cp()</v> + <v>Found = part()</v> + <v>Options = [ Option ]</v> + <v>Option = {scope, part()}</v> + </type> + <desc> + + <p>Searches for the first occurrence of <c>Pattern</c> in <c>Subject</c> and + returns the position and length.</p> + + <p>The function will return <c>{Pos,Length}</c> for the binary + in <c>Pattern</c> starting at the lowest position in + <c>Subject</c>, Example:</p> + +<code> +1> binary:match(<<"abcde">>, [<<"bcde">>,<<"cd">>],[]). +{1,4} +</code> + + <p>Even though <c><<"cd">></c> ends before + <c><<"bcde">></c>, <c><<"bcde">></c> + begins first and is therefore the first match. If two + overlapping matches begins at the same position, the longest is + returned.</p> + + <p>Summary of the options:</p> + + <taglist> + <tag>{scope, {Start, Length}}</tag> + <item><p>Only the given part is searched. Return values still have + offsets from the beginning of <c>Subject</c>. A negative <c>Length</c> is + allowed as described in the <c>TYPES</c> section of this manual.</p></item> + </taglist> + + <p>If none of the strings in + <c>Pattern</c> is found, the atom <c>nomatch</c> is returned.</p> + + <p>For a description of <c>Pattern</c>, see + <seealso marker="#compile_pattern-1">compile_pattern/1</seealso>.</p> + + <p>If <c>{scope, {Start,Length}}</c> is given in the options + such that <c>Start</c> is larger than the size of + <c>Subject</c>, <c>Start + Length</c> is less than zero or + <c>Start + Length</c> is larger than the size of + <c>Subject</c>, a <c>badarg</c> exception is raised.</p> + + </desc> + </func> + <func> + <name>matches(Subject, Pattern) -> Found</name> + <fsummary>Searches for all matches of a pattern in a binary</fsummary> + <type> + <v>Subject = binary()</v> + <v>Pattern = binary() | [ binary() ] | cp()</v> + <v>Found = [ part() ] | []</v> + </type> + <desc> + <p>The same as <c>matches(Subject, Pattern, [])</c>.</p> + </desc> + </func> + <func> + <name>matches(Subject,Pattern,Options) -> Found</name> + <fsummary>Searches for all matches of a pattern in a binary</fsummary> + <type> + <v>Subject = binary()</v> + <v>Pattern = binary() | [ binary() ] | cp()</v> + <v>Found = [ part() ] | []</v> + <v>Options = [ Option ]</v> + <v>Option = {scope, part()}</v> + </type> + <desc> + + <p>Works like match, but the <c>Subject</c> is searched until + exhausted and a list of all non-overlapping parts matching + <c>Pattern</c> is returned (in order). </p> + + <p>The first and longest match is preferred to a shorter, + which is illustrated by the following example:</p> + +<code> +1> binary:matches(<<"abcde">>, + [<<"bcde">>,<<"bc">>>,<<"de">>],[]). +[{1,4}] +</code> + + <p>The result shows that <<bcde">> is selected instead of the + shorter match <<"bc">> (which would have given raise to one + more match,<<"de">>). This corresponds to the behavior of posix + regular expressions (and programs like awk), but is not + consistent with alternative matches in re (and Perl), where + instead lexical ordering in the search pattern selects which + string matches.</p> + + <p>If none of the strings in pattern is found, an empty list is returned.</p> + + <p>For a description of <c>Pattern</c>, see <seealso marker="#compile_pattern-1">compile_pattern/1</seealso> and for a + description of available options, see <seealso marker="#match-3">match/3</seealso>.</p> + + <p>If <c>{scope, {Start,Length}}</c> is given in the options such that + <c>Start</c> is larger than the size of <c>Subject</c>, <c>Start + Length</c> is + less than zero or <c>Start + Length</c> is larger than the size of + <c>Subject</c>, a <c>badarg</c> exception is raised.</p> + + </desc> + </func> + <func> + <name>part(Subject, PosLen) -> binary()</name> + <fsummary>Extracts a part of a binary</fsummary> + <type> + <v>Subject = binary()</v> + <v>PosLen = part()</v> + </type> + <desc> + + <p>Extracts the part of the binary <c>Subject</c> described by <c>PosLen</c>.</p> + + <p>Negative length can be used to extract bytes at the end of a binary:</p> + +<code> +1> Bin = <<1,2,3,4,5,6,7,8,9,10>>. +2> binary:part(Bin,{byte_size(Bin), -5)). +<<6,7,8,9,10>> +</code> + + <note> + <p><seealso marker="#part-2">part/2</seealso>and <seealso + marker="#part-3">part/3</seealso> are also available in the + <c>erlang</c> module under the names <c>binary_part/2</c> and + <c>binary_part/3</c>. Those BIFs are allowed in guard tests.</p> + </note> + + <p>If <c>PosLen</c> in any way references outside the binary, a <c>badarg</c> exception + is raised.</p> + + </desc> + </func> + <func> + <name>part(Subject, Pos, Len) -> binary()</name> + <fsummary>Extracts a part of a binary</fsummary> + <type> + <v>Subject = binary()</v> + <v>Pos = int()</v> + <v>Len = int()</v> + </type> + <desc> + <p>The same as <c>part(Subject, {Pos, Len})</c>.</p> + </desc> + </func> + <func> + <name>referenced_byte_size(binary()) -> int()</name> + <fsummary>Determines the size of the actual binary pointed out by a sub-binary</fsummary> + <desc> + + <p>If a binary references a larger binary (often described as + being a sub-binary), it can be useful to get the size of the + actual referenced binary. This function can be used in a program + to trigger the use of <c>copy/1</c>. By copying a binary, one might + dereference the original, possibly large, binary which a smaller + binary is a reference to.</p> + + <p>Example:</p> + + <code> +store(Binary, GBSet) -> + NewBin = + case binary:referenced_byte_size(Binary) of + Large when Large > 2 * byte_size(Binary) -> + binary:copy(Binary); + _ -> + Binary + end, + gb_sets:insert(NewBin,GBSet). + </code> + + <p>In this example, we chose to copy the binary content before + inserting it in the <c>gb_set()</c> if it references a binary more than + twice the size of the data we're going to keep. Of course + different rules for when copying will apply to different + programs.</p> + + <p>Binary sharing will occur whenever binaries are taken apart, + this is the fundamental reason why binaries are fast, + decomposition can always be done with O(1) complexity. In rare + circumstances this data sharing is however undesirable, why this + function together with <c>copy/1</c> might be useful when optimizing + for memory use.</p> + + <p>Example of binary sharing:</p> + + <code> +1> A = binary:copy(<<1>>,100). +<<1,1,1,1,1 ... +2> byte_size(A). +100 +3> binary:referenced_byte_size(A) +100 +4> <<_:10/binary,B:10/binary,_/binary>> = A. +<<1,1,1,1,1 ... +5> byte_size(B). +10 +6> binary:referenced_byte_size(B) +100 + </code> + + <note> + <p>Binary data is shared among processes. If another process + still references the larger binary, copying the part this + process uses only consumes more memory and will not free up the + larger binary for garbage collection. Use this kind of intrusive + functions with extreme care, and only if a real problem is + detected.</p> + </note> + + </desc> + </func> + <func> + <name>replace(Subject,Pattern,Replacement) -> Result</name> + <fsummary>Replaces bytes in a binary according to a pattern</fsummary> + <type> + <v>Subject = binary()</v> + <v>Pattern = binary() | [ binary() ] | cp()</v> + <v>Replacement = binary()</v> + <v>Result = binary()</v> + </type> + <desc> + <p>The same as <c>replace(Subject,Pattern,Replacement,[])</c>.</p> + </desc> + </func> + <func> + <name>replace(Subject,Pattern,Replacement,Options) -> Result</name> + <fsummary>Replaces bytes in a binary according to a pattern</fsummary> + <type> + <v>Subject = binary()</v> + <v>Pattern = binary() | [ binary() ] | cp()</v> + <v>Replacement = binary()</v> + <v>Result = binary()</v> + <v>Options = [ Option ]</v> + <v>Option = global | {scope, part()} | {insert_replaced, InsPos}</v> + <v>InsPos = OnePos | [ OnePos ]</v> + <v>OnePos = int() =< byte_size(Replacement)</v> + </type> + <desc> + + <p>Constructs a new binary by replacing the parts in + <c>Subject</c> matching <c>Pattern</c> with the content of + <c>Replacement</c>.</p> + + <p>If the matching sub-part of <c>Subject</c> giving raise to the + replacement is to be inserted in the result, the option + <c>{insert_replaced, InsPos}</c> will insert the matching part into + <c>Replacement</c> at the given position (or positions) before actually + inserting <c>Replacement</c> into the <c>Subject</c>. Example:</p> + +<code> +1> binary:replace(<<"abcde">>,<<"b">>,<<"[]">>,[{insert_replaced,1}]). +<<"a[b]cde">> +2> binary:replace(<<"abcde">>,[<<"b">>,<<"d">>],<<"[]">>, + [global,{insert_replaced,1}]). +<<"a[b]c[d]e">> +3> binary:replace(<<"abcde">>,[<<"b">>,<<"d">>],<<"[]">>, + [global,{insert_replaced,[1,1]}]). +<<"a[bb]c[dd]e">> +4> binary:replace(<<"abcde">>,[<<"b">>,<<"d">>],<<"[-]">>, + [global,{insert_replaced,[1,2]}]). +<<"a[b-b]c[d-d]e">> +</code> + + <p>If any position given in <c>InsPos</c> is greater than the size of the replacement binary, a <c>badarg</c> exception is raised.</p> + + <p>The options <c>global</c> and <c>{scope, part()}</c> works as for <seealso marker="#split-3">split/3</seealso>. The return type is always a <c>binary()</c>.</p> + + <p>For a description of <c>Pattern</c>, see <seealso marker="#compile_pattern-1">compile_pattern/1</seealso>.</p> + </desc> + </func> + <func> + <name>split(Subject,Pattern) -> Parts</name> + <fsummary>Splits a binary according to a pattern</fsummary> + <type> + <v>Subject = binary()</v> + <v>Pattern = binary() | [ binary() ] | cp()</v> + <v>Parts = [ binary() ]</v> + </type> + <desc> + <p>The same as <c>split(Subject, Pattern, [])</c>.</p> + </desc> + </func> + <func> + <name>split(Subject,Pattern,Options) -> Parts</name> + <fsummary>Splits a binary according to a pattern</fsummary> + <type> + <v>Subject = binary()</v> + <v>Pattern = binary() | [ binary() ] | cp()</v> + <v>Parts = [ binary() ]</v> + <v>Options = [ Option ]</v> + <v>Option = {scope, part()} | trim | global</v> + </type> + <desc> + + <p>Splits Binary into a list of binaries based on Pattern. If + the option global is not given, only the first occurrence of + Pattern in Subject will give rise to a split.</p> + + <p>The parts of Pattern actually found in Subject are not included in the result.</p> + + <p>Example:</p> + +<code> +1> binary:split(<<1,255,4,0,0,0,2,3>>, [<<0,0,0>>,<<2>>],[]). +[<<1,255,4>>, <<2,3>>] +2> binary:split(<<0,1,0,0,4,255,255,9>>, [<<0,0>>, <<255,255>>],[global]). +[<<0,1>>,<<4>>,<<9>>] +</code> + + <p>Summary of options:</p> + <taglist> + + <tag>{scope, part()}</tag> + + <item><p>Works as in <seealso marker="#match-3">match/3</seealso> and + <seealso marker="#matches-3">matches/3</seealso>. Note that + this only defines the scope of the search for matching strings, + it does not cut the binary before splitting. The bytes before + and after the scope will be kept in the result. See example + below.</p></item> + + <tag>trim</tag> + + <item><p>Removes trailing empty parts of the result (as does trim in <c>re:split/3</c>)</p></item> + + <tag>global</tag> + + <item><p>Repeats the split until the <c>Subject</c> is + exhausted. Conceptually the global option makes split work on + the positions returned by <seealso marker="#matches-3">matches/3</seealso>, + while it normally + works on the position returned by + <seealso marker="#match-3">match/3</seealso>.</p></item> + + </taglist> + + <p>Example of the difference between a scope and taking the + binary apart before splitting:</p> + +<code> +1> binary:split(<<"banana">>,[<<"a">>],[{scope,{2,3}}]). +[<<"ban">>,<<"na">>] +2> binary:split(binary:part(<<"banana">>,{2,3}),[<<"a">>],[]). +[<<"n">>,<<"n">>] +</code> + + <p>The return type is always a list of binaries that are all + referencing <c>Subject</c>. This means that the data in <c>Subject</c> is not + actually copied to new binaries and that <c>Subject</c> cannot be + garbage collected until the results of the split are no longer + referenced.</p> + + <p>For a description of <c>Pattern</c>, see <seealso marker="#compile_pattern-1">compile_pattern/1</seealso>.</p> + + </desc> + </func> + </funcs> +</erlref> diff --git a/lib/stdlib/doc/src/ref_man.xml b/lib/stdlib/doc/src/ref_man.xml index f6ae368e92..de7aeb2274 100644 --- a/lib/stdlib/doc/src/ref_man.xml +++ b/lib/stdlib/doc/src/ref_man.xml @@ -37,6 +37,7 @@ <xi:include href="array.xml"/> <xi:include href="base64.xml"/> <xi:include href="beam_lib.xml"/> + <xi:include href="binary.xml"/> <xi:include href="c.xml"/> <xi:include href="calendar.xml"/> <xi:include href="dets.xml"/> diff --git a/lib/stdlib/test/binary_module_SUITE.erl b/lib/stdlib/test/binary_module_SUITE.erl index 028b7f0f17..16ed9a2c26 100644 --- a/lib/stdlib/test/binary_module_SUITE.erl +++ b/lib/stdlib/test/binary_module_SUITE.erl @@ -727,9 +727,10 @@ copy(Config) when is_list(Config) -> ?line RS = random_string({1,10000}), ?line RS = RS2 = binary:copy(RS), ?line false = erts_debug:same(RS,RS2), - ?line badarg = ?MASK_ERROR(binary:copy(<<1,2,3>>,0)), + ?line <<>> = ?MASK_ERROR(binary:copy(<<1,2,3>>,0)), ?line badarg = ?MASK_ERROR(binary:copy(<<1,2,3:3>>,2)), - ?line badarg = ?MASK_ERROR(binary:copy(<<>>,0)), + ?line badarg = ?MASK_ERROR(binary:copy([],0)), + ?line <<>> = ?MASK_ERROR(binary:copy(<<>>,0)), ?line badarg = ?MASK_ERROR(binary:copy(<<1,2,3>>,1.0)), ?line badarg = ?MASK_ERROR(binary:copy(<<1,2,3>>, 16#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)), diff --git a/lib/stdlib/test/binref.erl b/lib/stdlib/test/binref.erl index af79c8fa09..6d96736ef3 100644 --- a/lib/stdlib/test/binref.erl +++ b/lib/stdlib/test/binref.erl @@ -465,7 +465,7 @@ copy(Subject) -> copy(Subject,1). copy(Subject,N) -> try - true = is_integer(N) and (N > 0) and is_binary(Subject), % Badarg, not function clause + true = is_integer(N) and (N >= 0) and is_binary(Subject), % Badarg, not function clause erlang:list_to_binary(lists:duplicate(N,Subject)) catch _:_ -> |