From 890fb3bc90cdab64506cd4a43ca0a04727b5b7ea Mon Sep 17 00:00:00 2001
From: Sverker Eriksson
Date: Mon, 16 Oct 2017 17:56:46 +0200
Subject: erts: Add 'used' option to binary_to_term/2
---
erts/doc/src/erlang.xml | 29 ++++++++++++++++--------
erts/emulator/beam/external.c | 35 +++++++++++++++++++++++++++++
erts/emulator/test/binary_SUITE.erl | 45 ++++++++++++++++++++++++++++++++++++-
erts/preloaded/src/erlang.erl | 6 +++--
4 files changed, 103 insertions(+), 12 deletions(-)
diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml
index 2465f49581..a83ae3eb4b 100644
--- a/erts/doc/src/erlang.xml
+++ b/erts/doc/src/erlang.xml
@@ -549,9 +549,7 @@ hello
Decode an Erlang external term format binary.
- As binary_to_term/1 , but takes options that affect decoding
- of the binary.
- Option:
+ As binary_to_term/1, but takes these options:
safe
-
@@ -567,18 +565,31 @@ hello
creation of new external function references.
None of those resources are garbage collected, so unchecked
creation of them can exhaust available memory.
-
-
- Failure: badarg if safe is specified and unsafe
- data is decoded.
-> binary_to_term(<<131,100,0,5,104,101,108,108,111>>, [safe]).
+> binary_to_term(<<131,100,0,5,"hello">>, [safe]).
** exception error: bad argument
> hello.
hello
-> binary_to_term(<<131,100,0,5,104,101,108,108,111>>, [safe]).
+> binary_to_term(<<131,100,0,5,"hello">>, [safe]).
hello
+
+ used
+ -
+
+ Changes the return value to {Term, Used} where Used
+ is the number of bytes actually read from Binary.
+
+> Input = <<131,100,0,5,"hello","world">>.
+<<131,100,0,5,104,101,108,108,111,119,111,114,108,100>>
+> {Term, Used} = binary_to_term(Input, [used]).
+{hello, 9}
+> split_binary(Input, Used).
+{<<131,100,0,5,104,101,108,108,111>>, <<"world">>}
+
+
+
+ Failure: badarg if safe is specified and unsafe
+ data is decoded.
See also
term_to_binary/1 ,
diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c
index 21d578c8ed..44ddd251f1 100644
--- a/erts/emulator/beam/external.c
+++ b/erts/emulator/beam/external.c
@@ -1212,6 +1212,7 @@ typedef struct B2TContext_t {
ErtsBinary2TermState b2ts;
Uint32 flags;
SWord reds;
+ Uint used_bytes; /* In: boolean, Out: bytes */
Eterm trap_bin; /* THE_NON_VALUE if not exported */
Export *bif;
Eterm arg[2];
@@ -1306,6 +1307,11 @@ binary2term_prepare(ErtsBinary2TermState *state, byte *data, Sint data_size,
ctx->u.uc.dbytes = state->extp;
ctx->u.uc.dleft = dest_len;
+ if (ctx->used_bytes) {
+ ASSERT(ctx->used_bytes == 1);
+ /* to be subtracted by stream.avail_in when done */
+ ctx->used_bytes = data_size;
+ }
ctx->state = B2TUncompressChunk;
*ctxp = ctx;
}
@@ -1515,6 +1521,10 @@ static BIF_RETTYPE binary_to_term_int(Process* p, Eterm bin, B2TContext *ctx)
&& zret == Z_STREAM_END
&& ctx->u.uc.dleft == 0) {
ctx->reds -= chunk;
+ if (ctx->used_bytes) {
+ ASSERT(ctx->used_bytes > 5 + ctx->u.uc.stream.avail_in);
+ ctx->used_bytes -= ctx->u.uc.stream.avail_in;
+ }
ctx->state = B2TSizeInit;
}
else {
@@ -1580,6 +1590,25 @@ static BIF_RETTYPE binary_to_term_int(Process* p, Eterm bin, B2TContext *ctx)
return ret_val;
case B2TDone:
+ if (ctx->used_bytes) {
+ Eterm *hp;
+ Eterm used;
+ if (!ctx->b2ts.exttmp) {
+ ASSERT(ctx->used_bytes == 1);
+ ctx->used_bytes = (ctx->u.dc.ep - ctx->b2ts.extp
+ +1); /* VERSION_MAGIC */
+ }
+ if (IS_USMALL(0, ctx->used_bytes)) {
+ hp = erts_produce_heap(&ctx->u.dc.factory, 3, 0);
+ used = make_small(ctx->used_bytes);
+ }
+ else {
+ hp = erts_produce_heap(&ctx->u.dc.factory, 3+BIG_UINT_HEAP_SIZE, 0);
+ used = uint_to_big(ctx->used_bytes, hp);
+ hp += BIG_UINT_HEAP_SIZE;
+ }
+ ctx->u.dc.res = TUPLE2(hp, ctx->u.dc.res, used);
+ }
b2t_destroy_context(ctx);
if (ctx->u.dc.factory.hp > ctx->u.dc.factory.hp_end) {
@@ -1623,6 +1652,7 @@ BIF_RETTYPE binary_to_term_1(BIF_ALIST_1)
B2TContext ctx;
ctx.flags = 0;
+ ctx.used_bytes = 0;
ctx.trap_bin = THE_NON_VALUE;
ctx.bif = bif_export[BIF_binary_to_term_1];
ctx.arg[0] = BIF_ARG_1;
@@ -1639,12 +1669,16 @@ BIF_RETTYPE binary_to_term_2(BIF_ALIST_2)
Eterm opt;
ctx.flags = 0;
+ ctx.used_bytes = 0;
opts = BIF_ARG_2;
while (is_list(opts)) {
opt = CAR(list_val(opts));
if (opt == am_safe) {
ctx.flags |= ERTS_DIST_EXT_BTT_SAFE;
}
+ else if (opt == am_used) {
+ ctx.used_bytes = 1;
+ }
else {
goto error;
}
@@ -3996,6 +4030,7 @@ dec_term_atom_common:
if (ctx) {
ctx->state = B2TDone;
ctx->reds = reds;
+ ctx->u.dc.ep = ep;
}
return ep;
diff --git a/erts/emulator/test/binary_SUITE.erl b/erts/emulator/test/binary_SUITE.erl
index a6526d3a1c..cbc2d8fae5 100644
--- a/erts/emulator/test/binary_SUITE.erl
+++ b/erts/emulator/test/binary_SUITE.erl
@@ -48,6 +48,7 @@
bad_list_to_binary/1, bad_binary_to_list/1,
t_split_binary/1, bad_split/1,
terms/1, terms_float/1, float_middle_endian/1,
+ b2t_used_big/1,
external_size/1, t_iolist_size/1,
t_hash/1,
bad_size/1,
@@ -72,6 +73,7 @@ all() ->
t_split_binary, bad_split,
bad_list_to_binary, bad_binary_to_list, terms,
terms_float, float_middle_endian, external_size, t_iolist_size,
+ b2t_used_big,
bad_binary_to_term_2, safe_binary_to_term2,
bad_binary_to_term, bad_terms, t_hash, bad_size,
bad_term_to_binary, more_bad_terms, otp_5484, otp_5933,
@@ -439,22 +441,63 @@ terms(Config) when is_list(Config) ->
end,
Term = binary_to_term_stress(Bin),
Term = binary_to_term_stress(Bin, [safe]),
+ Bin_sz = byte_size(Bin),
+ {Term,Bin_sz} = binary_to_term_stress(Bin, [used]),
+
+ BinE = <<Bin/binary, 1, 2, 3>>,
+ {Term,Bin_sz} = binary_to_term_stress(BinE, [used]),
+
BinU = make_unaligned_sub_binary(Bin),
Term = binary_to_term_stress(BinU),
Term = binary_to_term_stress(BinU, []),
Term = binary_to_term_stress(BinU, [safe]),
+ {Term,Bin_sz} = binary_to_term_stress(BinU, [used]),
+
+ BinUE = make_unaligned_sub_binary(BinE),
+ {Term,Bin_sz} = binary_to_term_stress(BinUE, [used]),
+
BinC = erlang:term_to_binary(Term, [compressed]),
+ BinC_sz = byte_size(BinC),
+ true = BinC_sz =< size(Bin),
Term = binary_to_term_stress(BinC),
- true = size(BinC) =< size(Bin),
+ {Term, BinC_sz} = binary_to_term_stress(BinC, [used]),
+
Bin = term_to_binary(Term, [{compressed,0}]),
terms_compression_levels(Term, size(Bin), 1),
BinUC = make_unaligned_sub_binary(BinC),
Term = binary_to_term_stress(BinUC),
+ {Term,BinC_sz} = binary_to_term_stress(BinUC, [used]),
+
+ BinCE = <<BinC/binary, 1, 2, 3>>,
+ {Term,BinC_sz} = binary_to_term_stress(BinCE, [used]),
+
+ BinUCE = make_unaligned_sub_binary(BinCE),
+ Term = binary_to_term_stress(BinUCE),
+ {Term,BinC_sz} = binary_to_term_stress(BinUCE, [used])
end,
test_terms(TestFun),
ok.
+%% Test binary_to_term(_, [used]) returning a big Used integer; only exercised when wordsize is 4.
+b2t_used_big(_Config) ->
+ case erlang:system_info(wordsize) of
+ 8 ->
+ {skipped, "This is not a 32-bit machine"};
+ 4 ->
+ %% Use a long utf8 atom (255 four-byte code points) for large external format but compact on heap.
+ BigAtom = binary_to_atom(<< <<16#F0908D88:32>> || _ <- lists:seq(1,255) >>,
+ utf8),
+ Atoms = (1 bsl 17) + (1 bsl 9), %% chosen so Used exceeds 2^27; presumably the 32-bit small-integer limit - TODO confirm
+ BigAtomList = lists:duplicate(Atoms, BigAtom),
+ BigBin = term_to_binary(BigAtomList),
+ {BigAtomList, Used} = binary_to_term(BigBin, [used]),
+ 2 = erts_debug:size(Used), %% non-zero heap size: Used is presumably a bignum rather than an immediate
+ Used = byte_size(BigBin), %% the whole binary must have been consumed
+ Used = 1 + 1 + 4 + Atoms*(1+2+4*255) + 1, %% presumably: version + list tag + 4-byte length + per atom (tag + 2-byte length + 1020 bytes) + list tail
+ ok
+ end.
+
terms_compression_levels(Term, UncompressedSz, Level) when Level < 10 ->
BinC = erlang:term_to_binary(Term, [{compressed,Level}]),
Term = binary_to_term_stress(BinC),
diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl
index f796ea64d3..4ae76cb309 100644
--- a/erts/preloaded/src/erlang.erl
+++ b/erts/preloaded/src/erlang.erl
@@ -417,9 +417,11 @@ binary_to_term(_Binary) ->
erlang:nif_error(undefined).
%% binary_to_term/2
--spec binary_to_term(Binary, Opts) -> term() when
+-spec binary_to_term(Binary, Opts) -> term() | {term(), Used} when
Binary :: ext_binary(),
- Opts :: [safe].
+ Opt :: safe | used,
+ Opts :: [Opt],
+ Used :: pos_integer().
binary_to_term(_Binary, _Opts) ->
erlang:nif_error(undefined).
--
cgit v1.2.3