From 890fb3bc90cdab64506cd4a43ca0a04727b5b7ea Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Mon, 16 Oct 2017 17:56:46 +0200 Subject: erts: Add 'used' option to binary_to_term/2 --- erts/doc/src/erlang.xml | 29 ++++++++++++++++-------- erts/emulator/beam/external.c | 35 +++++++++++++++++++++++++++++ erts/emulator/test/binary_SUITE.erl | 45 ++++++++++++++++++++++++++++++++++++- erts/preloaded/src/erlang.erl | 6 +++-- 4 files changed, 103 insertions(+), 12 deletions(-) diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml index 2465f49581..a83ae3eb4b 100644 --- a/erts/doc/src/erlang.xml +++ b/erts/doc/src/erlang.xml @@ -549,9 +549,7 @@ hello Decode an Erlang external term format binary. -

As binary_to_term/1, but takes options that affect decoding - of the binary.

-

Option:

+

As binary_to_term/1, but takes these options:

safe @@ -567,18 +565,31 @@ hello creation of new external function references. None of those resources are garbage collected, so unchecked creation of them can exhaust available memory.

-
-
-

Failure: badarg if safe is specified and unsafe - data is decoded.

-> binary_to_term(<<131,100,0,5,104,101,108,108,111>>, [safe]).
+> binary_to_term(<<131,100,0,5,"hello">>, [safe]).
 ** exception error: bad argument
 > hello.
 hello
-> binary_to_term(<<131,100,0,5,104,101,108,108,111>>, [safe]).
+> binary_to_term(<<131,100,0,5,"hello">>, [safe]).
 hello
 
+ + used + +

Changes the return value to {Term, Used} where Used + is the number of bytes actually read from Binary.

+
+> Input = <<131,100,0,5,"hello","world">>.
+<<131,100,0,5,104,101,108,108,111,119,111,114,108,100>>
+> {Term, Used} = binary_to_term(Input, [used]).
+{hello, 9}
+> split_binary(Input, Used).
+{<<131,100,0,5,104,101,108,108,111>>, <<"world">>}
+
+
+ +

Failure: badarg if safe is specified and unsafe + data is decoded.

See also term_to_binary/1, diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index 21d578c8ed..44ddd251f1 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -1212,6 +1212,7 @@ typedef struct B2TContext_t { ErtsBinary2TermState b2ts; Uint32 flags; SWord reds; + Uint used_bytes; /* In: boolean, Out: bytes */ Eterm trap_bin; /* THE_NON_VALUE if not exported */ Export *bif; Eterm arg[2]; @@ -1306,6 +1307,11 @@ binary2term_prepare(ErtsBinary2TermState *state, byte *data, Sint data_size, ctx->u.uc.dbytes = state->extp; ctx->u.uc.dleft = dest_len; + if (ctx->used_bytes) { + ASSERT(ctx->used_bytes == 1); + /* to be subtracted by stream.avail_in when done */ + ctx->used_bytes = data_size; + } ctx->state = B2TUncompressChunk; *ctxp = ctx; } @@ -1515,6 +1521,10 @@ static BIF_RETTYPE binary_to_term_int(Process* p, Eterm bin, B2TContext *ctx) && zret == Z_STREAM_END && ctx->u.uc.dleft == 0) { ctx->reds -= chunk; + if (ctx->used_bytes) { + ASSERT(ctx->used_bytes > 5 + ctx->u.uc.stream.avail_in); + ctx->used_bytes -= ctx->u.uc.stream.avail_in; + } ctx->state = B2TSizeInit; } else { @@ -1580,6 +1590,25 @@ static BIF_RETTYPE binary_to_term_int(Process* p, Eterm bin, B2TContext *ctx) return ret_val; case B2TDone: + if (ctx->used_bytes) { + Eterm *hp; + Eterm used; + if (!ctx->b2ts.exttmp) { + ASSERT(ctx->used_bytes == 1); + ctx->used_bytes = (ctx->u.dc.ep - ctx->b2ts.extp + +1); /* VERSION_MAGIC */ + } + if (IS_USMALL(0, ctx->used_bytes)) { + hp = erts_produce_heap(&ctx->u.dc.factory, 3, 0); + used = make_small(ctx->used_bytes); + } + else { + hp = erts_produce_heap(&ctx->u.dc.factory, 3+BIG_UINT_HEAP_SIZE, 0); + used = uint_to_big(ctx->used_bytes, hp); + hp += BIG_UINT_HEAP_SIZE; + } + ctx->u.dc.res = TUPLE2(hp, ctx->u.dc.res, used); + } b2t_destroy_context(ctx); if (ctx->u.dc.factory.hp > ctx->u.dc.factory.hp_end) { @@ -1623,6 +1652,7 @@ BIF_RETTYPE binary_to_term_1(BIF_ALIST_1) B2TContext ctx; ctx.flags = 0; + ctx.used_bytes = 0; ctx.trap_bin = THE_NON_VALUE; ctx.bif = bif_export[BIF_binary_to_term_1]; ctx.arg[0] = BIF_ARG_1; @@ -1639,12 +1669,16 @@ BIF_RETTYPE binary_to_term_2(BIF_ALIST_2) Eterm opt; ctx.flags = 0; + ctx.used_bytes = 0; opts = BIF_ARG_2; while (is_list(opts)) { opt = CAR(list_val(opts)); if (opt == am_safe) { ctx.flags |= ERTS_DIST_EXT_BTT_SAFE; } + else if (opt == am_used) { + ctx.used_bytes = 1; + } else { goto error; } @@ -3996,6 +4030,7 @@ dec_term_atom_common: if (ctx) { ctx->state = B2TDone; ctx->reds = reds; + ctx->u.dc.ep = ep; } return ep; diff --git a/erts/emulator/test/binary_SUITE.erl b/erts/emulator/test/binary_SUITE.erl index a6526d3a1c..cbc2d8fae5 100644 --- a/erts/emulator/test/binary_SUITE.erl +++ b/erts/emulator/test/binary_SUITE.erl @@ -48,6 +48,7 @@ bad_list_to_binary/1, bad_binary_to_list/1, t_split_binary/1, bad_split/1, terms/1, terms_float/1, float_middle_endian/1, + b2t_used_big/1, external_size/1, t_iolist_size/1, t_hash/1, bad_size/1, @@ -72,6 +73,7 @@ all() -> t_split_binary, bad_split, bad_list_to_binary, bad_binary_to_list, terms, terms_float, float_middle_endian, external_size, t_iolist_size, + b2t_used_big, bad_binary_to_term_2, safe_binary_to_term2, bad_binary_to_term, bad_terms, t_hash, bad_size, bad_term_to_binary, more_bad_terms, otp_5484, otp_5933, @@ -439,22 +441,63 @@ terms(Config) when is_list(Config) -> end, Term = binary_to_term_stress(Bin), Term = binary_to_term_stress(Bin, [safe]), + Bin_sz = byte_size(Bin), + {Term,Bin_sz} = binary_to_term_stress(Bin, [used]), + + BinE = <>, + {Term,Bin_sz} = binary_to_term_stress(BinE, [used]), + BinU = make_unaligned_sub_binary(Bin), Term = binary_to_term_stress(BinU), Term = binary_to_term_stress(BinU, []), Term = binary_to_term_stress(BinU, [safe]), + {Term,Bin_sz} = binary_to_term_stress(BinU, [used]), + + BinUE = make_unaligned_sub_binary(BinE), + {Term,Bin_sz} = binary_to_term_stress(BinUE, [used]), + BinC = erlang:term_to_binary(Term, [compressed]), + BinC_sz = byte_size(BinC), + true = BinC_sz =< size(Bin), Term = binary_to_term_stress(BinC), - true = size(BinC) =< size(Bin), + {Term, BinC_sz} = binary_to_term_stress(BinC, [used]), + Bin = term_to_binary(Term, [{compressed,0}]), terms_compression_levels(Term, size(Bin), 1), BinUC = make_unaligned_sub_binary(BinC), Term = binary_to_term_stress(BinUC), + {Term,BinC_sz} = binary_to_term_stress(BinUC, [used]), + + BinCE = <>, + {Term,BinC_sz} = binary_to_term_stress(BinCE, [used]), + + BinUCE = make_unaligned_sub_binary(BinCE), + Term = binary_to_term_stress(BinUCE), + {Term,BinC_sz} = binary_to_term_stress(BinUCE, [used]) end, test_terms(TestFun), ok. +%% Test binary_to_term(_, [used]) returning a big Used integer. +b2t_used_big(_Config) -> + case erlang:system_info(wordsize) of + 8 -> + {skipped, "This is not a 32-bit machine"}; + 4 -> + %% Use a long utf8 atom for large external format but compact on heap. + BigAtom = binary_to_atom(<< <<16#F0908D88:32>> || _ <- lists:seq(1,255) >>, + utf8), + Atoms = (1 bsl 17) + (1 bsl 9), + BigAtomList = lists:duplicate(Atoms, BigAtom), + BigBin = term_to_binary(BigAtomList), + {BigAtomList, Used} = binary_to_term(BigBin, [used]), + 2 = erts_debug:size(Used), + Used = byte_size(BigBin), + Used = 1 + 1 + 4 + Atoms*(1+2+4*255) + 1, + ok + end. + terms_compression_levels(Term, UncompressedSz, Level) when Level < 10 -> BinC = erlang:term_to_binary(Term, [{compressed,Level}]), Term = binary_to_term_stress(BinC), diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl index f796ea64d3..4ae76cb309 100644 --- a/erts/preloaded/src/erlang.erl +++ b/erts/preloaded/src/erlang.erl @@ -417,9 +417,11 @@ binary_to_term(_Binary) -> erlang:nif_error(undefined). %% binary_to_term/2 --spec binary_to_term(Binary, Opts) -> term() when +-spec binary_to_term(Binary, Opts) -> term() | {term(), Used} when Binary :: ext_binary(), - Opts :: [safe]. + Opt :: safe | used, + Opts :: [Opt], + Used :: pos_integer(). binary_to_term(_Binary, _Opts) -> erlang:nif_error(undefined). -- cgit v1.2.3