From 29fbd3acc663c5e4dcc6ff514dccfa20baeb62bd Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Fri, 13 Oct 2017 20:14:46 +0200 Subject: erts: Refactor binary_to_term/1/2 Reduce number of arguments to binary_to_term_int and use the context struct instead. --- erts/emulator/beam/external.c | 62 ++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index c0a3838d42..21d578c8ed 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -122,8 +122,6 @@ static int encode_size_struct_int(struct TTBSizeContext_*, ErtsAtomCacheMap *acm static Export binary_to_term_trap_export; static BIF_RETTYPE binary_to_term_trap_1(BIF_ALIST_1); -static BIF_RETTYPE binary_to_term_int(Process* p, Uint32 flags, Eterm bin, Binary* context_b, - Export *bif, Eterm arg0, Eterm arg1); void erts_init_external(void) { erts_init_trap_export(&term_to_binary_trap_export, @@ -1214,7 +1212,7 @@ typedef struct B2TContext_t { ErtsBinary2TermState b2ts; Uint32 flags; SWord reds; - Eterm trap_bin; + Eterm trap_bin; /* THE_NON_VALUE if not exported */ Export *bif; Eterm arg[2]; enum B2TState state; @@ -1410,13 +1408,15 @@ static int b2t_context_destructor(Binary *context_bin) return 1; } +static BIF_RETTYPE binary_to_term_int(Process*, Eterm bin, B2TContext*); + + static BIF_RETTYPE binary_to_term_trap_1(BIF_ALIST_1) { Binary *context_bin = erts_magic_ref2bin(BIF_ARG_1); ASSERT(ERTS_MAGIC_BIN_DESTRUCTOR(context_bin) == b2t_context_destructor); - return binary_to_term_int(BIF_P, 0, THE_NON_VALUE, context_bin, NULL, - THE_NON_VALUE, THE_NON_VALUE); + return binary_to_term_int(BIF_P, THE_NON_VALUE, ERTS_MAGIC_BIN_DATA(context_bin)); } @@ -1442,6 +1442,8 @@ static B2TContext* b2t_export_context(Process* p, B2TContext* src) b2t_context_destructor); B2TContext* ctx = ERTS_MAGIC_BIN_DATA(context_b); Eterm* hp; + + ASSERT(is_non_value(src->trap_bin)); sys_memcpy(ctx, src, 
sizeof(B2TContext)); if (ctx->state >= B2TDecode && ctx->u.dc.next == &src->u.dc.res) { ctx->u.dc.next = &ctx->u.dc.res; @@ -1451,8 +1453,7 @@ static B2TContext* b2t_export_context(Process* p, B2TContext* src) return ctx; } -static BIF_RETTYPE binary_to_term_int(Process* p, Uint32 flags, Eterm bin, Binary* context_b, - Export *bif_init, Eterm arg0, Eterm arg1) +static BIF_RETTYPE binary_to_term_int(Process* p, Eterm bin, B2TContext *ctx) { BIF_RETTYPE ret_val; #ifdef EXTREME_B2T_TRAPPING @@ -1460,25 +1461,17 @@ static BIF_RETTYPE binary_to_term_int(Process* p, Uint32 flags, Eterm bin, Binar #else SWord initial_reds = (Uint)(ERTS_BIF_REDS_LEFT(p) * B2T_BYTES_PER_REDUCTION); #endif - B2TContext c_buff; - B2TContext *ctx; int is_first_call; - if (context_b == NULL) { + if (is_value(bin)) { /* Setup enough to get started */ is_first_call = 1; - ctx = &c_buff; ctx->state = B2TPrepare; ctx->aligned_alloc = NULL; - ctx->flags = flags; - ctx->bif = bif_init; - ctx->arg[0] = arg0; - ctx->arg[1] = arg1; - IF_DEBUG(ctx->trap_bin = THE_NON_VALUE;) } else { - is_first_call = 0; - ctx = ERTS_MAGIC_BIN_DATA(context_b); + ASSERT(is_value(ctx->trap_bin)); ASSERT(ctx->state != B2TPrepare); + is_first_call = 0; } ctx->reds = initial_reds; @@ -1540,11 +1533,11 @@ static BIF_RETTYPE binary_to_term_int(Process* p, Uint32 flags, Eterm bin, Binar break; case B2TDecodeInit: - if (ctx == &c_buff && ctx->b2ts.extsize > ctx->reds) { + if (is_non_value(ctx->trap_bin) && ctx->b2ts.extsize > ctx->reds) { /* dec_term will maybe trap, allocate space for magic bin before result term to make it easy to trim with HRelease. 
*/ - ctx = b2t_export_context(p, &c_buff); + ctx = b2t_export_context(p, ctx); } ctx->u.dc.ep = ctx->b2ts.extp; ctx->u.dc.res = (Eterm) (UWord) NULL; @@ -1607,11 +1600,10 @@ static BIF_RETTYPE binary_to_term_int(Process* p, Uint32 flags, Eterm bin, Binar } }while (ctx->reds > 0 || ctx->state >= B2TDone); - if (ctx == &c_buff) { - ASSERT(ctx->trap_bin == THE_NON_VALUE); - ctx = b2t_export_context(p, &c_buff); + if (is_non_value(ctx->trap_bin)) { + ctx = b2t_export_context(p, ctx); + ASSERT(is_value(ctx->trap_bin)); } - ASSERT(ctx->trap_bin != THE_NON_VALUE); if (is_first_call) { erts_set_gc_state(p, 0); @@ -1628,23 +1620,30 @@ HIPE_WRAPPER_BIF_DISABLE_GC(binary_to_term, 1) BIF_RETTYPE binary_to_term_1(BIF_ALIST_1) { - return binary_to_term_int(BIF_P, 0, BIF_ARG_1, NULL, bif_export[BIF_binary_to_term_1], - BIF_ARG_1, THE_NON_VALUE); + B2TContext ctx; + + ctx.flags = 0; + ctx.trap_bin = THE_NON_VALUE; + ctx.bif = bif_export[BIF_binary_to_term_1]; + ctx.arg[0] = BIF_ARG_1; + ctx.arg[1] = THE_NON_VALUE; + return binary_to_term_int(BIF_P, BIF_ARG_1, &ctx); } HIPE_WRAPPER_BIF_DISABLE_GC(binary_to_term, 2) BIF_RETTYPE binary_to_term_2(BIF_ALIST_2) { + B2TContext ctx; Eterm opts; Eterm opt; - Uint32 flags = 0; + ctx.flags = 0; opts = BIF_ARG_2; while (is_list(opts)) { opt = CAR(list_val(opts)); if (opt == am_safe) { - flags |= ERTS_DIST_EXT_BTT_SAFE; + ctx.flags |= ERTS_DIST_EXT_BTT_SAFE; } else { goto error; @@ -1655,8 +1654,11 @@ BIF_RETTYPE binary_to_term_2(BIF_ALIST_2) if (is_not_nil(opts)) goto error; - return binary_to_term_int(BIF_P, flags, BIF_ARG_1, NULL, bif_export[BIF_binary_to_term_2], - BIF_ARG_1, BIF_ARG_2); + ctx.trap_bin = THE_NON_VALUE; + ctx.bif = bif_export[BIF_binary_to_term_2]; + ctx.arg[0] = BIF_ARG_1; + ctx.arg[1] = BIF_ARG_2; + return binary_to_term_int(BIF_P, BIF_ARG_1, &ctx); error: BIF_ERROR(BIF_P, BADARG); -- cgit v1.2.3 From f0695a70cdaa4fe0fc0e7c58fb791483af0efa1a Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Mon, 16 Oct 2017 16:29:52 
+0200 Subject: erts: Cleanup binary_SUITE:terms Remove try-catch which must be for some old bitstring limitation. --- erts/emulator/test/binary_SUITE.erl | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/erts/emulator/test/binary_SUITE.erl b/erts/emulator/test/binary_SUITE.erl index 61536bacd7..a6526d3a1c 100644 --- a/erts/emulator/test/binary_SUITE.erl +++ b/erts/emulator/test/binary_SUITE.erl @@ -425,36 +425,32 @@ bad_term_to_binary(Config) when is_list(Config) -> terms(Config) when is_list(Config) -> TestFun = fun(Term) -> - try - S = io_lib:format("~p", [Term]), - io:put_chars(S) - catch - error:badarg -> - io:put_chars("bit sized binary") - end, + S = io_lib:format("~p", [Term]), + io:put_chars(S), Bin = term_to_binary(Term), case erlang:external_size(Bin) of Sz when is_integer(Sz), size(Bin) =< Sz -> ok end, - Bin1 = term_to_binary(Term, [{minor_version, 1}]), - case erlang:external_size(Bin1, [{minor_version, 1}]) of - Sz1 when is_integer(Sz1), size(Bin1) =< Sz1 -> - ok - end, + Bin1 = term_to_binary(Term, [{minor_version, 1}]), + case erlang:external_size(Bin1, [{minor_version, 1}]) of + Sz1 when is_integer(Sz1), size(Bin1) =< Sz1 -> + ok + end, Term = binary_to_term_stress(Bin), Term = binary_to_term_stress(Bin, [safe]), - Unaligned = make_unaligned_sub_binary(Bin), - Term = binary_to_term_stress(Unaligned), - Term = binary_to_term_stress(Unaligned, []), - Term = binary_to_term_stress(Bin, [safe]), + BinU = make_unaligned_sub_binary(Bin), + Term = binary_to_term_stress(BinU), + Term = binary_to_term_stress(BinU, []), + Term = binary_to_term_stress(BinU, [safe]), BinC = erlang:term_to_binary(Term, [compressed]), Term = binary_to_term_stress(BinC), true = size(BinC) =< size(Bin), Bin = term_to_binary(Term, [{compressed,0}]), terms_compression_levels(Term, size(Bin), 1), - UnalignedC = make_unaligned_sub_binary(BinC), - Term = binary_to_term_stress(UnalignedC) + + BinUC = make_unaligned_sub_binary(BinC), + 
Term = binary_to_term_stress(BinUC), end, test_terms(TestFun), ok. -- cgit v1.2.3 From 890fb3bc90cdab64506cd4a43ca0a04727b5b7ea Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Mon, 16 Oct 2017 17:56:46 +0200 Subject: erts: Add 'used' option to binary_to_term/2 --- erts/doc/src/erlang.xml | 29 ++++++++++++++++-------- erts/emulator/beam/external.c | 35 +++++++++++++++++++++++++++++ erts/emulator/test/binary_SUITE.erl | 45 ++++++++++++++++++++++++++++++++++++- erts/preloaded/src/erlang.erl | 6 +++-- 4 files changed, 103 insertions(+), 12 deletions(-) diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml index 2465f49581..a83ae3eb4b 100644 --- a/erts/doc/src/erlang.xml +++ b/erts/doc/src/erlang.xml @@ -549,9 +549,7 @@ hello Decode an Erlang external term format binary. -

As binary_to_term/1, but takes options that affect decoding - of the binary.

-

Option:

+

As binary_to_term/1, but takes these options:

safe @@ -567,18 +565,31 @@ hello creation of new external function references. None of those resources are garbage collected, so unchecked creation of them can exhaust available memory.

-
-
-

Failure: badarg if safe is specified and unsafe - data is decoded.

-> binary_to_term(<<131,100,0,5,104,101,108,108,111>>, [safe]).
+> binary_to_term(<<131,100,0,5,"hello">>, [safe]).
 ** exception error: bad argument
 > hello.
 hello
-> binary_to_term(<<131,100,0,5,104,101,108,108,111>>, [safe]).
+> binary_to_term(<<131,100,0,5,"hello">>, [safe]).
 hello
 
+ + used + +

Changes the return value to {Term, Used} where Used + is the number of bytes actually read from Binary.

+
+> Input = <<131,100,0,5,"hello","world">>.
+<<131,100,0,5,104,101,108,108,111,119,111,114,108,100>>
+> {Term, Used} = binary_to_term(Input, [used]).
+{hello,9}
+> split_binary(Input, Used).
+{<<131,100,0,5,104,101,108,108,111>>,<<"world">>}
+
+
+ +

Failure: badarg if safe is specified and unsafe + data is decoded.

See also term_to_binary/1, diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index 21d578c8ed..44ddd251f1 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -1212,6 +1212,7 @@ typedef struct B2TContext_t { ErtsBinary2TermState b2ts; Uint32 flags; SWord reds; + Uint used_bytes; /* In: boolean, Out: bytes */ Eterm trap_bin; /* THE_NON_VALUE if not exported */ Export *bif; Eterm arg[2]; @@ -1306,6 +1307,11 @@ binary2term_prepare(ErtsBinary2TermState *state, byte *data, Sint data_size, ctx->u.uc.dbytes = state->extp; ctx->u.uc.dleft = dest_len; + if (ctx->used_bytes) { + ASSERT(ctx->used_bytes == 1); + /* to be subtracted by stream.avail_in when done */ + ctx->used_bytes = data_size; + } ctx->state = B2TUncompressChunk; *ctxp = ctx; } @@ -1515,6 +1521,10 @@ static BIF_RETTYPE binary_to_term_int(Process* p, Eterm bin, B2TContext *ctx) && zret == Z_STREAM_END && ctx->u.uc.dleft == 0) { ctx->reds -= chunk; + if (ctx->used_bytes) { + ASSERT(ctx->used_bytes > 5 + ctx->u.uc.stream.avail_in); + ctx->used_bytes -= ctx->u.uc.stream.avail_in; + } ctx->state = B2TSizeInit; } else { @@ -1580,6 +1590,25 @@ static BIF_RETTYPE binary_to_term_int(Process* p, Eterm bin, B2TContext *ctx) return ret_val; case B2TDone: + if (ctx->used_bytes) { + Eterm *hp; + Eterm used; + if (!ctx->b2ts.exttmp) { + ASSERT(ctx->used_bytes == 1); + ctx->used_bytes = (ctx->u.dc.ep - ctx->b2ts.extp + +1); /* VERSION_MAGIC */ + } + if (IS_USMALL(0, ctx->used_bytes)) { + hp = erts_produce_heap(&ctx->u.dc.factory, 3, 0); + used = make_small(ctx->used_bytes); + } + else { + hp = erts_produce_heap(&ctx->u.dc.factory, 3+BIG_UINT_HEAP_SIZE, 0); + used = uint_to_big(ctx->used_bytes, hp); + hp += BIG_UINT_HEAP_SIZE; + } + ctx->u.dc.res = TUPLE2(hp, ctx->u.dc.res, used); + } b2t_destroy_context(ctx); if (ctx->u.dc.factory.hp > ctx->u.dc.factory.hp_end) { @@ -1623,6 +1652,7 @@ BIF_RETTYPE binary_to_term_1(BIF_ALIST_1) B2TContext ctx; ctx.flags = 0; + ctx.used_bytes 
= 0; ctx.trap_bin = THE_NON_VALUE; ctx.bif = bif_export[BIF_binary_to_term_1]; ctx.arg[0] = BIF_ARG_1; @@ -1639,12 +1669,16 @@ BIF_RETTYPE binary_to_term_2(BIF_ALIST_2) Eterm opt; ctx.flags = 0; + ctx.used_bytes = 0; opts = BIF_ARG_2; while (is_list(opts)) { opt = CAR(list_val(opts)); if (opt == am_safe) { ctx.flags |= ERTS_DIST_EXT_BTT_SAFE; } + else if (opt == am_used) { + ctx.used_bytes = 1; + } else { goto error; } @@ -3996,6 +4030,7 @@ dec_term_atom_common: if (ctx) { ctx->state = B2TDone; ctx->reds = reds; + ctx->u.dc.ep = ep; } return ep; diff --git a/erts/emulator/test/binary_SUITE.erl b/erts/emulator/test/binary_SUITE.erl index a6526d3a1c..cbc2d8fae5 100644 --- a/erts/emulator/test/binary_SUITE.erl +++ b/erts/emulator/test/binary_SUITE.erl @@ -48,6 +48,7 @@ bad_list_to_binary/1, bad_binary_to_list/1, t_split_binary/1, bad_split/1, terms/1, terms_float/1, float_middle_endian/1, + b2t_used_big/1, external_size/1, t_iolist_size/1, t_hash/1, bad_size/1, @@ -72,6 +73,7 @@ all() -> t_split_binary, bad_split, bad_list_to_binary, bad_binary_to_list, terms, terms_float, float_middle_endian, external_size, t_iolist_size, + b2t_used_big, bad_binary_to_term_2, safe_binary_to_term2, bad_binary_to_term, bad_terms, t_hash, bad_size, bad_term_to_binary, more_bad_terms, otp_5484, otp_5933, @@ -439,22 +441,63 @@ terms(Config) when is_list(Config) -> end, Term = binary_to_term_stress(Bin), Term = binary_to_term_stress(Bin, [safe]), + Bin_sz = byte_size(Bin), + {Term,Bin_sz} = binary_to_term_stress(Bin, [used]), + + BinE = <<Bin/binary, 1, 2, 3>>, + {Term,Bin_sz} = binary_to_term_stress(BinE, [used]), + BinU = make_unaligned_sub_binary(Bin), Term = binary_to_term_stress(BinU), Term = binary_to_term_stress(BinU, []), Term = binary_to_term_stress(BinU, [safe]), + {Term,Bin_sz} = binary_to_term_stress(BinU, [used]), + + BinUE = make_unaligned_sub_binary(BinE), + {Term,Bin_sz} = binary_to_term_stress(BinUE, [used]), + BinC = erlang:term_to_binary(Term, [compressed]), + BinC_sz = byte_size(BinC), +
true = BinC_sz =< size(Bin), Term = binary_to_term_stress(BinC), - true = size(BinC) =< size(Bin), + {Term, BinC_sz} = binary_to_term_stress(BinC, [used]), + Bin = term_to_binary(Term, [{compressed,0}]), terms_compression_levels(Term, size(Bin), 1), BinUC = make_unaligned_sub_binary(BinC), Term = binary_to_term_stress(BinUC), + {Term,BinC_sz} = binary_to_term_stress(BinUC, [used]), + + BinCE = <<BinC/binary, 1, 2, 3>>, + {Term,BinC_sz} = binary_to_term_stress(BinCE, [used]), + + BinUCE = make_unaligned_sub_binary(BinCE), + Term = binary_to_term_stress(BinUCE), + {Term,BinC_sz} = binary_to_term_stress(BinUCE, [used]) end, test_terms(TestFun), ok. +%% Test binary_to_term(_, [used]) returning a big Used integer. +b2t_used_big(_Config) -> + case erlang:system_info(wordsize) of + 8 -> + {skipped, "This is not a 32-bit machine"}; + 4 -> + %% Use a long utf8 atom for large external format but compact on heap. + BigAtom = binary_to_atom(<< <<16#F0908D88:32>> || _ <- lists:seq(1,255) >>, + utf8), + Atoms = (1 bsl 17) + (1 bsl 9), + BigAtomList = lists:duplicate(Atoms, BigAtom), + BigBin = term_to_binary(BigAtomList), + {BigAtomList, Used} = binary_to_term(BigBin, [used]), + 2 = erts_debug:size(Used), + Used = byte_size(BigBin), + Used = 1 + 1 + 4 + Atoms*(1+2+4*255) + 1, + ok + end. + terms_compression_levels(Term, UncompressedSz, Level) when Level < 10 -> BinC = erlang:term_to_binary(Term, [{compressed,Level}]), Term = binary_to_term_stress(BinC), diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl index f796ea64d3..4ae76cb309 100644 --- a/erts/preloaded/src/erlang.erl +++ b/erts/preloaded/src/erlang.erl @@ -417,9 +417,11 @@ binary_to_term(_Binary) -> erlang:nif_error(undefined). %% binary_to_term/2 --spec binary_to_term(Binary, Opts) -> term() | {term(), Used} when Binary :: ext_binary(), - Opts :: [safe]. + Opt :: safe | used, + Opts :: [Opt], + Used :: pos_integer().
binary_to_term(_Binary, _Opts) -> erlang:nif_error(undefined). -- cgit v1.2.3 From 6a6019c287a54b71d4d0bcf0a72d244d89de90d1 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Mon, 16 Oct 2017 18:01:20 +0200 Subject: kernel: Fix erl_distribution_wb_SUITE to use binary_to_term/2 with 'used' option and not rely on term_to_binary generating the same format. --- lib/kernel/test/erl_distribution_wb_SUITE.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/kernel/test/erl_distribution_wb_SUITE.erl b/lib/kernel/test/erl_distribution_wb_SUITE.erl index 03aaee56b7..258ed4f88c 100644 --- a/lib/kernel/test/erl_distribution_wb_SUITE.erl +++ b/lib/kernel/test/erl_distribution_wb_SUITE.erl @@ -65,6 +65,7 @@ ?DFLAG_EXTENDED_PIDS_PORTS bor ?DFLAG_UTF8_ATOMS)). +-define(PASS_THROUGH, $p). -define(shutdown(X), exit(X)). -define(int16(X), [((X) bsr 8) band 16#ff, (X) band 16#ff]). @@ -676,10 +677,9 @@ recv_message(Socket) -> case gen_tcp:recv(Socket, 0) of {ok,Data} -> B0 = list_to_binary(Data), - {_,B1} = erlang:split_binary(B0,1), - Header = binary_to_term(B1), - Siz = byte_size(term_to_binary(Header)), - {_,B2} = erlang:split_binary(B1,Siz), + <<?PASS_THROUGH, B1/binary>> = B0, + {Header,Siz} = binary_to_term(B1,[used]), + <<_:Siz/binary,B2/binary>> = B1, Message = case (catch binary_to_term(B2)) of {'EXIT', _} -> could_not_digest_message; -- cgit v1.2.3