diff options
-rw-r--r-- | erts/doc/src/erlang.xml | 29 | ||||
-rw-r--r-- | erts/emulator/beam/external.c | 97 | ||||
-rw-r--r-- | erts/emulator/test/binary_SUITE.erl | 77 | ||||
-rw-r--r-- | erts/preloaded/src/erlang.erl | 6 | ||||
-rw-r--r-- | lib/kernel/test/erl_distribution_wb_SUITE.erl | 8 |
5 files changed, 153 insertions, 64 deletions
diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml index c77f426919..3b7b9d6a50 100644 --- a/erts/doc/src/erlang.xml +++ b/erts/doc/src/erlang.xml @@ -557,9 +557,7 @@ hello <name name="binary_to_term" arity="2"/> <fsummary>Decode an Erlang external term format binary.</fsummary> <desc> - <p>As <c>binary_to_term/1</c>, but takes options that affect decoding - of the binary.</p> - <p>Option:</p> + <p>As <c>binary_to_term/1</c>, but takes these options:</p> <taglist> <tag><c>safe</c></tag> <item> @@ -575,18 +573,31 @@ hello creation of new external function references. None of those resources are garbage collected, so unchecked creation of them can exhaust available memory.</p> - </item> - </taglist> - <p>Failure: <c>badarg</c> if <c>safe</c> is specified and unsafe - data is decoded.</p> <pre> -> <input>binary_to_term(<<131,100,0,5,104,101,108,108,111>>, [safe]).</input> +> <input>binary_to_term(<<131,100,0,5,"hello">>, [safe]).</input> ** exception error: bad argument > <input>hello.</input> hello -> <input>binary_to_term(<<131,100,0,5,104,101,108,108,111>>, [safe]).</input> +> <input>binary_to_term(<<131,100,0,5,"hello">>, [safe]).</input> hello </pre> + </item> + <tag><c>used</c></tag> + <item> + <p>Changes the return value to <c>{Term, Used}</c> where <c>Used</c> + is the number of bytes actually read from <c>Binary</c>.</p> + <pre> +> <input>Input = <<131,100,0,5,"hello","world">>.</input> +<<131,100,0,5,104,101,108,108,111,119,111,114,108,100>> +> <input>{Term, Used} = binary_to_term(Input, [used]).</input> +{hello, 9} +> <input>split_binary(Input, Used).</input> +{<<131,100,0,5,104,101,108,108,111>>, <<"world">>} +</pre> + </item> + </taglist> + <p>Failure: <c>badarg</c> if <c>safe</c> is specified and unsafe + data is decoded.</p> <p>See also <seealso marker="#term_to_binary/1"><c>term_to_binary/1</c></seealso>, <seealso marker="#binary_to_term/1"> diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index 970158933f..6666c42778 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -122,8 +122,6 @@ static int encode_size_struct_int(struct TTBSizeContext_*, ErtsAtomCacheMap *acm static Export binary_to_term_trap_export; static BIF_RETTYPE binary_to_term_trap_1(BIF_ALIST_1); -static BIF_RETTYPE binary_to_term_int(Process* p, Uint32 flags, Eterm bin, Binary* context_b, - Export *bif, Eterm arg0, Eterm arg1); void erts_init_external(void) { erts_init_trap_export(&term_to_binary_trap_export, @@ -1220,7 +1218,8 @@ typedef struct B2TContext_t { ErtsBinary2TermState b2ts; Uint32 flags; SWord reds; - Eterm trap_bin; + Uint used_bytes; /* In: boolean, Out: bytes */ + Eterm trap_bin; /* THE_NON_VALUE if not exported */ Export *bif; Eterm arg[2]; enum B2TState state; @@ -1314,6 +1313,11 @@ binary2term_prepare(ErtsBinary2TermState *state, byte *data, Sint data_size, ctx->u.uc.dbytes = state->extp; ctx->u.uc.dleft = dest_len; + if (ctx->used_bytes) { + ASSERT(ctx->used_bytes == 1); + /* to be subtracted by stream.avail_in when done */ + ctx->used_bytes = data_size; + } ctx->state = B2TUncompressChunk; *ctxp = ctx; } @@ -1416,13 +1420,15 @@ static int b2t_context_destructor(Binary *context_bin) return 1; } +static BIF_RETTYPE binary_to_term_int(Process*, Eterm bin, B2TContext*); + + static BIF_RETTYPE binary_to_term_trap_1(BIF_ALIST_1) { Binary *context_bin = erts_magic_ref2bin(BIF_ARG_1); ASSERT(ERTS_MAGIC_BIN_DESTRUCTOR(context_bin) == b2t_context_destructor); - return binary_to_term_int(BIF_P, 0, THE_NON_VALUE, context_bin, NULL, - THE_NON_VALUE, THE_NON_VALUE); + return binary_to_term_int(BIF_P, THE_NON_VALUE, ERTS_MAGIC_BIN_DATA(context_bin)); } @@ -1448,6 +1454,8 @@ static B2TContext* b2t_export_context(Process* p, B2TContext* src) b2t_context_destructor); B2TContext* ctx = ERTS_MAGIC_BIN_DATA(context_b); Eterm* hp; + + ASSERT(is_non_value(src->trap_bin)); sys_memcpy(ctx, src, sizeof(B2TContext)); if (ctx->state >= B2TDecode && ctx->u.dc.next == &src->u.dc.res) { ctx->u.dc.next = &ctx->u.dc.res; @@ -1457,8 +1465,7 @@ static B2TContext* b2t_export_context(Process* p, B2TContext* src) return ctx; } -static BIF_RETTYPE binary_to_term_int(Process* p, Uint32 flags, Eterm bin, Binary* context_b, - Export *bif_init, Eterm arg0, Eterm arg1) +static BIF_RETTYPE binary_to_term_int(Process* p, Eterm bin, B2TContext *ctx) { BIF_RETTYPE ret_val; #ifdef EXTREME_B2T_TRAPPING @@ -1466,25 +1473,17 @@ static BIF_RETTYPE binary_to_term_int(Process* p, Uint32 flags, Eterm bin, Binar #else SWord initial_reds = (Uint)(ERTS_BIF_REDS_LEFT(p) * B2T_BYTES_PER_REDUCTION); #endif - B2TContext c_buff; - B2TContext *ctx; int is_first_call; - if (context_b == NULL) { + if (is_value(bin)) { /* Setup enough to get started */ is_first_call = 1; - ctx = &c_buff; ctx->state = B2TPrepare; ctx->aligned_alloc = NULL; - ctx->flags = flags; - ctx->bif = bif_init; - ctx->arg[0] = arg0; - ctx->arg[1] = arg1; - IF_DEBUG(ctx->trap_bin = THE_NON_VALUE;) } else { - is_first_call = 0; - ctx = ERTS_MAGIC_BIN_DATA(context_b); + ASSERT(is_value(ctx->trap_bin)); ASSERT(ctx->state != B2TPrepare); + is_first_call = 0; } ctx->reds = initial_reds; @@ -1528,6 +1527,10 @@ static BIF_RETTYPE binary_to_term_int(Process* p, Uint32 flags, Eterm bin, Binar && zret == Z_STREAM_END && ctx->u.uc.dleft == 0) { ctx->reds -= chunk; + if (ctx->used_bytes) { + ASSERT(ctx->used_bytes > 5 + ctx->u.uc.stream.avail_in); + ctx->used_bytes -= ctx->u.uc.stream.avail_in; + } ctx->state = B2TSizeInit; } else { @@ -1546,11 +1549,11 @@ static BIF_RETTYPE binary_to_term_int(Process* p, Uint32 flags, Eterm bin, Binar break; case B2TDecodeInit: - if (ctx == &c_buff && ctx->b2ts.extsize > ctx->reds) { + if (is_non_value(ctx->trap_bin) && ctx->b2ts.extsize > ctx->reds) { /* dec_term will maybe trap, allocate space for magic bin before result term to make it easy to trim with HRelease. */ - ctx = b2t_export_context(p, &c_buff); + ctx = b2t_export_context(p, ctx); } ctx->u.dc.ep = ctx->b2ts.extp; ctx->u.dc.res = (Eterm) (UWord) NULL; @@ -1593,6 +1596,25 @@ static BIF_RETTYPE binary_to_term_int(Process* p, Uint32 flags, Eterm bin, Binar return ret_val; case B2TDone: + if (ctx->used_bytes) { + Eterm *hp; + Eterm used; + if (!ctx->b2ts.exttmp) { + ASSERT(ctx->used_bytes == 1); + ctx->used_bytes = (ctx->u.dc.ep - ctx->b2ts.extp + +1); /* VERSION_MAGIC */ + } + if (IS_USMALL(0, ctx->used_bytes)) { + hp = erts_produce_heap(&ctx->u.dc.factory, 3, 0); + used = make_small(ctx->used_bytes); + } + else { + hp = erts_produce_heap(&ctx->u.dc.factory, 3+BIG_UINT_HEAP_SIZE, 0); + used = uint_to_big(ctx->used_bytes, hp); + hp += BIG_UINT_HEAP_SIZE; + } + ctx->u.dc.res = TUPLE2(hp, ctx->u.dc.res, used); + } b2t_destroy_context(ctx); if (ctx->u.dc.factory.hp > ctx->u.dc.factory.hp_end) { @@ -1613,11 +1635,10 @@ static BIF_RETTYPE binary_to_term_int(Process* p, Uint32 flags, Eterm bin, Binar } }while (ctx->reds > 0 || ctx->state >= B2TDone); - if (ctx == &c_buff) { - ASSERT(ctx->trap_bin == THE_NON_VALUE); - ctx = b2t_export_context(p, &c_buff); + if (is_non_value(ctx->trap_bin)) { + ctx = b2t_export_context(p, ctx); + ASSERT(is_value(ctx->trap_bin)); } - ASSERT(ctx->trap_bin != THE_NON_VALUE); if (is_first_call) { erts_set_gc_state(p, 0); @@ -1634,23 +1655,35 @@ HIPE_WRAPPER_BIF_DISABLE_GC(binary_to_term, 1) BIF_RETTYPE binary_to_term_1(BIF_ALIST_1) { - return binary_to_term_int(BIF_P, 0, BIF_ARG_1, NULL, bif_export[BIF_binary_to_term_1], - BIF_ARG_1, THE_NON_VALUE); + B2TContext ctx; + + ctx.flags = 0; + ctx.used_bytes = 0; + ctx.trap_bin = THE_NON_VALUE; + ctx.bif = bif_export[BIF_binary_to_term_1]; + ctx.arg[0] = BIF_ARG_1; + ctx.arg[1] = THE_NON_VALUE; + return binary_to_term_int(BIF_P, BIF_ARG_1, &ctx); } HIPE_WRAPPER_BIF_DISABLE_GC(binary_to_term, 2) BIF_RETTYPE binary_to_term_2(BIF_ALIST_2) { + B2TContext ctx; Eterm opts; Eterm opt; - Uint32 flags = 0; + ctx.flags = 0; + ctx.used_bytes = 0; opts = BIF_ARG_2; while (is_list(opts)) { opt = CAR(list_val(opts)); if (opt == am_safe) { - flags |= ERTS_DIST_EXT_BTT_SAFE; + ctx.flags |= ERTS_DIST_EXT_BTT_SAFE; + } + else if (opt == am_used) { + ctx.used_bytes = 1; } else { goto error; @@ -1661,8 +1694,11 @@ BIF_RETTYPE binary_to_term_2(BIF_ALIST_2) if (is_not_nil(opts)) goto error; - return binary_to_term_int(BIF_P, flags, BIF_ARG_1, NULL, bif_export[BIF_binary_to_term_2], - BIF_ARG_1, BIF_ARG_2); + ctx.trap_bin = THE_NON_VALUE; + ctx.bif = bif_export[BIF_binary_to_term_2]; + ctx.arg[0] = BIF_ARG_1; + ctx.arg[1] = BIF_ARG_2; + return binary_to_term_int(BIF_P, BIF_ARG_1, &ctx); error: BIF_ERROR(BIF_P, BADARG); @@ -4000,6 +4036,7 @@ dec_term_atom_common: if (ctx) { ctx->state = B2TDone; ctx->reds = reds; + ctx->u.dc.ep = ep; } return ep; diff --git a/erts/emulator/test/binary_SUITE.erl b/erts/emulator/test/binary_SUITE.erl index 61536bacd7..cbc2d8fae5 100644 --- a/erts/emulator/test/binary_SUITE.erl +++ b/erts/emulator/test/binary_SUITE.erl @@ -48,6 +48,7 @@ bad_list_to_binary/1, bad_binary_to_list/1, t_split_binary/1, bad_split/1, terms/1, terms_float/1, float_middle_endian/1, + b2t_used_big/1, external_size/1, t_iolist_size/1, t_hash/1, bad_size/1, @@ -72,6 +73,7 @@ all() -> t_split_binary, bad_split, bad_list_to_binary, bad_binary_to_list, terms, terms_float, float_middle_endian, external_size, t_iolist_size, + b2t_used_big, bad_binary_to_term_2, safe_binary_to_term2, bad_binary_to_term, bad_terms, t_hash, bad_size, bad_term_to_binary, more_bad_terms, otp_5484, otp_5933, @@ -425,40 +427,77 @@ bad_term_to_binary(Config) when is_list(Config) -> terms(Config) when is_list(Config) -> TestFun = fun(Term) -> - try - S = io_lib:format("~p", [Term]), - io:put_chars(S) - catch - error:badarg -> - io:put_chars("bit sized binary") - end, + S = io_lib:format("~p", [Term]), + io:put_chars(S), Bin = term_to_binary(Term), case erlang:external_size(Bin) of Sz when is_integer(Sz), size(Bin) =< Sz -> ok end, - Bin1 = term_to_binary(Term, [{minor_version, 1}]), - case erlang:external_size(Bin1, [{minor_version, 1}]) of - Sz1 when is_integer(Sz1), size(Bin1) =< Sz1 -> - ok - end, + Bin1 = term_to_binary(Term, [{minor_version, 1}]), + case erlang:external_size(Bin1, [{minor_version, 1}]) of + Sz1 when is_integer(Sz1), size(Bin1) =< Sz1 -> + ok + end, Term = binary_to_term_stress(Bin), Term = binary_to_term_stress(Bin, [safe]), - Unaligned = make_unaligned_sub_binary(Bin), - Term = binary_to_term_stress(Unaligned), - Term = binary_to_term_stress(Unaligned, []), - Term = binary_to_term_stress(Bin, [safe]), + Bin_sz = byte_size(Bin), + {Term,Bin_sz} = binary_to_term_stress(Bin, [used]), + + BinE = <<Bin/binary, 1, 2, 3>>, + {Term,Bin_sz} = binary_to_term_stress(BinE, [used]), + + BinU = make_unaligned_sub_binary(Bin), + Term = binary_to_term_stress(BinU), + Term = binary_to_term_stress(BinU, []), + Term = binary_to_term_stress(BinU, [safe]), + {Term,Bin_sz} = binary_to_term_stress(BinU, [used]), + + BinUE = make_unaligned_sub_binary(BinE), + {Term,Bin_sz} = binary_to_term_stress(BinUE, [used]), + BinC = erlang:term_to_binary(Term, [compressed]), + BinC_sz = byte_size(BinC), + true = BinC_sz =< size(Bin), Term = binary_to_term_stress(BinC), - true = size(BinC) =< size(Bin), + {Term, BinC_sz} = binary_to_term_stress(BinC, [used]), + Bin = term_to_binary(Term, [{compressed,0}]), terms_compression_levels(Term, size(Bin), 1), - UnalignedC = make_unaligned_sub_binary(BinC), - Term = binary_to_term_stress(UnalignedC) + + BinUC = make_unaligned_sub_binary(BinC), + Term = binary_to_term_stress(BinUC), + {Term,BinC_sz} = binary_to_term_stress(BinUC, [used]), + + BinCE = <<BinC/binary, 1, 2, 3>>, + {Term,BinC_sz} = binary_to_term_stress(BinCE, [used]), + + BinUCE = make_unaligned_sub_binary(BinCE), + Term = binary_to_term_stress(BinUCE), + {Term,BinC_sz} = binary_to_term_stress(BinUCE, [used]) end, test_terms(TestFun), ok. +%% Test binary_to_term(_, [used]) returning a big Used integer. +b2t_used_big(_Config) -> + case erlang:system_info(wordsize) of + 8 -> + {skipped, "This is not a 32-bit machine"}; + 4 -> + %% Use a long utf8 atom for large external format but compact on heap. + BigAtom = binary_to_atom(<< <<16#F0908D88:32>> || _ <- lists:seq(1,255) >>, + utf8), + Atoms = (1 bsl 17) + (1 bsl 9), + BigAtomList = lists:duplicate(Atoms, BigAtom), + BigBin = term_to_binary(BigAtomList), + {BigAtomList, Used} = binary_to_term(BigBin, [used]), + 2 = erts_debug:size(Used), + Used = byte_size(BigBin), + Used = 1 + 1 + 4 + Atoms*(1+2+4*255) + 1, + ok + end. + terms_compression_levels(Term, UncompressedSz, Level) when Level < 10 -> BinC = erlang:term_to_binary(Term, [{compressed,Level}]), Term = binary_to_term_stress(BinC), diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl index f743b7d26b..80bceae506 100644 --- a/erts/preloaded/src/erlang.erl +++ b/erts/preloaded/src/erlang.erl @@ -427,9 +427,11 @@ binary_to_term(_Binary) -> erlang:nif_error(undefined). %% binary_to_term/2 --spec binary_to_term(Binary, Opts) -> term() when +-spec binary_to_term(Binary, Opts) -> term() | {term(), Used} when Binary :: ext_binary(), - Opts :: [safe]. + Opt :: safe | used, + Opts :: [Opt], + Used :: pos_integer(). binary_to_term(_Binary, _Opts) -> erlang:nif_error(undefined). diff --git a/lib/kernel/test/erl_distribution_wb_SUITE.erl b/lib/kernel/test/erl_distribution_wb_SUITE.erl index 03aaee56b7..258ed4f88c 100644 --- a/lib/kernel/test/erl_distribution_wb_SUITE.erl +++ b/lib/kernel/test/erl_distribution_wb_SUITE.erl @@ -65,6 +65,7 @@ ?DFLAG_EXTENDED_PIDS_PORTS bor ?DFLAG_UTF8_ATOMS)). +-define(PASS_THROUGH, $p). -define(shutdown(X), exit(X)). -define(int16(X), [((X) bsr 8) band 16#ff, (X) band 16#ff]). @@ -676,10 +677,9 @@ recv_message(Socket) -> case gen_tcp:recv(Socket, 0) of {ok,Data} -> B0 = list_to_binary(Data), - {_,B1} = erlang:split_binary(B0,1), - Header = binary_to_term(B1), - Siz = byte_size(term_to_binary(Header)), - {_,B2} = erlang:split_binary(B1,Siz), + <<?PASS_THROUGH, B1/binary>> = B0, + {Header,Siz} = binary_to_term(B1,[used]), + <<_:Siz/binary,B2/binary>> = B1, Message = case (catch binary_to_term(B2)) of {'EXIT', _} -> could_not_digest_message; |