From 47a2ff39ac75c69b41f90f05a63fd9a2e6c0b36a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Sat, 1 Sep 2018 10:57:33 +0200 Subject: Introduce a put_tuple2 instruction Sometimes when building a tuple, there is no way to avoid an extra `move` instruction. Consider this code: make_tuple(A) -> {ok,A}. The corresponding BEAM code looks like this: {test_heap,3,1}. {put_tuple,2,{x,1}}. {put,{atom,ok}}. {put,{x,0}}. {move,{x,1},{x,0}}. return. To avoid overwriting the source register `{x,0}`, a `move` instruction is necessary. The problem doesn't exist when building a list: %% build_list(A) -> [A]. {test_heap,2,1}. {put_list,{x,0},nil,{x,0}}. return. Introduce a new `put_tuple2` instruction that builds a tuple in a single instruction, so that the `move` instruction can be eliminated: %% make_tuple(A) -> {ok,A}. {test_heap,3,1}. {put_tuple2,{x,0},{list,[{atom,ok},{x,0}]}}. return. Note that the BEAM loader already combines `put_tuple` and `put` instructions into an internal instruction similar to `put_tuple2`. Therefore the introduction of the new instruction will not speed up execution of tuple building itself, but it will be less work for the loader to load the new instruction. --- erts/emulator/beam/beam_debug.c | 4 ++-- erts/emulator/beam/instrs.tab | 17 ++++++++++------- erts/emulator/beam/ops.tab | 15 ++++++++++++--- lib/compiler/src/beam_block.erl | 1 + lib/compiler/src/beam_disasm.erl | 17 +++++++++++++++++ lib/compiler/src/beam_except.erl | 9 +-------- lib/compiler/src/beam_flatten.erl | 1 + lib/compiler/src/beam_ssa_codegen.erl | 4 ++++ lib/compiler/src/beam_ssa_pre_codegen.erl | 23 +++++++++++++---------- lib/compiler/src/beam_validator.erl | 6 ++++++ lib/compiler/src/compile.erl | 4 ++-- lib/compiler/src/genop.tab | 5 +++++ lib/compiler/test/compile_SUITE.erl | 17 +++++++++++------ lib/hipe/icode/hipe_beam_to_icode.erl | 20 ++++++++++++++++++++ 14 files changed, 105 insertions(+), 38 deletions(-) diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index 9633de2021..b33aab7eee 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -786,8 +786,8 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr) } } break; - case op_i_put_tuple_xI: - case op_i_put_tuple_yI: + case op_put_tuple2_xI: + case op_put_tuple2_yI: case op_new_map_dtI: case op_update_map_assoc_sdtI: case op_update_map_exact_jsdtI: diff --git a/erts/emulator/beam/instrs.tab b/erts/emulator/beam/instrs.tab index 42c1168f85..da1dd3dc45 100644 --- a/erts/emulator/beam/instrs.tab +++ b/erts/emulator/beam/instrs.tab @@ -559,17 +559,19 @@ update_list(Hd, Dst) { HTOP += 2; } -i_put_tuple := i_put_tuple.make.fill; - -i_put_tuple.make(Dst) { - $Dst = make_tuple(HTOP); -} - -i_put_tuple.fill(Arity) { +put_tuple2(Dst, Arity) { Eterm* hp = HTOP; Eterm arity = $Arity; + /* + * If operands are not packed (in the 32-bit VM), + * is is not safe to use $Dst directly after I + * has been updated. + */ + Eterm* dst_ptr = &($Dst); + //| -no_next + ASSERT(arity != 0); *hp++ = make_arityval(arity); I = $NEXT_INSTRUCTION; do { @@ -586,6 +588,7 @@ i_put_tuple.fill(Arity) { break; } } while (--arity != 0); + *dst_ptr = make_tuple(HTOP); HTOP = hp; ASSERT(VALID_INSTR(* (Eterm *)I)); Goto(*I); diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index d71594235e..d859c4bb24 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -483,9 +483,16 @@ is_eq f? s s is_ne f? s s # -# Putting things. +# Putting tuples. +# +# Code compiled with OTP 22 and later uses put_tuple2 to +# to construct a tuple. +# +# Code compiled before OTP 22 uses put_tuple + one put instruction +# per element. Translate to put_tuple2. # +i_put_tuple/2 put_tuple Arity Dst => i_put_tuple Dst u i_put_tuple Dst Arity Puts=* | put S1 | put S2 | \ @@ -495,10 +502,12 @@ i_put_tuple Dst Arity Puts=* | put S1 | put S2 | \ i_put_tuple Dst Arity Puts=* | put S => \ tuple_append_put(Arity, Dst, Puts, S) -i_put_tuple/2 +i_put_tuple Dst Arity Puts=* => put_tuple2 Dst Arity Puts -i_put_tuple xy I +put_tuple2 xy I +# +# Putting lists. # # The instruction "put_list Const [] Dst" were generated in rare # circumstances up to and including OTP 18. Starting with OTP 19, diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index c928fc7187..0ed2a03a81 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -89,6 +89,7 @@ collect({move,S,D}) -> {set,[D],[S],move}; collect({put_list,S1,S2,D}) -> {set,[D],[S1,S2],put_list}; collect({put_tuple,A,D}) -> {set,[D],[],{put_tuple,A}}; collect({put,S}) -> {set,[],[S],put}; +collect({put_tuple2,D,{list,Els}}) -> {set,[D],Els,put_tuple2}; collect({get_tuple_element,S,I,D}) -> {set,[D],[S],{get_tuple_element,I}}; collect({set_tuple_element,S,D,I}) -> {set,[],[S,D],{set_tuple_element,I}}; collect({get_hd,S,D}) -> {set,[D],[S],get_hd}; diff --git a/lib/compiler/src/beam_disasm.erl b/lib/compiler/src/beam_disasm.erl index 6cee9acae4..d0be39f520 100644 --- a/lib/compiler/src/beam_disasm.erl +++ b/lib/compiler/src/beam_disasm.erl @@ -373,6 +373,8 @@ disasm_instr(B, Bs, Atoms, Literals) -> disasm_map_inst(get_map_elements, Arity, Bs, Atoms, Literals); has_map_fields -> disasm_map_inst(has_map_fields, Arity, Bs, Atoms, Literals); + put_tuple2 -> + disasm_put_tuple2(Bs, Atoms, Literals); _ -> try decode_n_args(Arity, Bs, Atoms, Literals) of {Args, RestBs} -> @@ -413,6 +415,14 @@ disasm_map_inst(Inst, Arity, Bs0, Atoms, Literals) -> {List, RestBs} = decode_n_args(Len, Bs2, Atoms, Literals), {{Inst, Args ++ [{Z,U,List}]}, RestBs}. +disasm_put_tuple2(Bs, Atoms, Literals) -> + {X, Bs1} = decode_arg(Bs, Atoms, Literals), + {Z, Bs2} = decode_arg(Bs1, Atoms, Literals), + {U, Bs3} = decode_arg(Bs2, Atoms, Literals), + {u, Len} = U, + {List, RestBs} = decode_n_args(Len, Bs3, Atoms, Literals), + {{put_tuple2, [X,{Z,U,List}]}, RestBs}. + %%----------------------------------------------------------------------- %% decode_arg([Byte]) -> {Arg, [Byte]} %% @@ -1095,6 +1105,13 @@ resolve_inst({get_hd,[Src,Dst]},_,_,_) -> resolve_inst({get_tl,[Src,Dst]},_,_,_) -> {get_tl,Src,Dst}; +%% +%% OTP 22. +%% +resolve_inst({put_tuple2,[Dst,{{z,1},{u,_},List0}]},_,_,_) -> + List = resolve_args(List0), + {put_tuple2,Dst,{list,List}}; + %% %% Catches instructions that are not yet handled. %% diff --git a/lib/compiler/src/beam_except.erl b/lib/compiler/src/beam_except.erl index 366ec6cd44..98831d87a7 100644 --- a/lib/compiler/src/beam_except.erl +++ b/lib/compiler/src/beam_except.erl @@ -113,14 +113,7 @@ dig_out_block([{set,[{x,0}],[{atom,if_clause}],move}]) -> {yes,if_end,[]}; dig_out_block([{set,[{x,0}],[{literal,{Exc,Value}}],move}|Is]) -> translate_exception(Exc, {literal,Value}, Is, 0); -dig_out_block([{set,[{x,0}],[Tuple],move}, - {set,[],[Value],put}, - {set,[],[{atom,Exc}],put}, - {set,[Tuple],[],{put_tuple,2}}|Is]) -> - translate_exception(Exc, Value, Is, 3); -dig_out_block([{set,[],[Value],put}, - {set,[],[{atom,Exc}],put}, - {set,[{x,0}],[],{put_tuple,2}}|Is]) -> +dig_out_block([{set,[{x,0}],[{atom,Exc},Value],put_tuple2}|Is]) -> translate_exception(Exc, Value, Is, 3); dig_out_block(_) -> no. diff --git a/lib/compiler/src/beam_flatten.erl b/lib/compiler/src/beam_flatten.erl index 9a1c5a1021..973d16a1bc 100644 --- a/lib/compiler/src/beam_flatten.erl +++ b/lib/compiler/src/beam_flatten.erl @@ -65,6 +65,7 @@ norm({set,[D],[S],move}) -> {move,S,D}; norm({set,[D],[S],fmove}) -> {fmove,S,D}; norm({set,[D],[S],fconv}) -> {fconv,S,D}; norm({set,[D],[S1,S2],put_list}) -> {put_list,S1,S2,D}; +norm({set,[D],Els,put_tuple2}) -> {put_tuple2,D,{list,Els}}; norm({set,[D],[],{put_tuple,A}}) -> {put_tuple,A,D}; norm({set,[],[S],put}) -> {put,S}; norm({set,[D],[S],{get_tuple_element,I}}) -> {get_tuple_element,S,I,D}; diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl index 357352269c..006c41c0e0 100644 --- a/lib/compiler/src/beam_ssa_codegen.erl +++ b/lib/compiler/src/beam_ssa_codegen.erl @@ -286,6 +286,8 @@ classify_heap_need(put_list, _) -> {put,2}; classify_heap_need(put_tuple_arity, [#b_literal{val=Words}]) -> {put,Words+1}; +classify_heap_need(put_tuple, Elements) -> + {put,length(Elements)+1}; classify_heap_need({bif,Name}, Args) -> case is_gc_bif(Name, Args) of false -> neutral; @@ -1360,6 +1362,8 @@ cg_instr(get_tuple_element=Op, [Src,{integer,N}], Dst) -> [{Op,Src,N,Dst}]; cg_instr(put_list=Op, [Hd,Tl], Dst) -> [{Op,Hd,Tl,Dst}]; +cg_instr(put_tuple, Elements, Dst) -> + [{put_tuple2,Dst,{list,Elements}}]; cg_instr(put_tuple_arity, [{integer,Arity}], Dst) -> [{put_tuple,Arity,Dst}]; cg_instr(put_tuple_elements, Elements, _Dst) -> diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl index bbc3739eb5..54aa2efaf6 100644 --- a/lib/compiler/src/beam_ssa_pre_codegen.erl +++ b/lib/compiler/src/beam_ssa_pre_codegen.erl @@ -79,8 +79,9 @@ {'ok',beam_ssa:b_module()}. module(#b_module{body=Fs0}=Module, Opts) -> + FixTuples = proplists:get_bool(no_put_tuple2, Opts), ExtraAnnos = proplists:get_bool(dprecg, Opts), - Ps = passes(ExtraAnnos), + Ps = passes(FixTuples, ExtraAnnos), Fs = functions(Fs0, Ps), {ok,Module#b_module{body=Fs}}. @@ -113,13 +114,16 @@ functions([], _Ps) -> []. }). -define(PASS(N), {N,fun N/1}). -passes(ExtraAnnos) -> +passes(FixTuples, ExtraAnnos) -> Ps = [?PASS(assert_no_critical_edges), %% Preliminaries. ?PASS(fix_bs), ?PASS(sanitize), - ?PASS(fix_tuples), + case FixTuples of + false -> ignore; + true -> ?PASS(fix_tuples) + end, ?PASS(place_frames), ?PASS(fix_receives), @@ -151,10 +155,7 @@ passes(ExtraAnnos) -> ?PASS(fix_aliased_regs), ?PASS(frame_size), ?PASS(turn_yregs)], - case ExtraAnnos of - false -> [P || P <- Ps, P =/= ignore]; - true -> Ps - end. + [P || P <- Ps, P =/= ignore]. function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0, Ps) -> try @@ -693,9 +694,11 @@ prune_phi(#b_set{args=Args0}=Phi, Reachable) -> %%% %% fix_tuples(St0) -> St. -%% We must split tuple creation into two instruction to mirror the -%% the way tuples are created in BEAM. Each put_tuple instruction is -%% split into put_tuple_arity followed by put_tuple_elements. +%% If compatibility with a previous version of Erlang has been +%% requested, tuple creation must be split into two instruction to +%% mirror the the way tuples are created in BEAM prior to OTP 22. +%% Each put_tuple instruction is split into put_tuple_arity followed +%% by put_tuple_elements. fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) -> F = fun (#b_set{op=put_tuple,args=Args}=Put, C0) -> diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl index b44771d8a9..953aced66e 100644 --- a/lib/compiler/src/beam_validator.erl +++ b/lib/compiler/src/beam_validator.erl @@ -349,6 +349,12 @@ valfun_1({put_list,A,B,Dst}, Vst0) -> assert_term(B, Vst0), Vst = eat_heap(2, Vst0), set_type_reg(cons, Dst, Vst); +valfun_1({put_tuple2,Dst,{list,Elements}}, Vst0) -> + _ = [assert_term(El, Vst0) || El <- Elements], + Size = length(Elements), + Vst = eat_heap(Size+1, Vst0), + Type = {tuple,Size}, + set_type_reg(Type, Dst, Vst); valfun_1({put_tuple,Sz,Dst}, Vst0) when is_integer(Sz) -> Vst1 = eat_heap(1, Vst0), Vst = set_type_reg(tuple_in_progress, Dst, Vst1), diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl index 3a835cfb2f..0a7f723c64 100644 --- a/lib/compiler/src/compile.erl +++ b/lib/compiler/src/compile.erl @@ -259,13 +259,13 @@ expand_opt(r19, Os) -> expand_opt(r20, Os) -> expand_opt_before_21(Os); expand_opt(r21, Os) -> - Os; + [no_put_tuple2|Os]; expand_opt({debug_info_key,_}=O, Os) -> [encrypt_debug_info,O|Os]; expand_opt(O, Os) -> [O|Os]. expand_opt_before_21(Os) -> - [no_get_hd_tl,no_ssa_opt_record,no_utf8_atoms|Os]. + [no_put_tuple2,no_get_hd_tl,no_ssa_opt_record,no_utf8_atoms|Os]. %% format_error(ErrorDescriptor) -> string() diff --git a/lib/compiler/src/genop.tab b/lib/compiler/src/genop.tab index f35ae09fe7..8e34e3e291 100755 --- a/lib/compiler/src/genop.tab +++ b/lib/compiler/src/genop.tab @@ -573,3 +573,8 @@ BEAM_FORMAT_NUMBER=0 ## @doc Get the tail (or cdr) part of a list (a cons cell) from Source and ## put it into the register Tail. 163: get_tl/2 + +## @spec put_tuple2 Destination Elements +## @doc Build a tuple with the elements in the list Elements and put it +## put into register Destination. +164: put_tuple2/2 diff --git a/lib/compiler/test/compile_SUITE.erl b/lib/compiler/test/compile_SUITE.erl index 85f0b7dc46..8d8fc23027 100644 --- a/lib/compiler/test/compile_SUITE.erl +++ b/lib/compiler/test/compile_SUITE.erl @@ -1476,18 +1476,22 @@ bc_options(Config) -> 101 = highest_opcode(DataDir, small_float, [no_get_hd_tl,no_line_info]), 103 = highest_opcode(DataDir, big, - [no_get_hd_tl,no_ssa_opt_record, + [no_put_tuple2, + no_get_hd_tl,no_ssa_opt_record, no_line_info,no_stack_trimming]), 125 = highest_opcode(DataDir, small_float, [no_get_hd_tl,no_line_info,no_ssa_opt_float]), 132 = highest_opcode(DataDir, small, - [no_get_hd_tl,no_ssa_opt_record,no_ssa_opt_float,no_line_info]), + [no_put_tuple2,no_get_hd_tl,no_ssa_opt_record, + no_ssa_opt_float,no_line_info]), - 136 = highest_opcode(DataDir, big, [no_get_hd_tl,no_ssa_opt_record,no_line_info]), + 136 = highest_opcode(DataDir, big, [no_put_tuple2,no_get_hd_tl, + no_ssa_opt_record,no_line_info]), - 153 = highest_opcode(DataDir, big, [no_get_hd_tl,no_ssa_opt_record]), + 153 = highest_opcode(DataDir, big, [no_put_tuple2,no_get_hd_tl, + no_ssa_opt_record]), 153 = highest_opcode(DataDir, big, [r16]), 153 = highest_opcode(DataDir, big, [r17]), 153 = highest_opcode(DataDir, big, [r18]), @@ -1499,9 +1503,10 @@ bc_options(Config) -> 158 = highest_opcode(DataDir, small_maps, [r18]), 158 = highest_opcode(DataDir, small_maps, [r19]), 158 = highest_opcode(DataDir, small_maps, [r20]), - 158 = highest_opcode(DataDir, small_maps, []), + 158 = highest_opcode(DataDir, small_maps, [r21]), - 163 = highest_opcode(DataDir, big, []), + 164 = highest_opcode(DataDir, small_maps, []), + 164 = highest_opcode(DataDir, big, []), ok. diff --git a/lib/hipe/icode/hipe_beam_to_icode.erl b/lib/hipe/icode/hipe_beam_to_icode.erl index 4f099baab3..ffe81ef9b8 100644 --- a/lib/hipe/icode/hipe_beam_to_icode.erl +++ b/lib/hipe/icode/hipe_beam_to_icode.erl @@ -647,6 +647,13 @@ trans_fun([{put_tuple,_Size,Reg}|Instructions], Env) -> Primop = hipe_icode:mk_primop(Dest,mktuple,Src), Moves ++ [Primop | trans_fun(Instructions2,Env2)]; %%--- put --- SHOULD NOT REALLY EXIST HERE; put INSTRUCTIONS ARE HANDLED ABOVE. +%%--- put_tuple2 --- +trans_fun([{put_tuple2,Reg,{list,Elements}}|Instructions], Env) -> + Dest = [mk_var(Reg)], + {Moves,Vars,Env2} = trans_elements(Elements, [], [], Env), + Src = lists:reverse(Vars), + Primop = hipe_icode:mk_primop(Dest, mktuple, Src), + Moves ++ [Primop | trans_fun(Instructions, Env2)]; %%--- badmatch --- trans_fun([{badmatch,Arg}|Instructions], Env) -> BadVar = trans_arg(Arg), @@ -1699,6 +1706,19 @@ trans_puts([{put,X}|Code], Vars, Moves, Env) -> trans_puts(Code, Vars, Moves, Env) -> %% No more put operations {Moves, Code, Vars, Env}. +trans_elements([X|Code], Vars, Moves, Env) -> + case type(X) of + var -> + Var = mk_var(X), + trans_elements(Code, [Var|Vars], Moves, Env); + #beam_const{value=C} -> + Var = mk_var(new), + Move = hipe_icode:mk_move(Var, hipe_icode:mk_const(C)), + trans_elements(Code, [Var|Vars], [Move|Moves], Env) + end; +trans_elements([], Vars, Moves, Env) -> + {Moves, Vars, Env}. + %%----------------------------------------------------------------------- %% The code for this instruction is a bit large because we are treating %% different cases differently. We want to use the icode `type' -- cgit v1.2.3