From 21041ee50217d90dc21dd484b9dd26c469e8cf87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 13 Apr 2016 15:07:52 +0200 Subject: beam_debug: Correct masking when unpacking packed operands --- erts/emulator/beam/beam_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index e37bd4d78c..7fddf15dab 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -431,7 +431,7 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr) packed >>= 10; break; case '0': /* Tight shift */ - *ap++ = packed & (BEAM_TIGHT_MASK / sizeof(Eterm)); + *ap++ = packed & BEAM_TIGHT_MASK; packed >>= BEAM_TIGHT_SHIFT; break; case '6': /* Shift 16 steps */ -- cgit v1.2.3 From 19f231024d5fc4e0e7084b6b38ca404f3368844a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 14 Apr 2016 10:04:49 +0200 Subject: Eliminate misleading #ifdef ARCH_64 in beam_opcodes.h There is a '#ifdef ARCH_64' beam_opcodes.h, which might make you think that files generated by beam_makeops will work for both 32-bit and 64-bit architectures. They will not. beam_makeops will generate different code depending on its -wordsize option. --- erts/emulator/utils/beam_makeops | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops index 2e7073a8f0..31cd072d5a 100755 --- a/erts/emulator/utils/beam_makeops +++ b/erts/emulator/utils/beam_makeops @@ -624,19 +624,25 @@ sub emulator_output { print "#define NUM_SPECIFIC_OPS ", scalar(@op_to_name), "\n"; print "#define SCRATCH_X_REG 1023\n"; print "\n"; - print "#ifdef ARCH_64\n"; - print "# define BEAM_WIDE_MASK 0xFFFFUL\n"; - print "# define BEAM_LOOSE_MASK 0xFFFFUL\n"; - print "# define BEAM_TIGHT_MASK 0xFFFFUL\n"; - print "# define BEAM_WIDE_SHIFT 32\n"; - print "# define BEAM_LOOSE_SHIFT 16\n"; - print "# define BEAM_TIGHT_SHIFT 16\n"; - print "#else\n"; - print "# define BEAM_LOOSE_MASK 0xFFF\n"; - print "# define BEAM_TIGHT_MASK 0xFFC\n"; - print "# define BEAM_LOOSE_SHIFT 16\n"; - print "# define BEAM_TIGHT_SHIFT 10\n"; - print "#endif\n"; + if ($wordsize == 32) { + print "#if defined(ARCH_64)\n"; + print qq[ #error "32-bit architecture assumed, but ARCH_64 is defined"\n]; + print "#endif\n"; + print "#define BEAM_LOOSE_MASK 0xFFF\n"; + print "#define BEAM_TIGHT_MASK 0xFFC\n"; + print "#define BEAM_LOOSE_SHIFT 16\n"; + print "#define BEAM_TIGHT_SHIFT 10\n"; + } elsif ($wordsize == 64) { + print "#if !defined(ARCH_64)\n"; + print qq[ #error "64-bit architecture assumed, but ARCH_64 not defined"\n]; + print "#endif\n"; + print "#define BEAM_WIDE_MASK 0xFFFFUL\n"; + print "#define BEAM_LOOSE_MASK 0xFFFFUL\n"; + print "#define BEAM_TIGHT_MASK 0xFFFFUL\n"; + print "#define BEAM_WIDE_SHIFT 32\n"; + print "#define BEAM_LOOSE_SHIFT 16\n"; + print "#define BEAM_TIGHT_SHIFT 16\n"; + } print "\n"; # -- cgit v1.2.3 From e72f710ae493cf7ba5da375632b096830df2de5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 13 Apr 2016 16:22:31 +0200 Subject: Correct unpacking of 3 operands on 32-bit archictectures 0a4750f91c83 optimized unpacking by removing a mask operation when unpacking three packed operands. Unfortunately, that optimization is only safe on 64-bit architectures. Here is what happens on 32-bit architectures. The operands to be packed are 10-bit register numbers that have been turned into byte offsets: aaaaaaaaaa00 bbbbbbbbbb00 cccccccccc00 They can be packed into a single word like this: 30 20 10 0 | | | | aa aaaaaaaabb bbbbbbbbcc cccccccc00 If we call the packed word P, the original operands can be extracted like this: C = P band 2#111111111100 B = (P bsr 10) band 2#111111111100 A = (P bsr 20) band 2#111111111100 The bug was that A was extracted without the masking: A = P bsr 20 That would give A the value: aaaaaaaaaaaabb That would only be safe if the two most significant bits in B were zeroes. --- erts/emulator/utils/beam_makeops | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops index 31cd072d5a..24eace9fc4 100755 --- a/erts/emulator/utils/beam_makeops +++ b/erts/emulator/utils/beam_makeops @@ -48,7 +48,7 @@ $pack_shift[4] = ['0', 'BEAM_LOOSE_SHIFT', # Only for 64 bit wordsize '(3*BEAM_LOOSE_SHIFT)']; $pack_mask[2] = ['BEAM_LOOSE_MASK', $WHOLE_WORD]; -$pack_mask[3] = ['BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK', $WHOLE_WORD]; +$pack_mask[3] = ['BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK']; $pack_mask[4] = ['BEAM_LOOSE_MASK', # Only for 64 bit wordsize 'BEAM_LOOSE_MASK', 'BEAM_LOOSE_MASK', @@ -251,6 +251,7 @@ $args_per_word[5] = 3; $args_per_word[6] = 3; if ($wordsize == 64) { + $pack_mask[3] = ['BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK', $WHOLE_WORD]; $args_per_word[4] = 4; } -- cgit v1.2.3 From 20ffaef67ccaf4f2a7df80d25a88ce8cb2a2a15a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 14 Apr 2016 07:22:07 +0200 Subject: Mend beam_SUITE:packed_registers/1 packed_registers/1 may have actually tested put_list/3 instructions with high register numbers at some time. Currently, the compiler will generate code that only uses low register numbers. Totally rewrite the test case. It is difficult to arrange so that put_list/3 uses three high register numbers, so we will use get_list/3 instructions with high register numbers. --- erts/emulator/test/beam_SUITE.erl | 77 +++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/erts/emulator/test/beam_SUITE.erl b/erts/emulator/test/beam_SUITE.erl index f61ab431e9..ca37ab68a7 100644 --- a/erts/emulator/test/beam_SUITE.erl +++ b/erts/emulator/test/beam_SUITE.erl @@ -29,6 +29,7 @@ -export([applied/2,swap_temp_applied/1]). -include_lib("common_test/include/ct.hrl"). +-include_lib("syntax_tools/include/merl.hrl"). suite() -> [{ct_hooks,[ts_install_cth]}]. @@ -93,48 +94,46 @@ applied(Starter, N) -> apply_last_bif(Config) when is_list(Config) -> apply(erlang, abs, [1]). -%% Test three high register numbers in a put_list instruction -%% (to test whether packing works properly). +%% Test whether packing works properly. packed_registers(Config) when is_list(Config) -> - PrivDir = proplists:get_value(priv_dir, Config), - Mod = packed_regs, - Name = filename:join(PrivDir, atom_to_list(Mod) ++ ".erl"), - - %% Generate a module which generates a list of tuples. - %% put_list(A) -> [{A, 600}, {A, 999}, ... {A, 0}]. - Code = gen_packed_regs(600, ["-module("++atom_to_list(Mod)++").\n", - "-export([put_list/1]).\n", - "put_list(A) ->\n["]), - ok = file:write_file(Name, Code), - - %% Compile the module. - io:format("Compiling: ~s\n", [Name]), - CompRc = compile:file(Name, [{outdir, PrivDir}, report]), - io:format("Result: ~p\n",[CompRc]), - {ok, Mod} = CompRc, - - %% Load it. - io:format("Loading...\n",[]), - LoadRc = code:load_abs(filename:join(PrivDir, atom_to_list(Mod))), - {module,_Module} = LoadRc, - - %% Call it and verify result. - Term = {a, b}, - L = Mod:put_list(Term), - verify_packed_regs(L, Term, 600), + Mod = ?FUNCTION_NAME, + + %% Generate scrambled sequence. + Seq0 = [{erlang:phash2(I),I} || I <- lists:seq(0, 260)], + Seq = [I || {_,I} <- lists:sort(Seq0)], + + %% Generate a test modules that uses get_list/3 instructions + %% with high register numbers. + S0 = [begin + VarName = list_to_atom("V"++integer_to_list(V)), + {merl:var(VarName),V} + end || V <- Seq], + Vars = [V || {V,_} <- S0], + NewVars = [begin + VarName = list_to_atom("M"++integer_to_list(V)), + merl:var(VarName) + end || V <- Seq], + S = [?Q("_@Var = id(_@Value@)") || {Var,Value} <- S0], + Code = ?Q(["-module('@Mod@').\n" + "-export([f/0]).\n" + "f() ->\n" + "_@S,\n" + "_ = id(0),\n" + "L = [_@Vars],\n" + "_ = id(1),\n" + "[_@NewVars] = L,\n" %Test get_list/3. + "_ = id(2),\n" + "id([_@Vars,_@NewVars]).\n" + "id(I) -> I.\n"]), + merl:compile_and_load(Code), + CombinedSeq = Seq ++ Seq, + CombinedSeq = Mod:f(), + + %% Clean up. + true = code:delete(Mod), + false = code:purge(Mod), ok. -gen_packed_regs(0, Acc) -> - [Acc|"{A,0}].\n"]; -gen_packed_regs(N, Acc) -> - gen_packed_regs(N-1, [Acc,"{A,",integer_to_list(N)|"},\n"]). - -verify_packed_regs([], _, -1) -> ok; -verify_packed_regs([{Term, N}| T], Term, N) -> - verify_packed_regs(T, Term, N-1); -verify_packed_regs(L, Term, N) -> - ct:fail("Expected [{~p, ~p}|T]; got\n~p\n", [Term, N, L]). - buildo_mucho(Config) when is_list(Config) -> buildo_mucho_1(), ok. -- cgit v1.2.3 From 9b2ee6cfdc09090df81be40ca1d7358c3d273743 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Sat, 9 Apr 2016 09:51:10 +0200 Subject: Optimize get_tuple_element instructions that target Y registers Several improvements in the compiler (e.g. c288ab87fd6) has lead to an Y register being the target for get_tuple_element instructions. Therefore, introduce i_get_tuple_element2y that combines two consecutive get_tuple_element instructions that target Y registers. --- erts/emulator/beam/beam_emu.c | 11 +++++++++++ erts/emulator/beam/ops.tab | 6 ++++++ 2 files changed, 17 insertions(+) diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index d648a2f23c..ce92f60fd2 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -702,6 +702,17 @@ void** beam_ops; dst[1] = E2; \ } while (0) +#define GetTupleElement2Y(Src, Element, D1, D2) \ + do { \ + Eterm* src; \ + Eterm E1, E2; \ + src = ADD_BYTE_OFFSET(tuple_val(Src), (Element)); \ + E1 = src[0]; \ + E2 = src[1]; \ + D1 = E1; \ + D2 = E2; \ + } while (0) + #define GetTupleElement3(Src, Element, Dest) \ do { \ Eterm* src; \ diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 96a3a72bb5..aa720c5b0c 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -229,6 +229,9 @@ i_get_tuple_element y P y %macro: i_get_tuple_element2 GetTupleElement2 -pack i_get_tuple_element2 x P x +%macro: i_get_tuple_element2y GetTupleElement2Y -pack +i_get_tuple_element2y x P y y + %macro: i_get_tuple_element3 GetTupleElement3 -pack i_get_tuple_element3 x P x @@ -649,6 +652,9 @@ get_tuple_element Reg=x P1 D1=x | get_tuple_element Reg=x P2 D2=x | \ get_tuple_element Reg=x P1 D1=x | get_tuple_element Reg=x P2 D2=x | \ succ(P1, P2) | succ(D1, D2) => i_get_tuple_element2 Reg P1 D1 +get_tuple_element Reg=x P1 D1=y | get_tuple_element Reg=x P2 D2=y | \ + succ(P1, P2) => i_get_tuple_element2y Reg P1 D1 D2 + get_tuple_element Reg P Dst => i_get_tuple_element Reg P Dst is_integer Fail=f i => -- cgit v1.2.3