From 281ae7c2cf7e2dfd48cf50b2f68fd76f7c6ab0e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 25 Sep 2017 07:18:56 +0200 Subject: Eliminate MY_IS_SSMALL() For a long time, there has been the two macros IS_SSMALL() and MY_IS_SSMALL() that do exactly the same thing. There should only be one, and it should be called IS_SSMALL(). However, we must decide which implementation to use. When MY_IS_SSMALL() was introduced a long time ago, it was the most efficient. In a modern C compiler, there might not be any difference. To find out, I used the following small C program to examine the code generation: #include typedef unsigned int Uint32; typedef unsigned long Uint64; typedef long Sint; #define SWORD_CONSTANT(Const) Const##L #define SMALL_BITS (64-4) #define MAX_SMALL ((SWORD_CONSTANT(1) << (SMALL_BITS-1))-1) #define MIN_SMALL (-(SWORD_CONSTANT(1) << (SMALL_BITS-1))) #define MY_IS_SSMALL32(x) (((Uint32) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2) #define MY_IS_SSMALL64(x) (((Uint64) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2) #define MY_IS_SSMALL(x) (sizeof(x) == sizeof(Uint32) ? MY_IS_SSMALL32(x) : MY_IS_SSMALL64(x)) #define IS_SSMALL(x) (((x) >= MIN_SMALL) && ((x) <= MAX_SMALL)) void original(Sint n) { if (IS_SSMALL(n)) { printf("yes\n"); } } void enhanced(Sint n) { if (MY_IS_SSMALL(n)) { printf("yes\n"); } } gcc 7.2 produced the following code for the original() function: .LC0: .string "yes" original(long): movabs rax, 576460752303423488 add rdi, rax movabs rax, 1152921504606846975 cmp rdi, rax jbe .L4 rep ret .L4: mov edi, OFFSET FLAT:.LC0 jmp puts clang 5.0.0 produced the following code which is slightly better: original(long): movabs rax, 576460752303423488 add rax, rdi shr rax, 60 jne .LBB0_1 mov edi, .Lstr jmp puts # TAILCALL .LBB0_1: ret .Lstr: .asciz "yes" However, in the context of beam_emu.c, clang could produce similar to what gcc produced. gcc 7.2 produced the following code when MY_IS_SSMALL() was used: .LC0: .string "yes" enhanced(long): sar rdi, 59 add rdi, 1 cmp rdi, 1 jbe .L4 rep ret .L4: mov edi, OFFSET FLAT:.LC0 jmp puts clang produced similar code. This code seems to be the cheapest. There are four instructions, and there is no loading of huge integer constants. --- erts/emulator/beam/arith_instrs.tab | 11 ++++------- erts/emulator/beam/big.h | 15 ++++++++++++++- erts/emulator/beam/erl_arith.c | 14 +++++--------- erts/emulator/beam/erl_gc.c | 4 ++-- erts/emulator/beam/erl_term.h | 1 - erts/emulator/beam/external.c | 2 +- 6 files changed, 26 insertions(+), 21 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/arith_instrs.tab b/erts/emulator/beam/arith_instrs.tab index 7c9cd47e28..3db19df3c4 100644 --- a/erts/emulator/beam/arith_instrs.tab +++ b/erts/emulator/beam/arith_instrs.tab @@ -51,8 +51,7 @@ plus.fetch(Op1, Op2) { plus.execute(Fail, Live, Dst) { if (ERTS_LIKELY(is_both_small(PlusOp1, PlusOp2))) { Sint i = signed_val(PlusOp1) + signed_val(PlusOp2); - ASSERT(MY_IS_SSMALL(i) == IS_SSMALL(i)); - if (ERTS_LIKELY(MY_IS_SSMALL(i))) { + if (ERTS_LIKELY(IS_SSMALL(i))) { $Dst = make_small(i); $NEXT0(); } @@ -74,8 +73,7 @@ minus.fetch(Op1, Op2) { minus.execute(Fail, Live, Dst) { if (ERTS_LIKELY(is_both_small(MinusOp1, MinusOp2))) { Sint i = signed_val(MinusOp1) - signed_val(MinusOp2); - ASSERT(MY_IS_SSMALL(i) == IS_SSMALL(i)); - if (ERTS_LIKELY(MY_IS_SSMALL(i))) { + if (ERTS_LIKELY(IS_SSMALL(i))) { $Dst = make_small(i); $NEXT0(); } @@ -100,8 +98,7 @@ increment.execute(IncrementVal, Live, Dst) { increment_val = $IncrementVal; if (ERTS_LIKELY(is_small(increment_reg_val))) { Sint i = signed_val(increment_reg_val) + increment_val; - ASSERT(MY_IS_SSMALL(i) == IS_SSMALL(i)); - if (ERTS_LIKELY(MY_IS_SSMALL(i))) { + if (ERTS_LIKELY(IS_SSMALL(i))) { $Dst = make_small(i); $NEXT0(); } @@ -142,7 +139,7 @@ i_int_div(Fail, Live, Op1, Op2, Dst) { $BIF_ERROR_ARITY_2($Fail, BIF_intdiv_2, op1, op2); } else if (ERTS_LIKELY(is_both_small(op1, op2))) { Sint ires = signed_val(op1) / signed_val(op2); - if (ERTS_LIKELY(MY_IS_SSMALL(ires))) { + if (ERTS_LIKELY(IS_SSMALL(ires))) { $Dst = make_small(ires); $NEXT0(); } diff --git a/erts/emulator/beam/big.h b/erts/emulator/beam/big.h index 48efce20e7..7556205063 100644 --- a/erts/emulator/beam/big.h +++ b/erts/emulator/beam/big.h @@ -70,7 +70,20 @@ typedef Uint dsize_t; /* Vector size type */ /* Check for small */ #define IS_USMALL(sgn,x) ((sgn) ? ((x) <= MAX_SMALL+1) : ((x) <= MAX_SMALL)) -#define IS_SSMALL(x) (((x) >= MIN_SMALL) && ((x) <= MAX_SMALL)) + +/* + * It seems that both clang and gcc will generate sub-optimal code + * for the more obvious way to write the range check: + * + * #define IS_SSMALL(x) (((x) >= MIN_SMALL) && ((x) <= MAX_SMALL)) + * + * Note that IS_SSMALL() may be used in the 32-bit emulator with + * a Uint64 argument. Therefore, we must test the size of the argument + * to ensure that the cast does not discard the high-order 32 bits. + */ +#define _IS_SSMALL32(x) (((Uint32) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2) +#define _IS_SSMALL64(x) (((Uint64) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2) +#define IS_SSMALL(x) (sizeof(x) == sizeof(Uint32) ? _IS_SSMALL32(x) : _IS_SSMALL64(x)) /* The heap size needed for a bignum */ #define BIG_NEED_SIZE(x) ((x) + 1) diff --git a/erts/emulator/beam/erl_arith.c b/erts/emulator/beam/erl_arith.c index f2a3e411ec..b6625db0d3 100644 --- a/erts/emulator/beam/erl_arith.c +++ b/erts/emulator/beam/erl_arith.c @@ -114,7 +114,7 @@ BIF_RETTYPE intdiv_2(BIF_ALIST_2) } if (is_both_small(BIF_ARG_1,BIF_ARG_2)){ Sint ires = signed_val(BIF_ARG_1) / signed_val(BIF_ARG_2); - if (MY_IS_SSMALL(ires)) + if (IS_SSMALL(ires)) BIF_RET(make_small(ires)); } BIF_RET(erts_int_div(BIF_P, BIF_ARG_1, BIF_ARG_2)); @@ -340,8 +340,7 @@ erts_mixed_plus(Process* p, Eterm arg1, Eterm arg2) switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) { case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE): ires = signed_val(arg1) + signed_val(arg2); - ASSERT(MY_IS_SSMALL(ires) == IS_SSMALL(ires)); - if (MY_IS_SSMALL(ires)) { + if (IS_SSMALL(ires)) { return make_small(ires); } else { hp = HAlloc(p, 2); @@ -486,8 +485,7 @@ erts_mixed_minus(Process* p, Eterm arg1, Eterm arg2) switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) { case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE): ires = signed_val(arg1) - signed_val(arg2); - ASSERT(MY_IS_SSMALL(ires) == IS_SSMALL(ires)); - if (MY_IS_SSMALL(ires)) { + if (IS_SSMALL(ires)) { return make_small(ires); } else { hp = HAlloc(p, 2); @@ -1181,8 +1179,7 @@ erts_gc_mixed_plus(Process* p, Eterm* reg, Uint live) switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) { case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE): ires = signed_val(arg1) + signed_val(arg2); - ASSERT(MY_IS_SSMALL(ires) == IS_SSMALL(ires)); - if (MY_IS_SSMALL(ires)) { + if (IS_SSMALL(ires)) { return make_small(ires); } else { if (ERTS_NEED_GC(p, 2)) { @@ -1349,8 +1346,7 @@ erts_gc_mixed_minus(Process* p, Eterm* reg, Uint live) switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) { case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE): ires = signed_val(arg1) - signed_val(arg2); - ASSERT(MY_IS_SSMALL(ires) == IS_SSMALL(ires)); - if (MY_IS_SSMALL(ires)) { + if (IS_SSMALL(ires)) { return make_small(ires); } else { if (ERTS_NEED_GC(p, 2)) { diff --git a/erts/emulator/beam/erl_gc.c b/erts/emulator/beam/erl_gc.c index 8344c164fa..97a1ca915f 100644 --- a/erts/emulator/beam/erl_gc.c +++ b/erts/emulator/beam/erl_gc.c @@ -337,7 +337,7 @@ erts_heap_sizes(Process* p) for (i = num_heap_sizes-1; i >= 0; i--) { n += 2; - if (!MY_IS_SSMALL(heap_sizes[i])) { + if (!IS_SSMALL(heap_sizes[i])) { big += BIG_UINT_HEAP_SIZE; } } @@ -352,7 +352,7 @@ erts_heap_sizes(Process* p) Eterm num; Sint sz = heap_sizes[i]; - if (MY_IS_SSMALL(sz)) { + if (IS_SSMALL(sz)) { num = make_small(sz); } else { num = uint_to_big(sz, bigp); diff --git a/erts/emulator/beam/erl_term.h b/erts/emulator/beam/erl_term.h index 842802f8d9..6daf043117 100644 --- a/erts/emulator/beam/erl_term.h +++ b/erts/emulator/beam/erl_term.h @@ -270,7 +270,6 @@ _ET_DECLARE_CHECKED(Eterm*,list_val,Wterm) #define is_byte(x) (((x) & ((~(Uint)0 << (_TAG_IMMED1_SIZE+8)) + _TAG_IMMED1_MASK)) == _TAG_IMMED1_SMALL) #define is_valid_bit_size(x) (((Sint)(x)) >= 0 && ((x) & 0x7F) == _TAG_IMMED1_SMALL) #define is_not_valid_bit_size(x) (!is_valid_bit_size((x))) -#define MY_IS_SSMALL(x) (((Uint) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2) #define _unchecked_unsigned_val(x) ((x) >> _TAG_IMMED1_SIZE) _ET_DECLARE_CHECKED(Uint,unsigned_val,Eterm) #define unsigned_val(x) _ET_APPLY(unsigned_val,(x)) diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index 60cf09dc07..970158933f 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -3115,7 +3115,7 @@ dec_term(ErtsDistExternal *edep, #if defined(ARCH_64) *objp = make_small(sn); #else - if (MY_IS_SSMALL(sn)) { + if (IS_SSMALL(sn)) { *objp = make_small(sn); } else { *objp = small_to_big(sn, hp); -- cgit v1.2.3