diff options
author | Björn Gustavsson <[email protected]> | 2017-09-25 07:18:56 +0200 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2017-09-28 06:58:16 +0200 |
commit | 281ae7c2cf7e2dfd48cf50b2f68fd76f7c6ab0e2 (patch) | |
tree | f9779bb042a63a905d716ef333f4004db729b629 | |
parent | 28c7d822eeb56c9f3955cb936e3f90e8971b9a71 (diff) | |
download | otp-281ae7c2cf7e2dfd48cf50b2f68fd76f7c6ab0e2.tar.gz otp-281ae7c2cf7e2dfd48cf50b2f68fd76f7c6ab0e2.tar.bz2 otp-281ae7c2cf7e2dfd48cf50b2f68fd76f7c6ab0e2.zip |
Eliminate MY_IS_SSMALL()
For a long time, there has been the two macros IS_SSMALL() and
MY_IS_SSMALL() that do exactly the same thing.
There should only be one, and it should be called IS_SSMALL().
However, we must decide which implementation to use. When
MY_IS_SSMALL() was introduced a long time ago, it was the most
efficient. In a modern C compiler, there might not be any
difference.
To find out, I used the following small C program to examine
the code generation:
#include <stdio.h>
typedef unsigned int Uint32;
typedef unsigned long Uint64;
typedef long Sint;
#define SWORD_CONSTANT(Const) Const##L
#define SMALL_BITS (64-4)
#define MAX_SMALL ((SWORD_CONSTANT(1) << (SMALL_BITS-1))-1)
#define MIN_SMALL (-(SWORD_CONSTANT(1) << (SMALL_BITS-1)))
#define MY_IS_SSMALL32(x) (((Uint32) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2)
#define MY_IS_SSMALL64(x) (((Uint64) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2)
#define MY_IS_SSMALL(x) (sizeof(x) == sizeof(Uint32) ? MY_IS_SSMALL32(x) : MY_IS_SSMALL64(x))
#define IS_SSMALL(x) (((x) >= MIN_SMALL) && ((x) <= MAX_SMALL))
void original(Sint n)
{
if (IS_SSMALL(n)) {
printf("yes\n");
}
}
void enhanced(Sint n)
{
if (MY_IS_SSMALL(n)) {
printf("yes\n");
}
}
gcc 7.2 produced the following code for the original() function:
.LC0:
.string "yes"
original(long):
movabs rax, 576460752303423488
add rdi, rax
movabs rax, 1152921504606846975
cmp rdi, rax
jbe .L4
rep ret
.L4:
mov edi, OFFSET FLAT:.LC0
jmp puts
clang 5.0.0 produced the following code which is slightly better:
original(long):
movabs rax, 576460752303423488
add rax, rdi
shr rax, 60
jne .LBB0_1
mov edi, .Lstr
jmp puts # TAILCALL
.LBB0_1:
ret
.Lstr:
.asciz "yes"
However, in the context of beam_emu.c, clang could produce
similar to what gcc produced.
gcc 7.2 produced the following code when MY_IS_SSMALL() was used:
.LC0:
.string "yes"
enhanced(long):
sar rdi, 59
add rdi, 1
cmp rdi, 1
jbe .L4
rep ret
.L4:
mov edi, OFFSET FLAT:.LC0
jmp puts
clang produced similar code.
This code seems to be the cheapest. There are four instructions, and
there is no loading of huge integer constants.
-rw-r--r-- | erts/emulator/beam/arith_instrs.tab | 11 | ||||
-rw-r--r-- | erts/emulator/beam/big.h | 15 | ||||
-rw-r--r-- | erts/emulator/beam/erl_arith.c | 14 | ||||
-rw-r--r-- | erts/emulator/beam/erl_gc.c | 4 | ||||
-rw-r--r-- | erts/emulator/beam/erl_term.h | 1 | ||||
-rw-r--r-- | erts/emulator/beam/external.c | 2 |
6 files changed, 26 insertions, 21 deletions
diff --git a/erts/emulator/beam/arith_instrs.tab b/erts/emulator/beam/arith_instrs.tab index 7c9cd47e28..3db19df3c4 100644 --- a/erts/emulator/beam/arith_instrs.tab +++ b/erts/emulator/beam/arith_instrs.tab @@ -51,8 +51,7 @@ plus.fetch(Op1, Op2) { plus.execute(Fail, Live, Dst) { if (ERTS_LIKELY(is_both_small(PlusOp1, PlusOp2))) { Sint i = signed_val(PlusOp1) + signed_val(PlusOp2); - ASSERT(MY_IS_SSMALL(i) == IS_SSMALL(i)); - if (ERTS_LIKELY(MY_IS_SSMALL(i))) { + if (ERTS_LIKELY(IS_SSMALL(i))) { $Dst = make_small(i); $NEXT0(); } @@ -74,8 +73,7 @@ minus.fetch(Op1, Op2) { minus.execute(Fail, Live, Dst) { if (ERTS_LIKELY(is_both_small(MinusOp1, MinusOp2))) { Sint i = signed_val(MinusOp1) - signed_val(MinusOp2); - ASSERT(MY_IS_SSMALL(i) == IS_SSMALL(i)); - if (ERTS_LIKELY(MY_IS_SSMALL(i))) { + if (ERTS_LIKELY(IS_SSMALL(i))) { $Dst = make_small(i); $NEXT0(); } @@ -100,8 +98,7 @@ increment.execute(IncrementVal, Live, Dst) { increment_val = $IncrementVal; if (ERTS_LIKELY(is_small(increment_reg_val))) { Sint i = signed_val(increment_reg_val) + increment_val; - ASSERT(MY_IS_SSMALL(i) == IS_SSMALL(i)); - if (ERTS_LIKELY(MY_IS_SSMALL(i))) { + if (ERTS_LIKELY(IS_SSMALL(i))) { $Dst = make_small(i); $NEXT0(); } @@ -142,7 +139,7 @@ i_int_div(Fail, Live, Op1, Op2, Dst) { $BIF_ERROR_ARITY_2($Fail, BIF_intdiv_2, op1, op2); } else if (ERTS_LIKELY(is_both_small(op1, op2))) { Sint ires = signed_val(op1) / signed_val(op2); - if (ERTS_LIKELY(MY_IS_SSMALL(ires))) { + if (ERTS_LIKELY(IS_SSMALL(ires))) { $Dst = make_small(ires); $NEXT0(); } diff --git a/erts/emulator/beam/big.h b/erts/emulator/beam/big.h index 48efce20e7..7556205063 100644 --- a/erts/emulator/beam/big.h +++ b/erts/emulator/beam/big.h @@ -70,7 +70,20 @@ typedef Uint dsize_t; /* Vector size type */ /* Check for small */ #define IS_USMALL(sgn,x) ((sgn) ? ((x) <= MAX_SMALL+1) : ((x) <= MAX_SMALL)) -#define IS_SSMALL(x) (((x) >= MIN_SMALL) && ((x) <= MAX_SMALL)) + +/* + * It seems that both clang and gcc will generate sub-optimal code + * for the more obvious way to write the range check: + * + * #define IS_SSMALL(x) (((x) >= MIN_SMALL) && ((x) <= MAX_SMALL)) + * + * Note that IS_SSMALL() may be used in the 32-bit emulator with + * a Uint64 argument. Therefore, we must test the size of the argument + * to ensure that the cast does not discard the high-order 32 bits. + */ +#define _IS_SSMALL32(x) (((Uint32) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2) +#define _IS_SSMALL64(x) (((Uint64) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2) +#define IS_SSMALL(x) (sizeof(x) == sizeof(Uint32) ? _IS_SSMALL32(x) : _IS_SSMALL64(x)) /* The heap size needed for a bignum */ #define BIG_NEED_SIZE(x) ((x) + 1) diff --git a/erts/emulator/beam/erl_arith.c b/erts/emulator/beam/erl_arith.c index f2a3e411ec..b6625db0d3 100644 --- a/erts/emulator/beam/erl_arith.c +++ b/erts/emulator/beam/erl_arith.c @@ -114,7 +114,7 @@ BIF_RETTYPE intdiv_2(BIF_ALIST_2) } if (is_both_small(BIF_ARG_1,BIF_ARG_2)){ Sint ires = signed_val(BIF_ARG_1) / signed_val(BIF_ARG_2); - if (MY_IS_SSMALL(ires)) + if (IS_SSMALL(ires)) BIF_RET(make_small(ires)); } BIF_RET(erts_int_div(BIF_P, BIF_ARG_1, BIF_ARG_2)); @@ -340,8 +340,7 @@ erts_mixed_plus(Process* p, Eterm arg1, Eterm arg2) switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) { case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE): ires = signed_val(arg1) + signed_val(arg2); - ASSERT(MY_IS_SSMALL(ires) == IS_SSMALL(ires)); - if (MY_IS_SSMALL(ires)) { + if (IS_SSMALL(ires)) { return make_small(ires); } else { hp = HAlloc(p, 2); @@ -486,8 +485,7 @@ erts_mixed_minus(Process* p, Eterm arg1, Eterm arg2) switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) { case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE): ires = signed_val(arg1) - signed_val(arg2); - ASSERT(MY_IS_SSMALL(ires) == IS_SSMALL(ires)); - if (MY_IS_SSMALL(ires)) { + if (IS_SSMALL(ires)) { return make_small(ires); } else { hp = HAlloc(p, 2); @@ -1181,8 +1179,7 @@ erts_gc_mixed_plus(Process* p, Eterm* reg, Uint live) switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) { case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE): ires = signed_val(arg1) + signed_val(arg2); - ASSERT(MY_IS_SSMALL(ires) == IS_SSMALL(ires)); - if (MY_IS_SSMALL(ires)) { + if (IS_SSMALL(ires)) { return make_small(ires); } else { if (ERTS_NEED_GC(p, 2)) { @@ -1349,8 +1346,7 @@ erts_gc_mixed_minus(Process* p, Eterm* reg, Uint live) switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) { case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE): ires = signed_val(arg1) - signed_val(arg2); - ASSERT(MY_IS_SSMALL(ires) == IS_SSMALL(ires)); - if (MY_IS_SSMALL(ires)) { + if (IS_SSMALL(ires)) { return make_small(ires); } else { if (ERTS_NEED_GC(p, 2)) { diff --git a/erts/emulator/beam/erl_gc.c b/erts/emulator/beam/erl_gc.c index 8344c164fa..97a1ca915f 100644 --- a/erts/emulator/beam/erl_gc.c +++ b/erts/emulator/beam/erl_gc.c @@ -337,7 +337,7 @@ erts_heap_sizes(Process* p) for (i = num_heap_sizes-1; i >= 0; i--) { n += 2; - if (!MY_IS_SSMALL(heap_sizes[i])) { + if (!IS_SSMALL(heap_sizes[i])) { big += BIG_UINT_HEAP_SIZE; } } @@ -352,7 +352,7 @@ erts_heap_sizes(Process* p) Eterm num; Sint sz = heap_sizes[i]; - if (MY_IS_SSMALL(sz)) { + if (IS_SSMALL(sz)) { num = make_small(sz); } else { num = uint_to_big(sz, bigp); diff --git a/erts/emulator/beam/erl_term.h b/erts/emulator/beam/erl_term.h index 842802f8d9..6daf043117 100644 --- a/erts/emulator/beam/erl_term.h +++ b/erts/emulator/beam/erl_term.h @@ -270,7 +270,6 @@ _ET_DECLARE_CHECKED(Eterm*,list_val,Wterm) #define is_byte(x) (((x) & ((~(Uint)0 << (_TAG_IMMED1_SIZE+8)) + _TAG_IMMED1_MASK)) == _TAG_IMMED1_SMALL) #define is_valid_bit_size(x) (((Sint)(x)) >= 0 && ((x) & 0x7F) == _TAG_IMMED1_SMALL) #define is_not_valid_bit_size(x) (!is_valid_bit_size((x))) -#define MY_IS_SSMALL(x) (((Uint) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2) #define _unchecked_unsigned_val(x) ((x) >> _TAG_IMMED1_SIZE) _ET_DECLARE_CHECKED(Uint,unsigned_val,Eterm) #define unsigned_val(x) _ET_APPLY(unsigned_val,(x)) diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index 60cf09dc07..970158933f 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -3115,7 +3115,7 @@ dec_term(ErtsDistExternal *edep, #if defined(ARCH_64) *objp = make_small(sn); #else - if (MY_IS_SSMALL(sn)) { + if (IS_SSMALL(sn)) { *objp = make_small(sn); } else { *objp = small_to_big(sn, hp); |