From 281ae7c2cf7e2dfd48cf50b2f68fd76f7c6ab0e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 25 Sep 2017 07:18:56 +0200 Subject: Eliminate MY_IS_SSMALL() For a long time, there has been the two macros IS_SSMALL() and MY_IS_SSMALL() that do exactly the same thing. There should only be one, and it should be called IS_SSMALL(). However, we must decide which implementation to use. When MY_IS_SSMALL() was introduced a long time ago, it was the most efficient. In a modern C compiler, there might not be any difference. To find out, I used the following small C program to examine the code generation: #include typedef unsigned int Uint32; typedef unsigned long Uint64; typedef long Sint; #define SWORD_CONSTANT(Const) Const##L #define SMALL_BITS (64-4) #define MAX_SMALL ((SWORD_CONSTANT(1) << (SMALL_BITS-1))-1) #define MIN_SMALL (-(SWORD_CONSTANT(1) << (SMALL_BITS-1))) #define MY_IS_SSMALL32(x) (((Uint32) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2) #define MY_IS_SSMALL64(x) (((Uint64) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2) #define MY_IS_SSMALL(x) (sizeof(x) == sizeof(Uint32) ? MY_IS_SSMALL32(x) : MY_IS_SSMALL64(x)) #define IS_SSMALL(x) (((x) >= MIN_SMALL) && ((x) <= MAX_SMALL)) void original(Sint n) { if (IS_SSMALL(n)) { printf("yes\n"); } } void enhanced(Sint n) { if (MY_IS_SSMALL(n)) { printf("yes\n"); } } gcc 7.2 produced the following code for the original() function: .LC0: .string "yes" original(long): movabs rax, 576460752303423488 add rdi, rax movabs rax, 1152921504606846975 cmp rdi, rax jbe .L4 rep ret .L4: mov edi, OFFSET FLAT:.LC0 jmp puts clang 5.0.0 produced the following code which is slightly better: original(long): movabs rax, 576460752303423488 add rax, rdi shr rax, 60 jne .LBB0_1 mov edi, .Lstr jmp puts # TAILCALL .LBB0_1: ret .Lstr: .asciz "yes" However, in the context of beam_emu.c, clang could produce similar to what gcc produced. gcc 7.2 produced the following code when MY_IS_SSMALL() was used: .LC0: .string "yes" enhanced(long): sar rdi, 59 add rdi, 1 cmp rdi, 1 jbe .L4 rep ret .L4: mov edi, OFFSET FLAT:.LC0 jmp puts clang produced similar code. This code seems to be the cheapest. There are four instructions, and there is no loading of huge integer constants. --- erts/emulator/beam/big.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'erts/emulator/beam/big.h') diff --git a/erts/emulator/beam/big.h b/erts/emulator/beam/big.h index 48efce20e7..7556205063 100644 --- a/erts/emulator/beam/big.h +++ b/erts/emulator/beam/big.h @@ -70,7 +70,20 @@ typedef Uint dsize_t; /* Vector size type */ /* Check for small */ #define IS_USMALL(sgn,x) ((sgn) ? ((x) <= MAX_SMALL+1) : ((x) <= MAX_SMALL)) -#define IS_SSMALL(x) (((x) >= MIN_SMALL) && ((x) <= MAX_SMALL)) + +/* + * It seems that both clang and gcc will generate sub-optimal code + * for the more obvious way to write the range check: + * + * #define IS_SSMALL(x) (((x) >= MIN_SMALL) && ((x) <= MAX_SMALL)) + * + * Note that IS_SSMALL() may be used in the 32-bit emulator with + * a Uint64 argument. Therefore, we must test the size of the argument + * to ensure that the cast does not discard the high-order 32 bits. + */ +#define _IS_SSMALL32(x) (((Uint32) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2) +#define _IS_SSMALL64(x) (((Uint64) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2) +#define IS_SSMALL(x) (sizeof(x) == sizeof(Uint32) ? _IS_SSMALL32(x) : _IS_SSMALL64(x)) /* The heap size needed for a bignum */ #define BIG_NEED_SIZE(x) ((x) + 1) -- cgit v1.2.3