Eliminate MY_IS_SSMALL()

For a long time, there has been the two macros IS_SSMALL() and MY_IS_SSMALL() that do exactly the same thing. There should only be one, and it should be called IS_SSMALL(). However, we must decide which implementation to use. When MY_IS_SSMALL() was introduced a long time ago, it was the most efficient. In a modern C compiler, there might not be any difference. To find out, I used the following small C program to examine the code generation: #include <stdio.h> typedef unsigned int Uint32; typedef unsigned long Uint64; typedef long Sint; #define SWORD_CONSTANT(Const) Const##L #define SMALL_BITS (64-4) #define MAX_SMALL ((SWORD_CONSTANT(1) << (SMALL_BITS-1))-1) #define MIN_SMALL (-(SWORD_CONSTANT(1) << (SMALL_BITS-1))) #define MY_IS_SSMALL32(x) (((Uint32) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2) #define MY_IS_SSMALL64(x) (((Uint64) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2) #define MY_IS_SSMALL(x) (sizeof(x) == sizeof(Uint32) ? MY_IS_SSMALL32(x) : MY_IS_SSMALL64(x)) #define IS_SSMALL(x) (((x) >= MIN_SMALL) && ((x) <= MAX_SMALL)) void original(Sint n) { if (IS_SSMALL(n)) { printf("yes\n"); } } void enhanced(Sint n) { if (MY_IS_SSMALL(n)) { printf("yes\n"); } } gcc 7.2 produced the following code for the original() function: .LC0: .string "yes" original(long): movabs rax, 576460752303423488 add rdi, rax movabs rax, 1152921504606846975 cmp rdi, rax jbe .L4 rep ret .L4: mov edi, OFFSET FLAT:.LC0 jmp puts clang 5.0.0 produced the following code which is slightly better: original(long): movabs rax, 576460752303423488 add rax, rdi shr rax, 60 jne .LBB0_1 mov edi, .Lstr jmp puts # TAILCALL .LBB0_1: ret .Lstr: .asciz "yes" However, in the context of beam_emu.c, clang could produce similar to what gcc produced. gcc 7.2 produced the following code when MY_IS_SSMALL() was used: .LC0: .string "yes" enhanced(long): sar rdi, 59 add rdi, 1 cmp rdi, 1 jbe .L4 rep ret .L4: mov edi, OFFSET FLAT:.LC0 jmp puts clang produced similar code. This code seems to be the cheapest. There are four instructions, and there is no loading of huge integer constants.
author: Björn Gustavsson <[email protected]> 2017-09-25 07:18:56 +0200
committer: Björn Gustavsson <[email protected]> 2017-09-28 06:58:16 +0200
commit: 281ae7c2cf7e2dfd48cf50b2f68fd76f7c6ab0e2 (patch)
tree: f9779bb042a63a905d716ef333f4004db729b629 /erts/emulator/beam/big.h
parent: 28c7d822eeb56c9f3955cb936e3f90e8971b9a71 (diff)
download: otp-281ae7c2cf7e2dfd48cf50b2f68fd76f7c6ab0e2.tar.gz
otp-281ae7c2cf7e2dfd48cf50b2f68fd76f7c6ab0e2.tar.bz2
otp-281ae7c2cf7e2dfd48cf50b2f68fd76f7c6ab0e2.zip
1 files changed, 14 insertions, 1 deletions
diff --git a/erts/emulator/beam/big.h b/erts/emulator/beam/big.h
index 48efce20e7..7556205063 100644
--- a/erts/emulator/beam/big.h
+++ b/erts/emulator/beam/big.h
@@ -70,7 +70,20 @@ typedef Uint  dsize_t;	 /* Vector size type */
 
 /* Check for small */
 #define IS_USMALL(sgn,x)  ((sgn) ? ((x) <= MAX_SMALL+1) : ((x) <= MAX_SMALL))
-#define IS_SSMALL(x)      (((x) >= MIN_SMALL) && ((x) <= MAX_SMALL))
+
+/*
+ * It seems that both clang and gcc will generate sub-optimal code
+ * for the more obvious way to write the range check:
+ *
+ *    #define IS_SSMALL(x)  (((x) >= MIN_SMALL) && ((x) <= MAX_SMALL))
+ *
+ * Note that IS_SSMALL() may be used in the 32-bit emulator with
+ * a Uint64 argument. Therefore, we must test the size of the argument
+ * to ensure that the cast does not discard the high-order 32 bits.
+ */
+#define _IS_SSMALL32(x) (((Uint32) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2)
+#define _IS_SSMALL64(x) (((Uint64) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2)
+#define IS_SSMALL(x) (sizeof(x) == sizeof(Uint32) ? _IS_SSMALL32(x) : _IS_SSMALL64(x))
 
 /* The heap size needed for a bignum */
 #define BIG_NEED_SIZE(x)  ((x) + 1)
author	Björn Gustavsson <[email protected]>	2017-09-25 07:18:56 +0200
committer	Björn Gustavsson <[email protected]>	2017-09-28 06:58:16 +0200
commit	281ae7c2cf7e2dfd48cf50b2f68fd76f7c6ab0e2 (patch)
tree	f9779bb042a63a905d716ef333f4004db729b629 /erts/emulator/beam/big.h
parent	28c7d822eeb56c9f3955cb936e3f90e8971b9a71 (diff)
download	otp-281ae7c2cf7e2dfd48cf50b2f68fd76f7c6ab0e2.tar.gz otp-281ae7c2cf7e2dfd48cf50b2f68fd76f7c6ab0e2.tar.bz2 otp-281ae7c2cf7e2dfd48cf50b2f68fd76f7c6ab0e2.zip