From 281ae7c2cf7e2dfd48cf50b2f68fd76f7c6ab0e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 25 Sep 2017 07:18:56 +0200 Subject: Eliminate MY_IS_SSMALL() For a long time, there have been two macros IS_SSMALL() and MY_IS_SSMALL() that do exactly the same thing. There should only be one, and it should be called IS_SSMALL(). However, we must decide which implementation to use. When MY_IS_SSMALL() was introduced a long time ago, it was the most efficient. In a modern C compiler, there might not be any difference. To find out, I used the following small C program to examine the code generation: #include <stdio.h> typedef unsigned int Uint32; typedef unsigned long Uint64; typedef long Sint; #define SWORD_CONSTANT(Const) Const##L #define SMALL_BITS (64-4) #define MAX_SMALL ((SWORD_CONSTANT(1) << (SMALL_BITS-1))-1) #define MIN_SMALL (-(SWORD_CONSTANT(1) << (SMALL_BITS-1))) #define MY_IS_SSMALL32(x) (((Uint32) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2) #define MY_IS_SSMALL64(x) (((Uint64) ((((x)) >> (SMALL_BITS-1)) + 1)) < 2) #define MY_IS_SSMALL(x) (sizeof(x) == sizeof(Uint32) ? MY_IS_SSMALL32(x) : MY_IS_SSMALL64(x)) #define IS_SSMALL(x) (((x) >= MIN_SMALL) && ((x) <= MAX_SMALL)) void original(Sint n) { if (IS_SSMALL(n)) { printf("yes\n"); } } void enhanced(Sint n) { if (MY_IS_SSMALL(n)) { printf("yes\n"); } } gcc 7.2 produced the following code for the original() function: .LC0: .string "yes" original(long): movabs rax, 576460752303423488 add rdi, rax movabs rax, 1152921504606846975 cmp rdi, rax jbe .L4 rep ret .L4: mov edi, OFFSET FLAT:.LC0 jmp puts clang 5.0.0 produced the following code, which is slightly better: original(long): movabs rax, 576460752303423488 add rax, rdi shr rax, 60 jne .LBB0_1 mov edi, .Lstr jmp puts # TAILCALL .LBB0_1: ret .Lstr: .asciz "yes" However, in the context of beam_emu.c, clang could produce code similar to what gcc produced. 
gcc 7.2 produced the following code when MY_IS_SSMALL() was used: .LC0: .string "yes" enhanced(long): sar rdi, 59 add rdi, 1 cmp rdi, 1 jbe .L4 rep ret .L4: mov edi, OFFSET FLAT:.LC0 jmp puts clang produced similar code. This code seems to be the cheapest. There are four instructions, and there is no loading of huge integer constants. --- erts/emulator/beam/erl_gc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'erts/emulator/beam/erl_gc.c') diff --git a/erts/emulator/beam/erl_gc.c b/erts/emulator/beam/erl_gc.c index 8344c164fa..97a1ca915f 100644 --- a/erts/emulator/beam/erl_gc.c +++ b/erts/emulator/beam/erl_gc.c @@ -337,7 +337,7 @@ erts_heap_sizes(Process* p) for (i = num_heap_sizes-1; i >= 0; i--) { n += 2; - if (!MY_IS_SSMALL(heap_sizes[i])) { + if (!IS_SSMALL(heap_sizes[i])) { big += BIG_UINT_HEAP_SIZE; } } @@ -352,7 +352,7 @@ erts_heap_sizes(Process* p) Eterm num; Sint sz = heap_sizes[i]; - if (MY_IS_SSMALL(sz)) { + if (IS_SSMALL(sz)) { num = make_small(sz); } else { num = uint_to_big(sz, bigp); -- cgit v1.2.3