From 1056d2d1fd49f669a2001f03890e13c9cba76c1e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?John=20H=C3=B6gberg?= <john@erlang.org>
Date: Thu, 25 Oct 2018 08:33:08 +0200
Subject: Inline erts_cmp

This greatly increases the performance of '--'/2 which does a lot
of term comparisons.
---
 erts/emulator/beam/atom.c      |  2 +-
 erts/emulator/beam/erl_utils.h | 61 ++++++++++++++++++++++++++++++++++++++--
 erts/emulator/beam/utils.c     | 64 ++++--------------------------------------
 3 files changed, 64 insertions(+), 63 deletions(-)

(limited to 'erts/emulator')

diff --git a/erts/emulator/beam/atom.c b/erts/emulator/beam/atom.c
index 5381611fab..59b51fd15e 100644
--- a/erts/emulator/beam/atom.c
+++ b/erts/emulator/beam/atom.c
@@ -174,7 +174,7 @@ atom_alloc(Atom* tmpl)
 
     /*
      * Precompute ordinal value of first 3 bytes + 7 bits.
-     * This is used by utils.c:erts_cmp_atoms().
+     * This is used by erl_utils.h:erts_cmp_atoms().
      * We cannot use the full 32 bits of the first 4 bytes,
      * since we use the sign of the difference between two
      * ordinal values to represent their relative order.
diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h
index b3bfa69052..880febba8b 100644
--- a/erts/emulator/beam/erl_utils.h
+++ b/erts/emulator/beam/erl_utils.h
@@ -22,6 +22,7 @@
 #define ERL_UTILS_H__
 
 #include "sys.h"
+#include "atom.h"
 #include "erl_printf.h"
 
 struct process;
@@ -112,10 +113,12 @@ int eq(Eterm, Eterm);
 
 #define EQ(x,y) (((x) == (y)) || (is_not_both_immed((x),(y)) && eq((x),(y))))
 
-int erts_cmp_atoms(Eterm a, Eterm b);
-Sint erts_cmp(Eterm, Eterm, int, int);
-Sint erts_cmp_compound(Eterm, Eterm, int, int);
+ERTS_GLB_INLINE Sint erts_cmp(Eterm, Eterm, int, int);
+ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b);
+
 Sint cmp(Eterm a, Eterm b);
+Sint erts_cmp_compound(Eterm, Eterm, int, int);
+
 #define CMP(A,B)                         erts_cmp(A,B,0,0)
 #define CMP_TERM(A,B)                    erts_cmp(A,B,1,0)
 #define CMP_EQ_ONLY(A,B)                 erts_cmp(A,B,0,1)
@@ -150,4 +153,56 @@ Sint cmp(Eterm a, Eterm b);
 	if (erts_cmp_compound(X,Y,0,EqOnly) Op 0) { Action; };	\
     }
 
+#define erts_float_comp(x,y) (((x)<(y)) ? -1 : (((x)==(y)) ? 0 : 1))
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b) {
+    Atom *aa = atom_tab(atom_val(a));
+    Atom *bb = atom_tab(atom_val(b));
+
+    byte *name_a, *name_b;
+    int len_a, len_b, diff;
+
+    diff = aa->ord0 - bb->ord0;
+
+    if (diff != 0) {
+        return diff;
+    }
+
+    name_a = &aa->name[3];
+    name_b = &bb->name[3];
+    len_a = aa->len-3;
+    len_b = bb->len-3;
+
+    if (len_a > 0 && len_b > 0) {
+        diff = sys_memcmp(name_a, name_b, MIN(len_a, len_b));
+
+        if (diff != 0) {
+            return diff;
+        }
+    }
+
+    return len_a - len_b;
+}
+
+ERTS_GLB_INLINE Sint erts_cmp(Eterm a, Eterm b, int exact, int eq_only) {
+    if (is_atom(a) && is_atom(b)) {
+        return erts_cmp_atoms(a, b);
+    } else if (is_both_small(a, b)) {
+        return (signed_val(a) - signed_val(b));
+    } else if (is_float(a) && is_float(b)) {
+        FloatDef af, bf;
+
+        GET_DOUBLE(a, af);
+        GET_DOUBLE(b, bf);
+
+        return erts_float_comp(af.fd, bf.fd);
+    }
+
+    return erts_cmp_compound(a,b,exact,eq_only);
+}
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
 #endif
diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c
index 08f8ca9788..d81bd89a48 100644
--- a/erts/emulator/beam/utils.c
+++ b/erts/emulator/beam/utils.c
@@ -2615,27 +2615,6 @@ not_equal:
 }
 
 
-/* 
- * Lexically compare two strings of bytes (string s1 length l1 and s2 l2).
- *
- *	s1 < s2	return -1
- *	s1 = s2	return  0
- *	s1 > s2 return +1
- */
-static int cmpbytes(byte *s1, int l1, byte *s2, int l2)
-{
-    int i;
-    i = 0;
-    while((i < l1) && (i < l2)) {
-	if (s1[i] < s2[i]) return(-1);
-	if (s1[i] > s2[i]) return(1);
-	i++;
-    }
-    if (l1 < l2) return(-1);
-    if (l1 > l2) return(1);
-    return(0);
-}
-
 
 /*
  * Compare objects.
@@ -2649,20 +2628,6 @@ static int cmpbytes(byte *s1, int l1, byte *s2, int l2)
  *
  */
 
-
-#define float_comp(x,y)    (((x)<(y)) ? -1 : (((x)==(y)) ? 0 : 1))
-
-int erts_cmp_atoms(Eterm a, Eterm b)
-{
-    Atom *aa = atom_tab(atom_val(a));
-    Atom *bb = atom_tab(atom_val(b));
-    int diff = aa->ord0 - bb->ord0;
-    if (diff)
-	return diff;
-    return cmpbytes(aa->name+3, aa->len-3,
-		    bb->name+3, bb->len-3);
-}
-
 /* cmp(Eterm a, Eterm b)
  *  For compatibility with HiPE - arith-based compare.
  */
@@ -2673,22 +2638,6 @@ Sint cmp(Eterm a, Eterm b)
 
 Sint erts_cmp_compound(Eterm a, Eterm b, int exact, int eq_only);
 
-Sint erts_cmp(Eterm a, Eterm b, int exact, int eq_only)
-{
-    if (is_atom(a) && is_atom(b)) {
-        return erts_cmp_atoms(a, b);
-    } else if (is_both_small(a, b)) {
-        return (signed_val(a) - signed_val(b));
-    } else if (is_float(a) && is_float(b)) {
-        FloatDef af, bf;
-        GET_DOUBLE(a, af);
-        GET_DOUBLE(b, bf);
-        return float_comp(af.fd, bf.fd);
-    }
-    return erts_cmp_compound(a,b,exact,eq_only);
-}
-
-
 /* erts_cmp(Eterm a, Eterm b, int exact)
  * exact = 1 -> term-based compare
  * exact = 0 -> arith-based compare
@@ -2985,7 +2934,7 @@ tailrecur_ne:
 
 		    GET_DOUBLE(a, af);
 		    GET_DOUBLE(b, bf);
-		    ON_CMP_GOTO(float_comp(af.fd, bf.fd));
+		    ON_CMP_GOTO(erts_float_comp(af.fd, bf.fd));
 		}
 	    case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
 	    case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
@@ -3022,10 +2971,7 @@ tailrecur_ne:
 		    ErlFunThing* f2 = (ErlFunThing *) fun_val(b);
 		    Sint diff;
 
-		    diff = cmpbytes(atom_tab(atom_val(f1->fe->module))->name,
-				    atom_tab(atom_val(f1->fe->module))->len,
-				    atom_tab(atom_val(f2->fe->module))->name,
-				    atom_tab(atom_val(f2->fe->module))->len);
+                    diff = erts_cmp_atoms((f1->fe)->module, (f2->fe)->module);
 		    if (diff != 0) {
 			RETURN_NEQ(diff);
 		    }
@@ -3219,7 +3165,7 @@ tailrecur_ne:
 	    if (f2.fd < MAX_LOSSLESS_FLOAT && f2.fd > MIN_LOSSLESS_FLOAT) {
 		/* Float is within the no loss limit */
 		f1.fd = signed_val(aw);
-		j = float_comp(f1.fd, f2.fd);
+		j = erts_float_comp(f1.fd, f2.fd);
 	    }
 #if ERTS_SIZEOF_ETERM == 8
 	    else if (f2.fd > (double) (MAX_SMALL + 1)) {
@@ -3266,7 +3212,7 @@ tailrecur_ne:
 		if (big_to_double(aw, &f1.fd) < 0) {
 		    j = big_sign(aw) ? -1 : 1;
 		} else {
-		    j = float_comp(f1.fd, f2.fd);
+		    j = erts_float_comp(f1.fd, f2.fd);
 		}
 	    } else {
 		big = double_to_big(f2.fd, big_buf, sizeof(big_buf)/sizeof(Eterm));
@@ -3282,7 +3228,7 @@ tailrecur_ne:
 	    if (f1.fd < MAX_LOSSLESS_FLOAT && f1.fd > MIN_LOSSLESS_FLOAT) {
 		/* Float is within the no loss limit */
 		f2.fd = signed_val(bw);
-		j = float_comp(f1.fd, f2.fd);
+		j = erts_float_comp(f1.fd, f2.fd);
 	    }
 #if ERTS_SIZEOF_ETERM == 8
 	    else if (f1.fd > (double) (MAX_SMALL + 1)) {
-- 
cgit v1.2.3