Better list_to_integer

Now tries to use whole width of signed long (Sint) and this halves amount of multiplications needed to parse long integers. New code is 2-3 times faster than the old code for large inputs (tens and hundreds of digits), behavior should not change for small inputs. Test ran 10k times with GC forced between attempts. Was (R17): 720 el base 10: 0.14682 sec; base 16: 0.192722 sec; base 36: 0.337118 sec. 2800 el base 10: 1.794133 sec; base 16: 2.735106 sec; base 36: 4.761108 sec. 6500 el base 10: 9.316434 sec; base 16: 14.109469 sec; base 36: 25.319263 sec. Now (R19 Dev) 720 el base 10: 0.10265 sec; base 16: 0.10851 sec; base 36: 0.160478 sec. 2800 el base 10: 1.002793 sec; base 16: 1.360649 sec; base 36: 2.174309 sec. 6500 el base 10: 4.722197 sec; base 16: 6.60522 sec; base 36: 10.552795 sec. Added test for corner cases and sign bit corruption. Replaced macros with inline and hid it inside C file to not pollute global namespace Old bug in #define LG2_LOOKUP: Replaced with inline function and table recalculated for all bases 2 to 36 (was 2 to 64)
author: Dmytro Lytovchenko <dmytro.lytovchenko@gmail.com> 2016-01-21 17:20:47 +0100
committer: Dmytro Lytovchenko <dmytro.lytovchenko@gmail.com> 2016-02-02 11:32:58 +0100
commit: c96b6c2f58642b457d806c0a8a5bed03d16e35f1 (patch)
tree: 6b70c74b6d5e96559811589c4fb5ccef19ac3d6d /erts
parent: 0236a875929729eca1933cbb854267f584734b26 (diff)
download: otp-c96b6c2f58642b457d806c0a8a5bed03d16e35f1.tar.gz
otp-c96b6c2f58642b457d806c0a8a5bed03d16e35f1.tar.bz2
otp-c96b6c2f58642b457d806c0a8a5bed03d16e35f1.zip
4 files changed, 316 insertions, 199 deletions
diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c
index d519216fbd..34611ad6ab 100644
--- a/erts/emulator/beam/bif.c
+++ b/erts/emulator/beam/bif.c
@@ -2904,8 +2904,11 @@ BIF_RETTYPE integer_to_list_1(BIF_ALIST_1)
 
 /**********************************************************************/
 
-/* convert a list of ascii ascii integer value to an integer */
-
+/*
+ * Converts a list of ascii base10 digits to an integer fully or partially.
+ * Returns result and the remaining tail.
+ * On error returns: {error,not_a_list}, or {error, no_integer}
+ */
 
 BIF_RETTYPE string_to_integer_1(BIF_ALIST_1)
 {
@@ -2913,9 +2916,8 @@ BIF_RETTYPE string_to_integer_1(BIF_ALIST_1)
      Eterm tail;
      Eterm *hp;
      /* must be a list */
-     switch (do_list_to_integer(BIF_P,BIF_ARG_1,&res,&tail)) {
-	 /* HAlloc after do_list_to_integer as it 
-	    might HAlloc itself (bignum) */
+     switch (erts_list_to_integer(BIF_P, BIF_ARG_1, 10, &res, &tail)) {
+     /* HAlloc after erts_list_to_integer as it might HAlloc itself (bignum) */
      case LTI_BAD_STRUCTURE:
 	 hp = HAlloc(BIF_P,3);
 	 BIF_RET(TUPLE2(hp, am_error, am_not_a_list));
@@ -2930,13 +2932,14 @@ BIF_RETTYPE string_to_integer_1(BIF_ALIST_1)
 
 BIF_RETTYPE list_to_integer_1(BIF_ALIST_1)
  {
-   /* Using do_list_to_integer is about twice as fast as using 
+   /* Using erts_list_to_integer is about twice as fast as using
       erts_chars_to_integer because we do not have to copy the 
       entire list */
      Eterm res;
      Eterm dummy;
      /* must be a list */
-     if (do_list_to_integer(BIF_P,BIF_ARG_1,&res,&dummy) != LTI_ALL_INTEGER) {
+     if (erts_list_to_integer(BIF_P, BIF_ARG_1, 10,
+                              &res, &dummy) != LTI_ALL_INTEGER) {
 	 BIF_ERROR(BIF_P,BADARG);
      }
      BIF_RET(res);
@@ -2944,14 +2947,12 @@ BIF_RETTYPE list_to_integer_1(BIF_ALIST_1)
 
 BIF_RETTYPE list_to_integer_2(BIF_ALIST_2)
 {
-
   /* Bif implementation is about 50% faster than pure erlang,
      and since we have erts_chars_to_integer now it is simpler
      as well. This could be optmized further if we did not have to
      copy the list to buf. */
     int i;
-    Eterm res;
-    char *buf = NULL;
+    Eterm res, dummy;
     int base;
 
     i = erts_list_length(BIF_ARG_1);
@@ -2959,31 +2960,16 @@ BIF_RETTYPE list_to_integer_2(BIF_ALIST_2)
       BIF_ERROR(BIF_P, BADARG);
     
     base = signed_val(BIF_ARG_2);
-  
+
     if (base < 2 || base > 36) 
       BIF_ERROR(BIF_P, BADARG);
 
-    /* Take fast path if base it 10 */
-    if (base == 10)
-      return list_to_integer_1(BIF_P,&BIF_ARG_1);
-    
-    buf = (char *) erts_alloc(ERTS_ALC_T_TMP, i + 1);
-    
-    if (intlist_to_buf(BIF_ARG_1, buf, i) < 0)
-      goto list_to_integer_1_error;
-    buf[i] = '\0';		/* null terminal */
-    
-    if ((res = erts_chars_to_integer(BIF_P,buf,i,base)) == THE_NON_VALUE)
-      goto list_to_integer_1_error;
-    
-    erts_free(ERTS_ALC_T_TMP, (void *) buf);
+    if (erts_list_to_integer(BIF_P, BIF_ARG_1, base,
+                             &res, &dummy) != LTI_ALL_INTEGER) {
+        BIF_ERROR(BIF_P,BADARG);
+    }
     BIF_RET(res);
-    
- list_to_integer_1_error:
-    erts_free(ERTS_ALC_T_TMP, (void *) buf);
-    BIF_ERROR(BIF_P, BADARG);
-     
- }
+}
 
 /**********************************************************************/
 
diff --git a/erts/emulator/beam/big.c b/erts/emulator/beam/big.c
index 29e677d2e5..11838e24ef 100644
--- a/erts/emulator/beam/big.c
+++ b/erts/emulator/beam/big.c
@@ -48,7 +48,7 @@
 	    _t_dst = (dst)+((sz)-1);				\
 	    _t_src = (src)+((sz)-1);				\
 	    while(_t_sz--) *_t_dst-- = *_t_src--;		\
-	}							\
+        }							\
     } while(0)
 
 /* add a and b with carry in + out */
@@ -423,6 +423,25 @@
 
 #endif
 
+/* Forward declaration of lookup tables (See below in this file) used in list to
+ * integer conversions for different bases. Also used in bignum printing.
+ */
+static const byte digits_per_sint_lookup[36-1];
+static const byte digits_per_small_lookup[36-1];
+static const Sint largest_power_of_base_lookup[36-1];
+
+static ERTS_INLINE byte get_digits_per_signed_int(Uint base) {
+    return digits_per_sint_lookup[base-2];
+}
+
+static ERTS_INLINE byte get_digits_per_small(Uint base) {
+    return digits_per_small_lookup[base-2];
+}
+
+static ERTS_INLINE Sint get_largest_power_of_base(Uint base) {
+    return largest_power_of_base_lookup[base-2];
+}
+
 /*
 ** compare two number vectors
 */
@@ -1719,8 +1738,10 @@ static Uint write_big(Wterm x, void (*write_func)(void *, char), void *arg)
     short sign = BIG_SIGN(xp);
     ErtsDigit rem;
     Uint n = 0;
+    const Uint digits_per_Sint = get_digits_per_signed_int(10);
+    const Sint largest_pow_of_base = get_largest_power_of_base(10);
 
-    if (xl == 1 && *dx < D_DECIMAL_BASE) {
+    if (xl == 1 && *dx < largest_pow_of_base) {
 	rem = *dx;
 	if (rem == 0) {
 	    (*write_func)(arg, '0'); n++;
@@ -1738,7 +1759,7 @@ static Uint write_big(Wterm x, void (*write_func)(void *, char), void *arg)
 	MOVE_DIGITS(tmp, dx, xl);
 
 	while(1) {
-	    tmpl = D_div(tmp, tmpl, D_DECIMAL_BASE, tmp, &rem);
+            tmpl = D_div(tmp, tmpl, largest_pow_of_base, tmp, &rem);
 	    if (tmpl == 1 && *tmp == 0) {
 		while(rem) {
 		    (*write_func)(arg, (rem % 10)+'0'); n++;
@@ -1746,7 +1767,7 @@ static Uint write_big(Wterm x, void (*write_func)(void *, char), void *arg)
 		}
 		break;
 	    } else {
-		int i = D_DECIMAL_EXP;
+                Uint i = digits_per_Sint;
 		while(i--) {
 		    (*write_func)(arg, (rem % 10)+'0'); n++;
 		    rem /= 10;
@@ -2522,63 +2543,100 @@ int term_equals_2pow32(Eterm x)
     }
 }
 
+static ERTS_INLINE int c2int_is_invalid_char(byte ch, int base) {
+    return (ch < '0'
+            || (ch > ('0' + base - 1)
+                && !(base > 10
+                     && ((ch >= 'a' && ch < ('a' + base - 10))
+                         || (ch >= 'A' && ch < ('A' + base - 10))))));
+}
 
-#define IS_VALID_CHARACTER(CHAR,BASE) \
-  (CHAR < '0'				    \
-   || (CHAR > ('0' + BASE - 1)		    \
-       && !(BASE > 10							\
-	    && ((CHAR >= 'a' && CHAR < ('a' + BASE - 10))		\
-		|| (CHAR >= 'A' && CHAR < ('A' + BASE - 10))))))
-#define CHARACTER_FROM_BASE(CHAR)					\
-  ((CHAR <= '9') ? CHAR - '0' : 10 + ((CHAR <= 'Z') ? CHAR - 'A' : CHAR - 'a'))
-#define D_BASE_EXP(BASE) (d_base_exp_lookup[BASE-2])
-#define D_BASE_BASE(BASE) (d_base_base_lookup[BASE-2])
-#define LG2_LOOKUP(BASE)  (lg2_lookup[base-2])
+static ERTS_INLINE byte c2int_digit_from_base(byte ch) {
+    return ch <= '9' ? ch - '0'
+            : (10 + (ch <= 'Z' ? ch - 'A' : ch - 'a'));
+}
 
 /*
- * for i in 2..64 do
- *   lg2_lookup[i-2] = log2(i)
- * end
- * How many bits are needed to store string of size n
+ * How many bits are needed to store 1 digit of given base in binary
+ * Wo.Alpha formula: Table [log2[n], {n,2,36}]
  */
-const double lg2_lookup[] = { 1.0, 1.58496, 2, 2.32193, 2.58496, 2.80735, 3.0,
-	3.16993, 3.32193, 3.45943, 3.58496, 3.70044, 3.80735, 3.90689, 4.0,
-	4.08746, 4.16993, 4.24793, 4.32193, 4.39232, 4.45943, 4.52356, 4.58496,
-	4.64386, 4.70044, 4.75489, 4.80735, 4.85798, 4.90689, 4.9542, 5.0,
-	5.04439, 5.08746, 5.12928, 5.16993, 5.20945, 5.24793, 5.2854, 5.32193,
-	5.35755, 5.39232, 5.42626, 5.45943, 5.49185, 5.52356, 5.55459, 5.58496,
-	5.61471, 5.64386, 5.67243, 5.70044, 5.72792, 5.75489, 5.78136, 5.80735,
-	5.83289, 5.85798, 5.88264, 5.90689, 5.93074, 5.9542, 5.97728, 6.0 };
+static const double lg2_lookup[36-1] = {
+    1.0, 1.58496, 2.0, 2.32193, 2.58496, 2.80735, 3.0, 3.16993, 3.32193,
+    3.45943, 3.58496, 3.70044, 3.80735, 3.90689, 4.0, 4.08746, 4.16993, 4.24793,
+    4.32193, 4.39232, 4.45943, 4.52356, 4.58496, 4.64386, 4.70044, 4.75489,
+    4.80735, 4.85798, 4.90689, 4.9542, 5.0, 5.04439, 5.08746, 5.12928, 5.16993
+};
+static ERTS_INLINE double lookup_log2(Uint base) {
+    return lg2_lookup[base - 2];
+}
 
 /*
- * for i in 2..64 do
- *   d_base_exp_lookup[i-2] = 31 / lg2_lookup[i-2];
- * end
- * How many characters can fit in 31 bits
+ * How many digits can fit into a signed int (Sint) for given base, we take
+ * one digit away just to be on the safer side (some corner cases).
  */
-const byte d_base_exp_lookup[] = { 31, 19, 15, 13, 11, 11, 10, 9, 9, 8, 8, 8, 8,
-	7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5,
-	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-	5, 5 };
+static const byte digits_per_sint_lookup[36-1] = {
+#if (SIZEOF_VOID_P == 4)
+    /* Wo.Alpha formula: Table [Trunc[31 / log[2,n]]-1, {n, 2, 36}] */
+    30, 18, 14, 12, 10, 10, 9, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4
+#elif (SIZEOF_VOID_P == 8)
+    /* Wo.Alpha formula: Table [Trunc[63 / log[2,n]]-1, {n, 2, 36}] */
+    62, 38, 30, 26, 23, 21, 20, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13,
+    13, 13, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11
+#else
+    #error "Please produce a lookup table for the new architecture"
+#endif
+};
 
 /*
- * for i in 2..64 do
- *   d_base_base_lookup[i-2] = pow(i,d_base_exp_lookup[i-2]);
- * end
- * How much can the characters which fit in 31 bit represent
+ * How many digits can fit into Erlang Small (SMALL_BITS-1) counting sign bit
  */
-const Uint d_base_base_lookup[] = { 2147483648u, 1162261467u, 1073741824u,
-	1220703125u, 362797056u, 1977326743u, 1073741824u, 387420489u,
-	1000000000u, 214358881u, 429981696u, 815730721u, 1475789056u,
-	170859375u, 268435456u, 410338673u, 612220032u, 893871739u, 1280000000u,
-	1801088541u, 113379904u, 148035889u, 191102976u, 244140625u, 308915776u,
-	387420489u, 481890304u, 594823321u, 729000000u, 887503681u, 1073741824u,
-	1291467969u, 1544804416u, 1838265625u, 60466176u, 69343957u, 79235168u,
-	90224199u, 102400000u, 115856201u, 130691232u, 147008443u, 164916224u,
-	184528125u, 205962976u, 229345007u, 254803968u, 282475249u, 312500000u,
-	345025251u, 380204032u, 418195493u, 459165024u, 503284375u, 550731776u,
-	601692057u, 656356768u, 714924299u, 777600000u, 844596301u, 916132832u,
-	992436543u, 1073741824u };
+static const byte digits_per_small_lookup[36-1] = {
+#if (SIZEOF_VOID_P == 4)
+    /* Wo.Alpha formula: Table [Trunc[27 / log[2,n]]-1, {n, 2, 36}] */
+    27, 17, 13, 11, 10, 9, 9, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
+#elif (SIZEOF_VOID_P == 8)
+    /* Wo.Alpha formula: Table [Trunc[59 / log[2,n]]-1, {n, 2, 36}] */
+    59, 37, 29, 25, 22, 21, 19, 18, 17, 17, 16, 15, 15, 15, 14, 14, 14, 13, 13,
+    13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11
+#else
+    #error "Please produce a lookup table for the new architecture"
+#endif
+};
+
+/*
+ * Largest power of base which can be represented in a signed int (Sint).
+ * Calculated by base 2..36 to the power of corresponding element from
+ * digits_per_sint_lookup.
+ */
+static const Sint largest_power_of_base_lookup[36-1] = {
+#if (SIZEOF_VOID_P == 4)
+    /* Wo.Alpha formula: Table [Pow[n, Trunc[31 / log[2,n]]-1], {n, 2, 36}] */
+    1073741824, 387420489, 268435456, 244140625, 60466176, 282475249, 134217728,
+    43046721, 100000000, 19487171, 35831808, 62748517, 105413504, 11390625,
+    16777216, 24137569, 34012224, 47045881, 64000000, 85766121, 5153632,
+    6436343,7962624, 9765625, 11881376, 14348907, 17210368, 20511149, 24300000,
+    28629151, 33554432, 39135393, 45435424, 52521875, 1679616
+#elif (SIZEOF_VOID_P == 8)
+    /* Wo.Alpha formula: Table [Pow[n, Trunc[63 / log[2,n]]-1], {n, 2, 36}]
+     * with LL added after each element manually */
+    4611686018427387904LL, 1350851717672992089LL, 1152921504606846976LL,
+    1490116119384765625LL, 789730223053602816LL, 558545864083284007LL,
+    1152921504606846976LL, 150094635296999121LL, 100000000000000000LL,
+    505447028499293771LL, 184884258895036416LL, 665416609183179841LL,
+    155568095557812224LL, 437893890380859375LL, 72057594037927936LL,
+    168377826559400929LL, 374813367582081024LL, 42052983462257059LL,
+    81920000000000000LL, 154472377739119461LL, 282810057883082752LL,
+    21914624432020321LL, 36520347436056576LL, 59604644775390625LL,
+    95428956661682176LL, 150094635296999121LL, 232218265089212416LL,
+    12200509765705829LL, 17714700000000000LL, 25408476896404831LL,
+    36028797018963968LL, 50542106513726817LL, 70188843638032384LL,
+    96549157373046875LL, 131621703842267136LL
+#else
+    #error "Please produce a lookup table for the new architecture"
+#endif
+};
 
 Eterm erts_chars_to_integer(Process *BIF_P, char *bytes, 
 			   Uint size, const int base) {
@@ -2588,8 +2646,11 @@ Eterm erts_chars_to_integer(Process *BIF_P, char *bytes,
     int neg = 0;
     byte b;
     Eterm *hp, *hp_end;
-    int m;
+    Sint m;
     int lg2;
+    const Uint digits_per_small = get_digits_per_small(base);
+    const Uint digits_per_Sint = get_digits_per_signed_int(base);
+    const Sint largest_pow_of_base = get_largest_power_of_base(base);
 
     if (size == 0)
 	goto bytebuf_to_integer_1_error;
@@ -2604,57 +2665,68 @@ Eterm erts_chars_to_integer(Process *BIF_P, char *bytes,
 	size--;
     }
 
+    /* Trim leading zeroes */
+    if (size) {
+        while (*bytes == '0') {
+            bytes++;
+            size--;
+            if (!size) {
+                /* All zero! */
+                res = make_small(0);
+                goto bytebuf_to_integer_1_done;
+            }
+        }
+    }
+
     if (size == 0)
 	goto bytebuf_to_integer_1_error;
 
-    if (size < SMALL_DIGITS && base <= 10) {
-	/* *
-	 * Take shortcut if we know that all chars are '0' < b < '9' and
-	 * fit in a small. This improves speed by about 10% over the generic
-	 * small case.
-	 * */
-	while (size--) {
-	    b = *bytes++;
+    if (size < digits_per_small) {
+        if (base <= 10) {
+            /* *
+         * Take shortcut if we know that all chars are '0' < b < '9' and
+         * fit in a small. This improves speed by about 10% over the generic
+         * small case.
+         * */
+            while (size--) {
+                b = *bytes++;
 
-	    if (b < '0' || b > ('0'+base-1))
-		goto bytebuf_to_integer_1_error;
+                if (b < '0' || b > ('0'+base-1))
+                    goto bytebuf_to_integer_1_error;
 
-	    i = i * base + b - '0';
-	}
+                i = i * base + b - '0';
+            }
 
-	if (neg)
-	    i = -i;
-	res = make_small(i);
-	goto bytebuf_to_integer_1_done;
+            if (neg)
+                i = -i;
+            res = make_small(i);
+            goto bytebuf_to_integer_1_done;
+        }
+
+        /* Take shortcut if we know it will fit in a small.
+         * This improves speed by about 30%.
+         */
+        while (size) {
+            b = *bytes++;
+            size--;
+
+            if (c2int_is_invalid_char(b, base))
+                goto bytebuf_to_integer_1_error;
+
+            i = i * base + c2int_digit_from_base(b);
+        }
+
+        if (neg)
+            i = -i;
+        res = make_small(i);
+        goto bytebuf_to_integer_1_done;
     }
 
     /*
      * Calculate the maximum number of bits which will
      * be needed to represent the binary
      */
-    lg2 = ((size+2)*LG2_LOOKUP(base)+1);
-
-    if (lg2 < SMALL_BITS) {
-	/* Take shortcut if we know it will fit in a small.
-	 * This improves speed by about 30%.
-	 */
-	while (size) {
-	    b = *bytes++;
-	    size--;
-
-	    if (IS_VALID_CHARACTER(b,base))
-		goto bytebuf_to_integer_1_error;
-
-	    i = i * base + CHARACTER_FROM_BASE(b);
-
-	}
-
-	if (neg)
-	    i = -i;
-	res = make_small(i);
-	goto bytebuf_to_integer_1_done;
-
-    }
+    lg2 = ((size+2)*lookup_log2(base)+1);
 
     /* Start calculating bignum */
     m = (lg2 + D_EXP-1)/D_EXP;
@@ -2663,8 +2735,8 @@ Eterm erts_chars_to_integer(Process *BIF_P, char *bytes,
     hp = HAlloc(BIF_P, m);
     hp_end = hp + m;
 
-    if ((i = (size % D_BASE_EXP(base))) == 0)
-	i = D_BASE_EXP(base);
+    if ((i = (size % digits_per_Sint)) == 0)
+        i = digits_per_Sint;
 
     n = size - i;
     m = 0;
@@ -2672,34 +2744,34 @@ Eterm erts_chars_to_integer(Process *BIF_P, char *bytes,
     while (i--) {
 	b = *bytes++;
 
-	if (IS_VALID_CHARACTER(b,base)) {
+        if (c2int_is_invalid_char(b,base)) {
 	    HRelease(BIF_P, hp_end, hp);
 	    goto bytebuf_to_integer_1_error;
 	}
 
-	m = base * m + CHARACTER_FROM_BASE(b);
+        m = base * m + c2int_digit_from_base(b);
     }
 
     res = small_to_big(m, hp);
 
     while (n) {
-	i = D_BASE_EXP(base);
-	n -= D_BASE_EXP(base);
+        i = digits_per_Sint;
+        n -= digits_per_Sint;
 	m = 0;
 	while (i--) {
 	    b = *bytes++;
 
-	    if (IS_VALID_CHARACTER(b,base)) {
+            if (c2int_is_invalid_char(b,base)) {
 	      HRelease(BIF_P, hp_end, hp);
 	      goto bytebuf_to_integer_1_error;
 	    }
 
-	    m = base * m + CHARACTER_FROM_BASE(b);
+            m = base * m + c2int_digit_from_base(b);
 	}
 	if (is_small(res)) {
 	    res = small_to_big(signed_val(res), hp);
 	}
-	res = big_times_small(res, D_BASE_BASE(base), hp);
+        res = big_times_small(res, largest_pow_of_base, hp);
 	if (is_small(res)) {
 	    res = small_to_big(signed_val(res), hp);
 	}
@@ -2730,31 +2802,35 @@ bytebuf_to_integer_1_error:
 
 bytebuf_to_integer_1_done:
     return res;
-
 }
 
-int do_list_to_integer(Process *p, Eterm orig_list,
-                       Eterm *integer, Eterm *rest)
+/* Converts list of digits with given 'base' to integer sequentially. Returns
+ * result in 'integer_out', remaining tail goes to 'tail_out' and returns result
+ * code if the list was consumed fully or partially or there was an error
+ */
+LTI_result_t erts_list_to_integer(Process *BIF_P, Eterm orig_list,
+                                  const Uint base,
+                                  Eterm *integer_out, Eterm *tail_out)
 {
      Sint i = 0;
      Uint ui = 0;
      int skip = 0;
      int neg = 0;
      Sint n = 0;
-     int m;
+     Sint m;
      int lg2;
      Eterm res;
-     Eterm* hp;
-     Eterm *hp_end;
      Eterm lst = orig_list;
      Eterm tail = lst;
      int error_res = LTI_BAD_STRUCTURE;
+     const Uint digits_per_small = get_digits_per_small(base);
+     const Uint digits_per_Sint = get_digits_per_signed_int(base);
 
      if (is_nil(lst)) {
        error_res = LTI_NO_INTEGER;
      error:
-         *rest = tail;
-         *integer = make_small(0);
+         *tail_out = tail;
+         *integer_out = make_small(0);
          return error_res;
      }
      if (is_not_list(lst))
@@ -2784,15 +2860,16 @@ int do_list_to_integer(Process *p, Eterm orig_list,
      /* Calculate size and do type check */
 
      while(1) {
+         byte ch;
          if (is_not_small(CAR(list_val(lst)))) {
              break;
          }
-         if (unsigned_val(CAR(list_val(lst))) < '0' ||
-             unsigned_val(CAR(list_val(lst))) > '9') {
+         ch = unsigned_val(CAR(list_val(lst)));
+         if (c2int_is_invalid_char(ch, base)) {
              break;
          }
-         ui = ui * 10;
-         ui = ui + unsigned_val(CAR(list_val(lst))) - '0';
+         ui = ui * base;
+         ui = ui + c2int_digit_from_base(ch);
          n++;
          lst = CDR(list_val(lst));
          if (is_nil(lst)) {
@@ -2810,23 +2887,24 @@ int do_list_to_integer(Process *p, Eterm orig_list,
      }
 
 
-      /* If n <= 8 then we know it's a small int
-      ** since 2^27 = 134217728. If n > 8 then we must
-      ** construct a bignum and let that routine do the checking
-      */
+      /* If length fits inside Sint then we know it's a small int. Else we
+       * must construct a bignum and let that routine do the checking
+       */
 
-     if (n <= SMALL_DIGITS) {  /* It must be small */
-         if (neg) i = -(Sint)ui;
-         else i = (Sint)ui;
+     if (n <= digits_per_small) {  /* It must be small */
+         i = neg ? -(Sint)ui : (Sint)ui;
          res = make_small(i);
      } else {
-         /* Convert from log10 to log2 by multiplying with 1/log10(2)=3.3219
-            which we round up to (3 + 1/3) */
-         lg2 = (n+1)*3 + (n+1)/3 + 1;
+         const Sint largest_pow_of_base = get_largest_power_of_base(base);
+         Eterm *hp;
+         Eterm *hp_end;
+
+         /* Convert from log_base to log2 using lookup table */
+         lg2 = ((n+2)*lookup_log2(base)+1);
          m  = (lg2+D_EXP-1)/D_EXP; /* number of digits */
          m  = BIG_NEED_SIZE(m);    /* number of words + thing */
 
-         hp = HAlloc(p, m);
+         hp = HAlloc(BIF_P, m);
          hp_end = hp + m;
 
          lst = orig_list;
@@ -2834,27 +2912,29 @@ int do_list_to_integer(Process *p, Eterm orig_list,
              lst = CDR(list_val(lst));
 
          /* load first digits (at least one digit) */
-         if ((i = (n % D_DECIMAL_EXP)) == 0)
-             i = D_DECIMAL_EXP;
+         if ((i = (n % digits_per_Sint)) == 0)
+             i = digits_per_Sint;
          n -= i;
          m = 0;
          while(i--) {
-             m = 10*m + (unsigned_val(CAR(list_val(lst))) - '0');
+             m *= base;
+             m += c2int_digit_from_base(unsigned_val(CAR(list_val(lst))));
              lst = CDR(list_val(lst));
          }
          res = small_to_big(m, hp);  /* load first digits */
 
          while(n) {
-             i = D_DECIMAL_EXP;
-             n -= D_DECIMAL_EXP;
+             i = digits_per_Sint;
+             n -= digits_per_Sint;
              m = 0;
              while(i--) {
-                 m = 10*m + (unsigned_val(CAR(list_val(lst))) - '0');
+                 m *= base;
+                 m += c2int_digit_from_base(unsigned_val(CAR(list_val(lst))));
                  lst = CDR(list_val(lst));
              }
              if (is_small(res))
                  res = small_to_big(signed_val(res), hp);
-             res = big_times_small(res, D_DECIMAL_BASE, hp);
+             res = big_times_small(res, largest_pow_of_base, hp);
              if (is_small(res))
                  res = small_to_big(signed_val(res), hp);
              res = big_plus_small(res, m, hp);
@@ -2876,10 +2956,10 @@ int do_list_to_integer(Process *p, Eterm orig_list,
                  hp += (big_arity(res)+1);
              }
          }
-         HRelease(p,hp_end,hp);
+         HRelease(BIF_P, hp_end, hp);
      }
-     *integer = res;
-     *rest = tail;
+     *integer_out = res;
+     *tail_out = tail;
      if (tail != NIL) {
          return LTI_SOME_INTEGER;
      }
diff --git a/erts/emulator/beam/big.h b/erts/emulator/beam/big.h
index 85807d6eea..9c92de6b55 100644
--- a/erts/emulator/beam/big.h
+++ b/erts/emulator/beam/big.h
@@ -54,9 +54,6 @@ typedef Uint32   ErtsHalfDigit;
 #error "can not determine machine size"
 #endif
 
-#define D_DECIMAL_EXP	9
-#define D_DECIMAL_BASE	1000000000
-
 typedef Uint  dsize_t;	 /* Vector size type */
 
 #define D_EXP (ERTS_SIZEOF_ETERM*8)
@@ -173,12 +170,15 @@ Eterm erts_sint64_to_big(Sint64, Eterm **);
 
 Eterm erts_chars_to_integer(Process *, char*, Uint, const int);
 
-#define LTI_BAD_STRUCTURE 0
-#define LTI_NO_INTEGER 1
-#define LTI_SOME_INTEGER 2
-#define LTI_ALL_INTEGER 3
-
-int do_list_to_integer(Process *p, Eterm orig_list,
-                       Eterm *integer, Eterm *rest);
-
+/* How list_to_integer classifies the input, was it even a string? */
+typedef enum {
+    LTI_BAD_STRUCTURE = 0,
+    LTI_NO_INTEGER    = 1,
+    LTI_SOME_INTEGER  = 2,
+    LTI_ALL_INTEGER   = 3
+} LTI_result_t;
+
+LTI_result_t erts_list_to_integer(Process *BIF_P, Eterm orig_list,
+                                  const Uint base,
+                                  Eterm *integer_out, Eterm *tail_out);
 #endif
diff --git a/erts/emulator/test/num_bif_SUITE.erl b/erts/emulator/test/num_bif_SUITE.erl
index 90b6a36262..d0840fe731 100644
--- a/erts/emulator/test/num_bif_SUITE.erl
+++ b/erts/emulator/test/num_bif_SUITE.erl
@@ -1,8 +1,8 @@
 %%
 %% %CopyrightBegin%
-%% 
+%%
 %% Copyright Ericsson AB 1997-2014. All Rights Reserved.
-%% 
+%%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
 %% You may obtain a copy of the License at
@@ -14,7 +14,7 @@
 %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 %% See the License for the specific language governing permissions and
 %% limitations under the License.
-%% 
+%%
 %% %CopyrightEnd%
 %%
 
@@ -36,22 +36,22 @@
 %%	integer_to_binary/2
 %%	binary_to_integer/1
 
--export([all/0, suite/0, groups/0, init_per_suite/1, end_per_suite/1, 
+-export([all/0, suite/0, groups/0, init_per_suite/1, end_per_suite/1,
 	 init_per_group/2, end_per_group/2, t_abs/1, t_float/1,
 	 t_float_to_string/1, t_integer_to_string/1,
-	 t_string_to_integer/1,
+	 t_string_to_integer/1, t_list_to_integer_edge_cases/1,
 	 t_string_to_float_safe/1, t_string_to_float_risky/1,
 	 t_round/1, t_trunc/1
      ]).
 
 suite() -> [{ct_hooks,[ts_install_cth]}].
 
-all() -> 
+all() ->
     [t_abs, t_float, t_float_to_string, t_integer_to_string,
      {group, t_string_to_float}, t_string_to_integer, t_round,
-     t_trunc].
+     t_trunc, t_list_to_integer_edge_cases].
 
-groups() -> 
+groups() ->
     [{t_string_to_float, [],
       [t_string_to_float_safe, t_string_to_float_risky]}].
 
@@ -73,7 +73,7 @@ t_abs(Config) when is_list(Config) ->
     5.5 = abs(id(5.5)),
     0.0 = abs(id(0.0)),
     100.0 = abs(id(-100.0)),
-    
+
     %% Integers.
     5 = abs(id(5)),
     0 = abs(id(0)),
@@ -93,7 +93,7 @@ t_abs(Config) when is_list(Config) ->
     BigNum = abs(BigNum),
     BigNum = abs(-BigNum),
     ok.
-    
+
 t_float(Config) when is_list(Config) ->
     0.0 = float(id(0)),
     2.5 = float(id(2.5)),
@@ -109,7 +109,7 @@ t_float(Config) when is_list(Config) ->
     %% Extremly big bignums.
     Big = id(list_to_integer(id(lists:duplicate(2000, $1)))),
     {'EXIT', {badarg, _}} = (catch float(Big)),
-    
+
     ok.
 
 
@@ -183,7 +183,7 @@ t_float_to_string(Config) when is_list(Config) ->
     test_fts("1.2300000000e+20",1.23e20, [{scientific, 10}, compact]),
     test_fts("1.23000000000000000000e+20",1.23e20, []),
     ok.
-    
+
 test_fts(Expect, Float) ->
     Expect = float_to_list(Float),
     BinExpect = list_to_binary(Expect),
@@ -255,7 +255,7 @@ t_round(Config) when is_list(Config) ->
     256 = round(id(255.6)),
     -1033 = round(id(-1033.3)),
     -1034 = round(id(-1033.6)),
-    
+
     % OTP-3722:
     X = id((1 bsl 27) - 1),
     MX = -X,
@@ -345,9 +345,9 @@ t_integer_to_string(Config) when is_list(Config) ->
 
     %% Invalid types
     lists:foreach(fun(Value) ->
-			  {'EXIT', {badarg, _}} = 
+			  {'EXIT', {badarg, _}} =
 			      (catch erlang:integer_to_binary(Value)),
-			  {'EXIT', {badarg, _}} = 
+			  {'EXIT', {badarg, _}} =
 			      (catch erlang:integer_to_list(Value))
 		  end,[atom,1.2,0.0,[$1,[$2]]]),
 
@@ -416,27 +416,27 @@ t_string_to_integer(Config) when is_list(Config) ->
 
     %% Invalid types
     lists:foreach(fun(Value) ->
-			  {'EXIT', {badarg, _}} = 
+			  {'EXIT', {badarg, _}} =
 			      (catch binary_to_integer(Value)),
-			  {'EXIT', {badarg, _}} = 
+			  {'EXIT', {badarg, _}} =
 			      (catch erlang:list_to_integer(Value))
 		  end,[atom,1.2,0.0,[$1,[$2]]]),
-    
+
     % Default base error cases
     lists:foreach(fun(Value) ->
-			  {'EXIT', {badarg, _}} = 
+			  {'EXIT', {badarg, _}} =
 			      (catch erlang:binary_to_integer(
 				       list_to_binary(Value))),
-			  {'EXIT', {badarg, _}} = 
+			  {'EXIT', {badarg, _}} =
 			      (catch erlang:list_to_integer(Value))
 		  end,["1.0"," 1"," -1","","+"]),
-    
+
     % Custom base error cases
     lists:foreach(fun({Value,Base}) ->
-			  {'EXIT', {badarg, _}} = 
+			  {'EXIT', {badarg, _}} =
 			      (catch binary_to_integer(
 				       list_to_binary(Value),Base)),
-			  {'EXIT', {badarg, _}} = 
+			  {'EXIT', {badarg, _}} =
 			      (catch erlang:list_to_integer(Value,Base))
 		  end,[{" 1",1},{" 1",37},{"2",2},{"C",11},
 		       {"1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111z",16},
@@ -449,10 +449,61 @@ t_string_to_integer(Config) when is_list(Config) ->
 
     ok.
 
+%% Tests edge cases for list_to_integer; compares with known good values
+
+t_list_to_integer_edge_cases(Config) when is_list(Config) ->
+    %% Take integer literals and compare to their representation in ExtTerm
+    T = [
+        {16, "0", <<131,97,0>>},
+        {16, "-0", <<131,97,0>>},
+
+        {16, "f", <<131,97,15>>},
+        {16, "-f", <<131,98,255,255,255,241>>},
+
+        {16, "0000000000000000000000000000000000000000000000000f",
+            <<131,97,15>>},
+        {16, "-0000000000000000000000000000000000000000000000000f",
+            <<131,98,255,255,255,241>>},
+
+        {16, "ffffffff", <<131,110,4,0,255,255,255,255>>},
+        {16, "-ffffffff", <<131,110,4,1,255,255,255,255>>},
+
+        {16, "7fffffff", <<131,110,4,0,255,255,255,127>>},
+        {16, "-7fffffff", <<131,98,128,0,0,1>>},
+
+        {16, "ffffffffffffffff",
+            <<131,110,8,0,255,255,255,255,255,255,255,255>>},
+        {16, "-ffffffffffffffff",
+            <<131,110,8,1,255,255,255,255,255,255,255,255>>},
+
+        {16, "7fffffffffffffff",
+            <<131,110,8,0,255,255,255,255,255,255,255,127>>},
+        {16, "-7fffffffffffffff",
+            <<131,110,8,1,255,255,255,255,255,255,255,127>>},
+
+        %% Alleged 32-bit corner case (should not happen on 64-bit). At 32-4
+        %% bits we may corrupt sign bit and fall out of SMALL_INT range.
+        {2, "1000000000000000000000000000", <<131,98,8,0,0,0>>},
+        {2, "-1000000000000000000000000000", <<131,98,248,0,0,0>>},
+
+        %% 64-bit corner case (should not happen on 32-bit) at 64-4 bits we
+        %% corrupt sign bit and fall out of SMALL_INT range (bam! all dead)
+        {2, "100000000000000000000000000000000000000000000000000000000000",
+            <<131,110,8,0,0,0,0,0,0,0,0,8>>},
+        {2, "-100000000000000000000000000000000000000000000000000000000000",
+            <<131,110,8,1,0,0,0,0,0,0,0,8>>}
+    ],
+    [begin
+         io:format("~s base ~p vs ~p~n", [Str, Base, Bin]),
+         FromStr = list_to_integer(Str, Base),
+         FromStr = binary_to_term(Bin)
+     end || {Base, Str, Bin} <- T],
+    ok.
+
 test_sti(Num) ->
     [begin
 	 io:format("Testing ~p:~p",[Num,Base]),
-	 test_sti(Num,Base) 
+	 test_sti(Num,Base)
      end|| Base <- lists:seq(2,36)].
 
 test_sti(Num,Base) ->
author	Dmytro Lytovchenko <dmytro.lytovchenko@gmail.com>	2016-01-21 17:20:47 +0100
committer	Dmytro Lytovchenko <dmytro.lytovchenko@gmail.com>	2016-02-02 11:32:58 +0100
commit	c96b6c2f58642b457d806c0a8a5bed03d16e35f1 (patch)
tree	6b70c74b6d5e96559811589c4fb5ccef19ac3d6d /erts
parent	0236a875929729eca1933cbb854267f584734b26 (diff)
download	otp-c96b6c2f58642b457d806c0a8a5bed03d16e35f1.tar.gz otp-c96b6c2f58642b457d806c0a8a5bed03d16e35f1.tar.bz2 otp-c96b6c2f58642b457d806c0a8a5bed03d16e35f1.zip