aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLukas Larsson <[email protected]>2016-02-03 14:42:45 +0100
committerLukas Larsson <[email protected]>2016-02-03 14:42:45 +0100
commitb200eebcf53b44ea8e5d3f1003f5a0c160b8dbbd (patch)
tree6806637c88ccaff8bb6d28d3b3e1b49ca4c19f6a
parent0cf09adc86c87e8b25005c6c305cb419b050c344 (diff)
parentc96b6c2f58642b457d806c0a8a5bed03d16e35f1 (diff)
downloadotp-b200eebcf53b44ea8e5d3f1003f5a0c160b8dbbd.tar.gz
otp-b200eebcf53b44ea8e5d3f1003f5a0c160b8dbbd.tar.bz2
otp-b200eebcf53b44ea8e5d3f1003f5a0c160b8dbbd.zip
Merge branch 'kvakvs/erts/list_to_integer/OTP-13293'
* kvakvs/erts/list_to_integer/OTP-13293: Better list_to_integer Moved do_list_to_integer from bif.c to big.c
-rw-r--r--erts/emulator/beam/bif.c205
-rw-r--r--erts/emulator/beam/big.c433
-rw-r--r--erts/emulator/beam/big.h14
-rw-r--r--erts/emulator/test/num_bif_SUITE.erl99
4 files changed, 436 insertions, 315 deletions
diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c
index 7e8eb44680..e116b10b95 100644
--- a/erts/emulator/beam/bif.c
+++ b/erts/emulator/beam/bif.c
@@ -2917,175 +2917,20 @@ BIF_RETTYPE integer_to_list_1(BIF_ALIST_1)
/**********************************************************************/
-/* convert a list of ascii ascii integer value to an integer */
-
-
-#define LTI_BAD_STRUCTURE 0
-#define LTI_NO_INTEGER 1
-#define LTI_SOME_INTEGER 2
-#define LTI_ALL_INTEGER 3
-
-static int do_list_to_integer(Process *p, Eterm orig_list,
- Eterm *integer, Eterm *rest)
-{
- Sint i = 0;
- Uint ui = 0;
- int skip = 0;
- int neg = 0;
- Sint n = 0;
- int m;
- int lg2;
- Eterm res;
- Eterm* hp;
- Eterm *hp_end;
- Eterm lst = orig_list;
- Eterm tail = lst;
- int error_res = LTI_BAD_STRUCTURE;
-
- if (is_nil(lst)) {
- error_res = LTI_NO_INTEGER;
- error:
- *rest = tail;
- *integer = make_small(0);
- return error_res;
- }
- if (is_not_list(lst))
- goto error;
-
- /* if first char is a '-' then it is a negative integer */
- if (CAR(list_val(lst)) == make_small('-')) {
- neg = 1;
- skip = 1;
- lst = CDR(list_val(lst));
- if (is_not_list(lst)) {
- tail = lst;
- error_res = LTI_NO_INTEGER;
- goto error;
- }
- } else if (CAR(list_val(lst)) == make_small('+')) {
- /* ignore plus */
- skip = 1;
- lst = CDR(list_val(lst));
- if (is_not_list(lst)) {
- tail = lst;
- error_res = LTI_NO_INTEGER;
- goto error;
- }
- }
-
- /* Calculate size and do type check */
-
- while(1) {
- if (is_not_small(CAR(list_val(lst)))) {
- break;
- }
- if (unsigned_val(CAR(list_val(lst))) < '0' ||
- unsigned_val(CAR(list_val(lst))) > '9') {
- break;
- }
- ui = ui * 10;
- ui = ui + unsigned_val(CAR(list_val(lst))) - '0';
- n++;
- lst = CDR(list_val(lst));
- if (is_nil(lst)) {
- break;
- }
- if (is_not_list(lst)) {
- break;
- }
- }
-
- tail = lst;
- if (!n) {
- error_res = LTI_NO_INTEGER;
- goto error;
- }
-
-
- /* If n <= 8 then we know it's a small int
- ** since 2^27 = 134217728. If n > 8 then we must
- ** construct a bignum and let that routine do the checking
- */
-
- if (n <= SMALL_DIGITS) { /* It must be small */
- if (neg) i = -(Sint)ui;
- else i = (Sint)ui;
- res = make_small(i);
- } else {
- /* Convert from log10 to log2 by multiplying with 1/log10(2)=3.3219
- which we round up to (3 + 1/3) */
- lg2 = (n+1)*3 + (n+1)/3 + 1;
- m = (lg2+D_EXP-1)/D_EXP; /* number of digits */
- m = BIG_NEED_SIZE(m); /* number of words + thing */
-
- hp = HAlloc(p, m);
- hp_end = hp + m;
-
- lst = orig_list;
- if (skip)
- lst = CDR(list_val(lst));
-
- /* load first digits (at least one digit) */
- if ((i = (n % D_DECIMAL_EXP)) == 0)
- i = D_DECIMAL_EXP;
- n -= i;
- m = 0;
- while(i--) {
- m = 10*m + (unsigned_val(CAR(list_val(lst))) - '0');
- lst = CDR(list_val(lst));
- }
- res = small_to_big(m, hp); /* load first digits */
-
- while(n) {
- i = D_DECIMAL_EXP;
- n -= D_DECIMAL_EXP;
- m = 0;
- while(i--) {
- m = 10*m + (unsigned_val(CAR(list_val(lst))) - '0');
- lst = CDR(list_val(lst));
- }
- if (is_small(res))
- res = small_to_big(signed_val(res), hp);
- res = big_times_small(res, D_DECIMAL_BASE, hp);
- if (is_small(res))
- res = small_to_big(signed_val(res), hp);
- res = big_plus_small(res, m, hp);
- }
-
- if (neg) {
- if (is_small(res))
- res = make_small(-signed_val(res));
- else {
- Uint *big = big_val(res); /* point to thing */
- *big = bignum_header_neg(*big);
- }
- }
-
- if (is_not_small(res)) {
- res = big_plus_small(res, 0, hp); /* includes conversion to small */
+/*
+ * Converts a list of ascii base10 digits to an integer fully or partially.
+ * Returns result and the remaining tail.
+ * On error returns: {error,not_a_list}, or {error, no_integer}
+ */
- if (is_not_small(res)) {
- hp += (big_arity(res)+1);
- }
- }
- HRelease(p,hp_end,hp);
- }
- *integer = res;
- *rest = tail;
- if (tail != NIL) {
- return LTI_SOME_INTEGER;
- }
- return LTI_ALL_INTEGER;
-}
BIF_RETTYPE string_to_integer_1(BIF_ALIST_1)
{
Eterm res;
Eterm tail;
Eterm *hp;
/* must be a list */
- switch (do_list_to_integer(BIF_P,BIF_ARG_1,&res,&tail)) {
- /* HAlloc after do_list_to_integer as it
- might HAlloc itself (bignum) */
+ switch (erts_list_to_integer(BIF_P, BIF_ARG_1, 10, &res, &tail)) {
+ /* HAlloc after erts_list_to_integer as it might HAlloc itself (bignum) */
case LTI_BAD_STRUCTURE:
hp = HAlloc(BIF_P,3);
BIF_RET(TUPLE2(hp, am_error, am_not_a_list));
@@ -3100,13 +2945,14 @@ BIF_RETTYPE string_to_integer_1(BIF_ALIST_1)
BIF_RETTYPE list_to_integer_1(BIF_ALIST_1)
{
- /* Using do_list_to_integer is about twice as fast as using
+ /* Using erts_list_to_integer is about twice as fast as using
erts_chars_to_integer because we do not have to copy the
entire list */
Eterm res;
Eterm dummy;
/* must be a list */
- if (do_list_to_integer(BIF_P,BIF_ARG_1,&res,&dummy) != LTI_ALL_INTEGER) {
+ if (erts_list_to_integer(BIF_P, BIF_ARG_1, 10,
+ &res, &dummy) != LTI_ALL_INTEGER) {
BIF_ERROR(BIF_P,BADARG);
}
BIF_RET(res);
@@ -3114,14 +2960,12 @@ BIF_RETTYPE list_to_integer_1(BIF_ALIST_1)
BIF_RETTYPE list_to_integer_2(BIF_ALIST_2)
{
-
/* Bif implementation is about 50% faster than pure erlang,
and since we have erts_chars_to_integer now it is simpler
as well. This could be optmized further if we did not have to
copy the list to buf. */
int i;
- Eterm res;
- char *buf = NULL;
+ Eterm res, dummy;
int base;
i = erts_list_length(BIF_ARG_1);
@@ -3129,31 +2973,16 @@ BIF_RETTYPE list_to_integer_2(BIF_ALIST_2)
BIF_ERROR(BIF_P, BADARG);
base = signed_val(BIF_ARG_2);
-
+
if (base < 2 || base > 36)
BIF_ERROR(BIF_P, BADARG);
- /* Take fast path if base it 10 */
- if (base == 10)
- return list_to_integer_1(BIF_P,&BIF_ARG_1);
-
- buf = (char *) erts_alloc(ERTS_ALC_T_TMP, i + 1);
-
- if (intlist_to_buf(BIF_ARG_1, buf, i) < 0)
- goto list_to_integer_1_error;
- buf[i] = '\0'; /* null terminal */
-
- if ((res = erts_chars_to_integer(BIF_P,buf,i,base)) == THE_NON_VALUE)
- goto list_to_integer_1_error;
-
- erts_free(ERTS_ALC_T_TMP, (void *) buf);
+ if (erts_list_to_integer(BIF_P, BIF_ARG_1, base,
+ &res, &dummy) != LTI_ALL_INTEGER) {
+ BIF_ERROR(BIF_P,BADARG);
+ }
BIF_RET(res);
-
- list_to_integer_1_error:
- erts_free(ERTS_ALC_T_TMP, (void *) buf);
- BIF_ERROR(BIF_P, BADARG);
-
- }
+}
/**********************************************************************/
diff --git a/erts/emulator/beam/big.c b/erts/emulator/beam/big.c
index 02d37e24df..11838e24ef 100644
--- a/erts/emulator/beam/big.c
+++ b/erts/emulator/beam/big.c
@@ -48,7 +48,7 @@
_t_dst = (dst)+((sz)-1); \
_t_src = (src)+((sz)-1); \
while(_t_sz--) *_t_dst-- = *_t_src--; \
- } \
+ } \
} while(0)
/* add a and b with carry in + out */
@@ -423,6 +423,25 @@
#endif
+/* Forward declaration of lookup tables (See below in this file) used in list to
+ * integer conversions for different bases. Also used in bignum printing.
+ */
+static const byte digits_per_sint_lookup[36-1];
+static const byte digits_per_small_lookup[36-1];
+static const Sint largest_power_of_base_lookup[36-1];
+
+static ERTS_INLINE byte get_digits_per_signed_int(Uint base) {
+ return digits_per_sint_lookup[base-2];
+}
+
+static ERTS_INLINE byte get_digits_per_small(Uint base) {
+ return digits_per_small_lookup[base-2];
+}
+
+static ERTS_INLINE Sint get_largest_power_of_base(Uint base) {
+ return largest_power_of_base_lookup[base-2];
+}
+
/*
** compare two number vectors
*/
@@ -1719,8 +1738,10 @@ static Uint write_big(Wterm x, void (*write_func)(void *, char), void *arg)
short sign = BIG_SIGN(xp);
ErtsDigit rem;
Uint n = 0;
+ const Uint digits_per_Sint = get_digits_per_signed_int(10);
+ const Sint largest_pow_of_base = get_largest_power_of_base(10);
- if (xl == 1 && *dx < D_DECIMAL_BASE) {
+ if (xl == 1 && *dx < largest_pow_of_base) {
rem = *dx;
if (rem == 0) {
(*write_func)(arg, '0'); n++;
@@ -1738,7 +1759,7 @@ static Uint write_big(Wterm x, void (*write_func)(void *, char), void *arg)
MOVE_DIGITS(tmp, dx, xl);
while(1) {
- tmpl = D_div(tmp, tmpl, D_DECIMAL_BASE, tmp, &rem);
+ tmpl = D_div(tmp, tmpl, largest_pow_of_base, tmp, &rem);
if (tmpl == 1 && *tmp == 0) {
while(rem) {
(*write_func)(arg, (rem % 10)+'0'); n++;
@@ -1746,7 +1767,7 @@ static Uint write_big(Wterm x, void (*write_func)(void *, char), void *arg)
}
break;
} else {
- int i = D_DECIMAL_EXP;
+ Uint i = digits_per_Sint;
while(i--) {
(*write_func)(arg, (rem % 10)+'0'); n++;
rem /= 10;
@@ -2522,63 +2543,100 @@ int term_equals_2pow32(Eterm x)
}
}
+static ERTS_INLINE int c2int_is_invalid_char(byte ch, int base) {
+ return (ch < '0'
+ || (ch > ('0' + base - 1)
+ && !(base > 10
+ && ((ch >= 'a' && ch < ('a' + base - 10))
+ || (ch >= 'A' && ch < ('A' + base - 10))))));
+}
-#define IS_VALID_CHARACTER(CHAR,BASE) \
- (CHAR < '0' \
- || (CHAR > ('0' + BASE - 1) \
- && !(BASE > 10 \
- && ((CHAR >= 'a' && CHAR < ('a' + BASE - 10)) \
- || (CHAR >= 'A' && CHAR < ('A' + BASE - 10))))))
-#define CHARACTER_FROM_BASE(CHAR) \
- ((CHAR <= '9') ? CHAR - '0' : 10 + ((CHAR <= 'Z') ? CHAR - 'A' : CHAR - 'a'))
-#define D_BASE_EXP(BASE) (d_base_exp_lookup[BASE-2])
-#define D_BASE_BASE(BASE) (d_base_base_lookup[BASE-2])
-#define LG2_LOOKUP(BASE) (lg2_lookup[base-2])
+static ERTS_INLINE byte c2int_digit_from_base(byte ch) {
+ return ch <= '9' ? ch - '0'
+ : (10 + (ch <= 'Z' ? ch - 'A' : ch - 'a'));
+}
/*
- * for i in 2..64 do
- * lg2_lookup[i-2] = log2(i)
- * end
- * How many bits are needed to store string of size n
+ * How many bits are needed to store 1 digit of given base in binary
+ * Wo.Alpha formula: Table [log2[n], {n,2,36}]
*/
-const double lg2_lookup[] = { 1.0, 1.58496, 2, 2.32193, 2.58496, 2.80735, 3.0,
- 3.16993, 3.32193, 3.45943, 3.58496, 3.70044, 3.80735, 3.90689, 4.0,
- 4.08746, 4.16993, 4.24793, 4.32193, 4.39232, 4.45943, 4.52356, 4.58496,
- 4.64386, 4.70044, 4.75489, 4.80735, 4.85798, 4.90689, 4.9542, 5.0,
- 5.04439, 5.08746, 5.12928, 5.16993, 5.20945, 5.24793, 5.2854, 5.32193,
- 5.35755, 5.39232, 5.42626, 5.45943, 5.49185, 5.52356, 5.55459, 5.58496,
- 5.61471, 5.64386, 5.67243, 5.70044, 5.72792, 5.75489, 5.78136, 5.80735,
- 5.83289, 5.85798, 5.88264, 5.90689, 5.93074, 5.9542, 5.97728, 6.0 };
+static const double lg2_lookup[36-1] = {
+ 1.0, 1.58496, 2.0, 2.32193, 2.58496, 2.80735, 3.0, 3.16993, 3.32193,
+ 3.45943, 3.58496, 3.70044, 3.80735, 3.90689, 4.0, 4.08746, 4.16993, 4.24793,
+ 4.32193, 4.39232, 4.45943, 4.52356, 4.58496, 4.64386, 4.70044, 4.75489,
+ 4.80735, 4.85798, 4.90689, 4.9542, 5.0, 5.04439, 5.08746, 5.12928, 5.16993
+};
+static ERTS_INLINE double lookup_log2(Uint base) {
+ return lg2_lookup[base - 2];
+}
/*
- * for i in 2..64 do
- * d_base_exp_lookup[i-2] = 31 / lg2_lookup[i-2];
- * end
- * How many characters can fit in 31 bits
+ * How many digits can fit into a signed int (Sint) for given base, we take
+ * one digit away just to be on the safer side (some corner cases).
*/
-const byte d_base_exp_lookup[] = { 31, 19, 15, 13, 11, 11, 10, 9, 9, 8, 8, 8, 8,
- 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5 };
+static const byte digits_per_sint_lookup[36-1] = {
+#if (SIZEOF_VOID_P == 4)
+ /* Wo.Alpha formula: Table [Trunc[31 / log[2,n]]-1, {n, 2, 36}] */
+ 30, 18, 14, 12, 10, 10, 9, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4
+#elif (SIZEOF_VOID_P == 8)
+ /* Wo.Alpha formula: Table [Trunc[63 / log[2,n]]-1, {n, 2, 36}] */
+ 62, 38, 30, 26, 23, 21, 20, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13,
+ 13, 13, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11
+#else
+ #error "Please produce a lookup table for the new architecture"
+#endif
+};
/*
- * for i in 2..64 do
- * d_base_base_lookup[i-2] = pow(i,d_base_exp_lookup[i-2]);
- * end
- * How much can the characters which fit in 31 bit represent
+ * How many digits can fit into Erlang Small (SMALL_BITS-1) counting sign bit
*/
-const Uint d_base_base_lookup[] = { 2147483648u, 1162261467u, 1073741824u,
- 1220703125u, 362797056u, 1977326743u, 1073741824u, 387420489u,
- 1000000000u, 214358881u, 429981696u, 815730721u, 1475789056u,
- 170859375u, 268435456u, 410338673u, 612220032u, 893871739u, 1280000000u,
- 1801088541u, 113379904u, 148035889u, 191102976u, 244140625u, 308915776u,
- 387420489u, 481890304u, 594823321u, 729000000u, 887503681u, 1073741824u,
- 1291467969u, 1544804416u, 1838265625u, 60466176u, 69343957u, 79235168u,
- 90224199u, 102400000u, 115856201u, 130691232u, 147008443u, 164916224u,
- 184528125u, 205962976u, 229345007u, 254803968u, 282475249u, 312500000u,
- 345025251u, 380204032u, 418195493u, 459165024u, 503284375u, 550731776u,
- 601692057u, 656356768u, 714924299u, 777600000u, 844596301u, 916132832u,
- 992436543u, 1073741824u };
+static const byte digits_per_small_lookup[36-1] = {
+#if (SIZEOF_VOID_P == 4)
+ /* Wo.Alpha formula: Table [Trunc[27 / log[2,n]]-1, {n, 2, 36}] */
+ 27, 17, 13, 11, 10, 9, 9, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
+#elif (SIZEOF_VOID_P == 8)
+ /* Wo.Alpha formula: Table [Trunc[59 / log[2,n]]-1, {n, 2, 36}] */
+ 59, 37, 29, 25, 22, 21, 19, 18, 17, 17, 16, 15, 15, 15, 14, 14, 14, 13, 13,
+ 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11
+#else
+ #error "Please produce a lookup table for the new architecture"
+#endif
+};
+
+/*
+ * Largest power of base which can be represented in a signed int (Sint).
+ * Calculated by base 2..36 to the power of corresponding element from
+ * digits_per_sint_lookup.
+ */
+static const Sint largest_power_of_base_lookup[36-1] = {
+#if (SIZEOF_VOID_P == 4)
+ /* Wo.Alpha formula: Table [Pow[n, Trunc[31 / log[2,n]]-1], {n, 2, 36}] */
+ 1073741824, 387420489, 268435456, 244140625, 60466176, 282475249, 134217728,
+ 43046721, 100000000, 19487171, 35831808, 62748517, 105413504, 11390625,
+ 16777216, 24137569, 34012224, 47045881, 64000000, 85766121, 5153632,
+ 6436343,7962624, 9765625, 11881376, 14348907, 17210368, 20511149, 24300000,
+ 28629151, 33554432, 39135393, 45435424, 52521875, 1679616
+#elif (SIZEOF_VOID_P == 8)
+ /* Wo.Alpha formula: Table [Pow[n, Trunc[63 / log[2,n]]-1], {n, 2, 36}]
+ * with LL added after each element manually */
+ 4611686018427387904LL, 1350851717672992089LL, 1152921504606846976LL,
+ 1490116119384765625LL, 789730223053602816LL, 558545864083284007LL,
+ 1152921504606846976LL, 150094635296999121LL, 100000000000000000LL,
+ 505447028499293771LL, 184884258895036416LL, 665416609183179841LL,
+ 155568095557812224LL, 437893890380859375LL, 72057594037927936LL,
+ 168377826559400929LL, 374813367582081024LL, 42052983462257059LL,
+ 81920000000000000LL, 154472377739119461LL, 282810057883082752LL,
+ 21914624432020321LL, 36520347436056576LL, 59604644775390625LL,
+ 95428956661682176LL, 150094635296999121LL, 232218265089212416LL,
+ 12200509765705829LL, 17714700000000000LL, 25408476896404831LL,
+ 36028797018963968LL, 50542106513726817LL, 70188843638032384LL,
+ 96549157373046875LL, 131621703842267136LL
+#else
+ #error "Please produce a lookup table for the new architecture"
+#endif
+};
Eterm erts_chars_to_integer(Process *BIF_P, char *bytes,
Uint size, const int base) {
@@ -2588,8 +2646,11 @@ Eterm erts_chars_to_integer(Process *BIF_P, char *bytes,
int neg = 0;
byte b;
Eterm *hp, *hp_end;
- int m;
+ Sint m;
int lg2;
+ const Uint digits_per_small = get_digits_per_small(base);
+ const Uint digits_per_Sint = get_digits_per_signed_int(base);
+ const Sint largest_pow_of_base = get_largest_power_of_base(base);
if (size == 0)
goto bytebuf_to_integer_1_error;
@@ -2604,57 +2665,68 @@ Eterm erts_chars_to_integer(Process *BIF_P, char *bytes,
size--;
}
+ /* Trim leading zeroes */
+ if (size) {
+ while (*bytes == '0') {
+ bytes++;
+ size--;
+ if (!size) {
+ /* All zero! */
+ res = make_small(0);
+ goto bytebuf_to_integer_1_done;
+ }
+ }
+ }
+
if (size == 0)
goto bytebuf_to_integer_1_error;
- if (size < SMALL_DIGITS && base <= 10) {
- /* *
- * Take shortcut if we know that all chars are '0' < b < '9' and
- * fit in a small. This improves speed by about 10% over the generic
- * small case.
- * */
- while (size--) {
- b = *bytes++;
+ if (size < digits_per_small) {
+ if (base <= 10) {
+ /* *
+ * Take shortcut if we know that all chars are '0' < b < '9' and
+ * fit in a small. This improves speed by about 10% over the generic
+ * small case.
+ * */
+ while (size--) {
+ b = *bytes++;
- if (b < '0' || b > ('0'+base-1))
- goto bytebuf_to_integer_1_error;
+ if (b < '0' || b > ('0'+base-1))
+ goto bytebuf_to_integer_1_error;
- i = i * base + b - '0';
- }
+ i = i * base + b - '0';
+ }
- if (neg)
- i = -i;
- res = make_small(i);
- goto bytebuf_to_integer_1_done;
+ if (neg)
+ i = -i;
+ res = make_small(i);
+ goto bytebuf_to_integer_1_done;
+ }
+
+ /* Take shortcut if we know it will fit in a small.
+ * This improves speed by about 30%.
+ */
+ while (size) {
+ b = *bytes++;
+ size--;
+
+ if (c2int_is_invalid_char(b, base))
+ goto bytebuf_to_integer_1_error;
+
+ i = i * base + c2int_digit_from_base(b);
+ }
+
+ if (neg)
+ i = -i;
+ res = make_small(i);
+ goto bytebuf_to_integer_1_done;
}
/*
* Calculate the maximum number of bits which will
* be needed to represent the binary
*/
- lg2 = ((size+2)*LG2_LOOKUP(base)+1);
-
- if (lg2 < SMALL_BITS) {
- /* Take shortcut if we know it will fit in a small.
- * This improves speed by about 30%.
- */
- while (size) {
- b = *bytes++;
- size--;
-
- if (IS_VALID_CHARACTER(b,base))
- goto bytebuf_to_integer_1_error;
-
- i = i * base + CHARACTER_FROM_BASE(b);
-
- }
-
- if (neg)
- i = -i;
- res = make_small(i);
- goto bytebuf_to_integer_1_done;
-
- }
+ lg2 = ((size+2)*lookup_log2(base)+1);
/* Start calculating bignum */
m = (lg2 + D_EXP-1)/D_EXP;
@@ -2663,8 +2735,8 @@ Eterm erts_chars_to_integer(Process *BIF_P, char *bytes,
hp = HAlloc(BIF_P, m);
hp_end = hp + m;
- if ((i = (size % D_BASE_EXP(base))) == 0)
- i = D_BASE_EXP(base);
+ if ((i = (size % digits_per_Sint)) == 0)
+ i = digits_per_Sint;
n = size - i;
m = 0;
@@ -2672,34 +2744,34 @@ Eterm erts_chars_to_integer(Process *BIF_P, char *bytes,
while (i--) {
b = *bytes++;
- if (IS_VALID_CHARACTER(b,base)) {
+ if (c2int_is_invalid_char(b,base)) {
HRelease(BIF_P, hp_end, hp);
goto bytebuf_to_integer_1_error;
}
- m = base * m + CHARACTER_FROM_BASE(b);
+ m = base * m + c2int_digit_from_base(b);
}
res = small_to_big(m, hp);
while (n) {
- i = D_BASE_EXP(base);
- n -= D_BASE_EXP(base);
+ i = digits_per_Sint;
+ n -= digits_per_Sint;
m = 0;
while (i--) {
b = *bytes++;
- if (IS_VALID_CHARACTER(b,base)) {
+ if (c2int_is_invalid_char(b,base)) {
HRelease(BIF_P, hp_end, hp);
goto bytebuf_to_integer_1_error;
}
- m = base * m + CHARACTER_FROM_BASE(b);
+ m = base * m + c2int_digit_from_base(b);
}
if (is_small(res)) {
res = small_to_big(signed_val(res), hp);
}
- res = big_times_small(res, D_BASE_BASE(base), hp);
+ res = big_times_small(res, largest_pow_of_base, hp);
if (is_small(res)) {
res = small_to_big(signed_val(res), hp);
}
@@ -2730,5 +2802,166 @@ bytebuf_to_integer_1_error:
bytebuf_to_integer_1_done:
return res;
+}
+/* Converts list of digits with given 'base' to integer sequentially. Returns
+ * result in 'integer_out', remaining tail goes to 'tail_out' and returns result
+ * code if the list was consumed fully or partially or there was an error
+ */
+LTI_result_t erts_list_to_integer(Process *BIF_P, Eterm orig_list,
+ const Uint base,
+ Eterm *integer_out, Eterm *tail_out)
+{
+ Sint i = 0;
+ Uint ui = 0;
+ int skip = 0;
+ int neg = 0;
+ Sint n = 0;
+ Sint m;
+ int lg2;
+ Eterm res;
+ Eterm lst = orig_list;
+ Eterm tail = lst;
+ int error_res = LTI_BAD_STRUCTURE;
+ const Uint digits_per_small = get_digits_per_small(base);
+ const Uint digits_per_Sint = get_digits_per_signed_int(base);
+
+ if (is_nil(lst)) {
+ error_res = LTI_NO_INTEGER;
+ error:
+ *tail_out = tail;
+ *integer_out = make_small(0);
+ return error_res;
+ }
+ if (is_not_list(lst))
+ goto error;
+
+ /* if first char is a '-' then it is a negative integer */
+ if (CAR(list_val(lst)) == make_small('-')) {
+ neg = 1;
+ skip = 1;
+ lst = CDR(list_val(lst));
+ if (is_not_list(lst)) {
+ tail = lst;
+ error_res = LTI_NO_INTEGER;
+ goto error;
+ }
+ } else if (CAR(list_val(lst)) == make_small('+')) {
+ /* ignore plus */
+ skip = 1;
+ lst = CDR(list_val(lst));
+ if (is_not_list(lst)) {
+ tail = lst;
+ error_res = LTI_NO_INTEGER;
+ goto error;
+ }
+ }
+
+ /* Calculate size and do type check */
+
+ while(1) {
+ byte ch;
+ if (is_not_small(CAR(list_val(lst)))) {
+ break;
+ }
+ ch = unsigned_val(CAR(list_val(lst)));
+ if (c2int_is_invalid_char(ch, base)) {
+ break;
+ }
+ ui = ui * base;
+ ui = ui + c2int_digit_from_base(ch);
+ n++;
+ lst = CDR(list_val(lst));
+ if (is_nil(lst)) {
+ break;
+ }
+ if (is_not_list(lst)) {
+ break;
+ }
+ }
+
+ tail = lst;
+ if (!n) {
+ error_res = LTI_NO_INTEGER;
+ goto error;
+ }
+
+
+ /* If length fits inside Sint then we know it's a small int. Else we
+ * must construct a bignum and let that routine do the checking
+ */
+
+ if (n <= digits_per_small) { /* It must be small */
+ i = neg ? -(Sint)ui : (Sint)ui;
+ res = make_small(i);
+ } else {
+ const Sint largest_pow_of_base = get_largest_power_of_base(base);
+ Eterm *hp;
+ Eterm *hp_end;
+
+ /* Convert from log_base to log2 using lookup table */
+ lg2 = ((n+2)*lookup_log2(base)+1);
+ m = (lg2+D_EXP-1)/D_EXP; /* number of digits */
+ m = BIG_NEED_SIZE(m); /* number of words + thing */
+
+ hp = HAlloc(BIF_P, m);
+ hp_end = hp + m;
+
+ lst = orig_list;
+ if (skip)
+ lst = CDR(list_val(lst));
+
+ /* load first digits (at least one digit) */
+ if ((i = (n % digits_per_Sint)) == 0)
+ i = digits_per_Sint;
+ n -= i;
+ m = 0;
+ while(i--) {
+ m *= base;
+ m += c2int_digit_from_base(unsigned_val(CAR(list_val(lst))));
+ lst = CDR(list_val(lst));
+ }
+ res = small_to_big(m, hp); /* load first digits */
+
+ while(n) {
+ i = digits_per_Sint;
+ n -= digits_per_Sint;
+ m = 0;
+ while(i--) {
+ m *= base;
+ m += c2int_digit_from_base(unsigned_val(CAR(list_val(lst))));
+ lst = CDR(list_val(lst));
+ }
+ if (is_small(res))
+ res = small_to_big(signed_val(res), hp);
+ res = big_times_small(res, largest_pow_of_base, hp);
+ if (is_small(res))
+ res = small_to_big(signed_val(res), hp);
+ res = big_plus_small(res, m, hp);
+ }
+
+ if (neg) {
+ if (is_small(res))
+ res = make_small(-signed_val(res));
+ else {
+ Uint *big = big_val(res); /* point to thing */
+ *big = bignum_header_neg(*big);
+ }
+ }
+
+ if (is_not_small(res)) {
+ res = big_plus_small(res, 0, hp); /* includes conversion to small */
+
+ if (is_not_small(res)) {
+ hp += (big_arity(res)+1);
+ }
+ }
+ HRelease(BIF_P, hp_end, hp);
+ }
+ *integer_out = res;
+ *tail_out = tail;
+ if (tail != NIL) {
+ return LTI_SOME_INTEGER;
+ }
+ return LTI_ALL_INTEGER;
}
diff --git a/erts/emulator/beam/big.h b/erts/emulator/beam/big.h
index 94f9bce10e..9c92de6b55 100644
--- a/erts/emulator/beam/big.h
+++ b/erts/emulator/beam/big.h
@@ -54,9 +54,6 @@ typedef Uint32 ErtsHalfDigit;
#error "can not determine machine size"
#endif
-#define D_DECIMAL_EXP 9
-#define D_DECIMAL_BASE 1000000000
-
typedef Uint dsize_t; /* Vector size type */
#define D_EXP (ERTS_SIZEOF_ETERM*8)
@@ -173,4 +170,15 @@ Eterm erts_sint64_to_big(Sint64, Eterm **);
Eterm erts_chars_to_integer(Process *, char*, Uint, const int);
+/* How list_to_integer classifies the input, was it even a string? */
+typedef enum {
+ LTI_BAD_STRUCTURE = 0,
+ LTI_NO_INTEGER = 1,
+ LTI_SOME_INTEGER = 2,
+ LTI_ALL_INTEGER = 3
+} LTI_result_t;
+
+LTI_result_t erts_list_to_integer(Process *BIF_P, Eterm orig_list,
+ const Uint base,
+ Eterm *integer_out, Eterm *tail_out);
#endif
diff --git a/erts/emulator/test/num_bif_SUITE.erl b/erts/emulator/test/num_bif_SUITE.erl
index 90b6a36262..d0840fe731 100644
--- a/erts/emulator/test/num_bif_SUITE.erl
+++ b/erts/emulator/test/num_bif_SUITE.erl
@@ -1,8 +1,8 @@
%%
%% %CopyrightBegin%
-%%
+%%
%% Copyright Ericsson AB 1997-2014. All Rights Reserved.
-%%
+%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
@@ -14,7 +14,7 @@
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
-%%
+%%
%% %CopyrightEnd%
%%
@@ -36,22 +36,22 @@
%% integer_to_binary/2
%% binary_to_integer/1
--export([all/0, suite/0, groups/0, init_per_suite/1, end_per_suite/1,
+-export([all/0, suite/0, groups/0, init_per_suite/1, end_per_suite/1,
init_per_group/2, end_per_group/2, t_abs/1, t_float/1,
t_float_to_string/1, t_integer_to_string/1,
- t_string_to_integer/1,
+ t_string_to_integer/1, t_list_to_integer_edge_cases/1,
t_string_to_float_safe/1, t_string_to_float_risky/1,
t_round/1, t_trunc/1
]).
suite() -> [{ct_hooks,[ts_install_cth]}].
-all() ->
+all() ->
[t_abs, t_float, t_float_to_string, t_integer_to_string,
{group, t_string_to_float}, t_string_to_integer, t_round,
- t_trunc].
+ t_trunc, t_list_to_integer_edge_cases].
-groups() ->
+groups() ->
[{t_string_to_float, [],
[t_string_to_float_safe, t_string_to_float_risky]}].
@@ -73,7 +73,7 @@ t_abs(Config) when is_list(Config) ->
5.5 = abs(id(5.5)),
0.0 = abs(id(0.0)),
100.0 = abs(id(-100.0)),
-
+
%% Integers.
5 = abs(id(5)),
0 = abs(id(0)),
@@ -93,7 +93,7 @@ t_abs(Config) when is_list(Config) ->
BigNum = abs(BigNum),
BigNum = abs(-BigNum),
ok.
-
+
t_float(Config) when is_list(Config) ->
0.0 = float(id(0)),
2.5 = float(id(2.5)),
@@ -109,7 +109,7 @@ t_float(Config) when is_list(Config) ->
%% Extremly big bignums.
Big = id(list_to_integer(id(lists:duplicate(2000, $1)))),
{'EXIT', {badarg, _}} = (catch float(Big)),
-
+
ok.
@@ -183,7 +183,7 @@ t_float_to_string(Config) when is_list(Config) ->
test_fts("1.2300000000e+20",1.23e20, [{scientific, 10}, compact]),
test_fts("1.23000000000000000000e+20",1.23e20, []),
ok.
-
+
test_fts(Expect, Float) ->
Expect = float_to_list(Float),
BinExpect = list_to_binary(Expect),
@@ -255,7 +255,7 @@ t_round(Config) when is_list(Config) ->
256 = round(id(255.6)),
-1033 = round(id(-1033.3)),
-1034 = round(id(-1033.6)),
-
+
% OTP-3722:
X = id((1 bsl 27) - 1),
MX = -X,
@@ -345,9 +345,9 @@ t_integer_to_string(Config) when is_list(Config) ->
%% Invalid types
lists:foreach(fun(Value) ->
- {'EXIT', {badarg, _}} =
+ {'EXIT', {badarg, _}} =
(catch erlang:integer_to_binary(Value)),
- {'EXIT', {badarg, _}} =
+ {'EXIT', {badarg, _}} =
(catch erlang:integer_to_list(Value))
end,[atom,1.2,0.0,[$1,[$2]]]),
@@ -416,27 +416,27 @@ t_string_to_integer(Config) when is_list(Config) ->
%% Invalid types
lists:foreach(fun(Value) ->
- {'EXIT', {badarg, _}} =
+ {'EXIT', {badarg, _}} =
(catch binary_to_integer(Value)),
- {'EXIT', {badarg, _}} =
+ {'EXIT', {badarg, _}} =
(catch erlang:list_to_integer(Value))
end,[atom,1.2,0.0,[$1,[$2]]]),
-
+
% Default base error cases
lists:foreach(fun(Value) ->
- {'EXIT', {badarg, _}} =
+ {'EXIT', {badarg, _}} =
(catch erlang:binary_to_integer(
list_to_binary(Value))),
- {'EXIT', {badarg, _}} =
+ {'EXIT', {badarg, _}} =
(catch erlang:list_to_integer(Value))
end,["1.0"," 1"," -1","","+"]),
-
+
% Custom base error cases
lists:foreach(fun({Value,Base}) ->
- {'EXIT', {badarg, _}} =
+ {'EXIT', {badarg, _}} =
(catch binary_to_integer(
list_to_binary(Value),Base)),
- {'EXIT', {badarg, _}} =
+ {'EXIT', {badarg, _}} =
(catch erlang:list_to_integer(Value,Base))
end,[{" 1",1},{" 1",37},{"2",2},{"C",11},
{"1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111z",16},
@@ -449,10 +449,61 @@ t_string_to_integer(Config) when is_list(Config) ->
ok.
+%% Tests edge cases for list_to_integer; compares with known good values
+
+t_list_to_integer_edge_cases(Config) when is_list(Config) ->
+ %% Take integer literals and compare to their representation in ExtTerm
+ T = [
+ {16, "0", <<131,97,0>>},
+ {16, "-0", <<131,97,0>>},
+
+ {16, "f", <<131,97,15>>},
+ {16, "-f", <<131,98,255,255,255,241>>},
+
+ {16, "0000000000000000000000000000000000000000000000000f",
+ <<131,97,15>>},
+ {16, "-0000000000000000000000000000000000000000000000000f",
+ <<131,98,255,255,255,241>>},
+
+ {16, "ffffffff", <<131,110,4,0,255,255,255,255>>},
+ {16, "-ffffffff", <<131,110,4,1,255,255,255,255>>},
+
+ {16, "7fffffff", <<131,110,4,0,255,255,255,127>>},
+ {16, "-7fffffff", <<131,98,128,0,0,1>>},
+
+ {16, "ffffffffffffffff",
+ <<131,110,8,0,255,255,255,255,255,255,255,255>>},
+ {16, "-ffffffffffffffff",
+ <<131,110,8,1,255,255,255,255,255,255,255,255>>},
+
+ {16, "7fffffffffffffff",
+ <<131,110,8,0,255,255,255,255,255,255,255,127>>},
+ {16, "-7fffffffffffffff",
+ <<131,110,8,1,255,255,255,255,255,255,255,127>>},
+
+ %% Alleged 32-bit corner case (should not happen on 64-bit). At 32-4
+ %% bits we may corrupt sign bit and fall out of SMALL_INT range.
+ {2, "1000000000000000000000000000", <<131,98,8,0,0,0>>},
+ {2, "-1000000000000000000000000000", <<131,98,248,0,0,0>>},
+
+ %% 64-bit corner case (should not happen on 32-bit) at 64-4 bits we
+ %% corrupt sign bit and fall out of SMALL_INT range (bam! all dead)
+ {2, "100000000000000000000000000000000000000000000000000000000000",
+ <<131,110,8,0,0,0,0,0,0,0,0,8>>},
+ {2, "-100000000000000000000000000000000000000000000000000000000000",
+ <<131,110,8,1,0,0,0,0,0,0,0,8>>}
+ ],
+ [begin
+ io:format("~s base ~p vs ~p~n", [Str, Base, Bin]),
+ FromStr = list_to_integer(Str, Base),
+ FromStr = binary_to_term(Bin)
+ end || {Base, Str, Bin} <- T],
+ ok.
+
test_sti(Num) ->
[begin
io:format("Testing ~p:~p",[Num,Base]),
- test_sti(Num,Base)
+ test_sti(Num,Base)
end|| Base <- lists:seq(2,36)].
test_sti(Num,Base) ->