diff options
author | Lukas Larsson <[email protected]> | 2016-02-03 14:42:45 +0100 |
---|---|---|
committer | Lukas Larsson <[email protected]> | 2016-02-03 14:42:45 +0100 |
commit | b200eebcf53b44ea8e5d3f1003f5a0c160b8dbbd (patch) | |
tree | 6806637c88ccaff8bb6d28d3b3e1b49ca4c19f6a /erts | |
parent | 0cf09adc86c87e8b25005c6c305cb419b050c344 (diff) | |
parent | c96b6c2f58642b457d806c0a8a5bed03d16e35f1 (diff) | |
download | otp-b200eebcf53b44ea8e5d3f1003f5a0c160b8dbbd.tar.gz otp-b200eebcf53b44ea8e5d3f1003f5a0c160b8dbbd.tar.bz2 otp-b200eebcf53b44ea8e5d3f1003f5a0c160b8dbbd.zip |
Merge branch 'kvakvs/erts/list_to_integer/OTP-13293'
* kvakvs/erts/list_to_integer/OTP-13293:
Better list_to_integer
Moved do_list_to_integer from bif.c to big.c
Diffstat (limited to 'erts')
-rw-r--r-- | erts/emulator/beam/bif.c | 205 | ||||
-rw-r--r-- | erts/emulator/beam/big.c | 433 | ||||
-rw-r--r-- | erts/emulator/beam/big.h | 14 | ||||
-rw-r--r-- | erts/emulator/test/num_bif_SUITE.erl | 99 |
4 files changed, 436 insertions, 315 deletions
diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c index 7e8eb44680..e116b10b95 100644 --- a/erts/emulator/beam/bif.c +++ b/erts/emulator/beam/bif.c @@ -2917,175 +2917,20 @@ BIF_RETTYPE integer_to_list_1(BIF_ALIST_1) /**********************************************************************/ -/* convert a list of ascii ascii integer value to an integer */ - - -#define LTI_BAD_STRUCTURE 0 -#define LTI_NO_INTEGER 1 -#define LTI_SOME_INTEGER 2 -#define LTI_ALL_INTEGER 3 - -static int do_list_to_integer(Process *p, Eterm orig_list, - Eterm *integer, Eterm *rest) -{ - Sint i = 0; - Uint ui = 0; - int skip = 0; - int neg = 0; - Sint n = 0; - int m; - int lg2; - Eterm res; - Eterm* hp; - Eterm *hp_end; - Eterm lst = orig_list; - Eterm tail = lst; - int error_res = LTI_BAD_STRUCTURE; - - if (is_nil(lst)) { - error_res = LTI_NO_INTEGER; - error: - *rest = tail; - *integer = make_small(0); - return error_res; - } - if (is_not_list(lst)) - goto error; - - /* if first char is a '-' then it is a negative integer */ - if (CAR(list_val(lst)) == make_small('-')) { - neg = 1; - skip = 1; - lst = CDR(list_val(lst)); - if (is_not_list(lst)) { - tail = lst; - error_res = LTI_NO_INTEGER; - goto error; - } - } else if (CAR(list_val(lst)) == make_small('+')) { - /* ignore plus */ - skip = 1; - lst = CDR(list_val(lst)); - if (is_not_list(lst)) { - tail = lst; - error_res = LTI_NO_INTEGER; - goto error; - } - } - - /* Calculate size and do type check */ - - while(1) { - if (is_not_small(CAR(list_val(lst)))) { - break; - } - if (unsigned_val(CAR(list_val(lst))) < '0' || - unsigned_val(CAR(list_val(lst))) > '9') { - break; - } - ui = ui * 10; - ui = ui + unsigned_val(CAR(list_val(lst))) - '0'; - n++; - lst = CDR(list_val(lst)); - if (is_nil(lst)) { - break; - } - if (is_not_list(lst)) { - break; - } - } - - tail = lst; - if (!n) { - error_res = LTI_NO_INTEGER; - goto error; - } - - - /* If n <= 8 then we know it's a small int - ** since 2^27 = 134217728. If n > 8 then we must - ** construct a bignum and let that routine do the checking - */ - - if (n <= SMALL_DIGITS) { /* It must be small */ - if (neg) i = -(Sint)ui; - else i = (Sint)ui; - res = make_small(i); - } else { - /* Convert from log10 to log2 by multiplying with 1/log10(2)=3.3219 - which we round up to (3 + 1/3) */ - lg2 = (n+1)*3 + (n+1)/3 + 1; - m = (lg2+D_EXP-1)/D_EXP; /* number of digits */ - m = BIG_NEED_SIZE(m); /* number of words + thing */ - - hp = HAlloc(p, m); - hp_end = hp + m; - - lst = orig_list; - if (skip) - lst = CDR(list_val(lst)); - - /* load first digits (at least one digit) */ - if ((i = (n % D_DECIMAL_EXP)) == 0) - i = D_DECIMAL_EXP; - n -= i; - m = 0; - while(i--) { - m = 10*m + (unsigned_val(CAR(list_val(lst))) - '0'); - lst = CDR(list_val(lst)); - } - res = small_to_big(m, hp); /* load first digits */ - - while(n) { - i = D_DECIMAL_EXP; - n -= D_DECIMAL_EXP; - m = 0; - while(i--) { - m = 10*m + (unsigned_val(CAR(list_val(lst))) - '0'); - lst = CDR(list_val(lst)); - } - if (is_small(res)) - res = small_to_big(signed_val(res), hp); - res = big_times_small(res, D_DECIMAL_BASE, hp); - if (is_small(res)) - res = small_to_big(signed_val(res), hp); - res = big_plus_small(res, m, hp); - } - - if (neg) { - if (is_small(res)) - res = make_small(-signed_val(res)); - else { - Uint *big = big_val(res); /* point to thing */ - *big = bignum_header_neg(*big); - } - } - - if (is_not_small(res)) { - res = big_plus_small(res, 0, hp); /* includes conversion to small */ +/* + * Converts a list of ascii base10 digits to an integer fully or partially. + * Returns result and the remaining tail. + * On error returns: {error,not_a_list}, or {error, no_integer} + */ - if (is_not_small(res)) { - hp += (big_arity(res)+1); - } - } - HRelease(p,hp_end,hp); - } - *integer = res; - *rest = tail; - if (tail != NIL) { - return LTI_SOME_INTEGER; - } - return LTI_ALL_INTEGER; -} BIF_RETTYPE string_to_integer_1(BIF_ALIST_1) { Eterm res; Eterm tail; Eterm *hp; /* must be a list */ - switch (do_list_to_integer(BIF_P,BIF_ARG_1,&res,&tail)) { - /* HAlloc after do_list_to_integer as it - might HAlloc itself (bignum) */ + switch (erts_list_to_integer(BIF_P, BIF_ARG_1, 10, &res, &tail)) { + /* HAlloc after erts_list_to_integer as it might HAlloc itself (bignum) */ case LTI_BAD_STRUCTURE: hp = HAlloc(BIF_P,3); BIF_RET(TUPLE2(hp, am_error, am_not_a_list)); @@ -3100,13 +2945,14 @@ BIF_RETTYPE string_to_integer_1(BIF_ALIST_1) BIF_RETTYPE list_to_integer_1(BIF_ALIST_1) { - /* Using do_list_to_integer is about twice as fast as using + /* Using erts_list_to_integer is about twice as fast as using erts_chars_to_integer because we do not have to copy the entire list */ Eterm res; Eterm dummy; /* must be a list */ - if (do_list_to_integer(BIF_P,BIF_ARG_1,&res,&dummy) != LTI_ALL_INTEGER) { + if (erts_list_to_integer(BIF_P, BIF_ARG_1, 10, + &res, &dummy) != LTI_ALL_INTEGER) { BIF_ERROR(BIF_P,BADARG); } BIF_RET(res); @@ -3114,14 +2960,12 @@ BIF_RETTYPE list_to_integer_1(BIF_ALIST_1) BIF_RETTYPE list_to_integer_2(BIF_ALIST_2) { - /* Bif implementation is about 50% faster than pure erlang, and since we have erts_chars_to_integer now it is simpler as well. This could be optmized further if we did not have to copy the list to buf. */ int i; - Eterm res; - char *buf = NULL; + Eterm res, dummy; int base; i = erts_list_length(BIF_ARG_1); @@ -3129,31 +2973,16 @@ BIF_RETTYPE list_to_integer_2(BIF_ALIST_2) BIF_ERROR(BIF_P, BADARG); base = signed_val(BIF_ARG_2); - + if (base < 2 || base > 36) BIF_ERROR(BIF_P, BADARG); - /* Take fast path if base it 10 */ - if (base == 10) - return list_to_integer_1(BIF_P,&BIF_ARG_1); - - buf = (char *) erts_alloc(ERTS_ALC_T_TMP, i + 1); - - if (intlist_to_buf(BIF_ARG_1, buf, i) < 0) - goto list_to_integer_1_error; - buf[i] = '\0'; /* null terminal */ - - if ((res = erts_chars_to_integer(BIF_P,buf,i,base)) == THE_NON_VALUE) - goto list_to_integer_1_error; - - erts_free(ERTS_ALC_T_TMP, (void *) buf); + if (erts_list_to_integer(BIF_P, BIF_ARG_1, base, + &res, &dummy) != LTI_ALL_INTEGER) { + BIF_ERROR(BIF_P,BADARG); + } BIF_RET(res); - - list_to_integer_1_error: - erts_free(ERTS_ALC_T_TMP, (void *) buf); - BIF_ERROR(BIF_P, BADARG); - - } +} /**********************************************************************/ diff --git a/erts/emulator/beam/big.c b/erts/emulator/beam/big.c index 02d37e24df..11838e24ef 100644 --- a/erts/emulator/beam/big.c +++ b/erts/emulator/beam/big.c @@ -48,7 +48,7 @@ _t_dst = (dst)+((sz)-1); \ _t_src = (src)+((sz)-1); \ while(_t_sz--) *_t_dst-- = *_t_src--; \ - } \ + } \ } while(0) /* add a and b with carry in + out */ @@ -423,6 +423,25 @@ #endif +/* Forward declaration of lookup tables (See below in this file) used in list to + * integer conversions for different bases. Also used in bignum printing. + */ +static const byte digits_per_sint_lookup[36-1]; +static const byte digits_per_small_lookup[36-1]; +static const Sint largest_power_of_base_lookup[36-1]; + +static ERTS_INLINE byte get_digits_per_signed_int(Uint base) { + return digits_per_sint_lookup[base-2]; +} + +static ERTS_INLINE byte get_digits_per_small(Uint base) { + return digits_per_small_lookup[base-2]; +} + +static ERTS_INLINE Sint get_largest_power_of_base(Uint base) { + return largest_power_of_base_lookup[base-2]; +} + /* ** compare two number vectors */ @@ -1719,8 +1738,10 @@ static Uint write_big(Wterm x, void (*write_func)(void *, char), void *arg) short sign = BIG_SIGN(xp); ErtsDigit rem; Uint n = 0; + const Uint digits_per_Sint = get_digits_per_signed_int(10); + const Sint largest_pow_of_base = get_largest_power_of_base(10); - if (xl == 1 && *dx < D_DECIMAL_BASE) { + if (xl == 1 && *dx < largest_pow_of_base) { rem = *dx; if (rem == 0) { (*write_func)(arg, '0'); n++; @@ -1738,7 +1759,7 @@ static Uint write_big(Wterm x, void (*write_func)(void *, char), void *arg) MOVE_DIGITS(tmp, dx, xl); while(1) { - tmpl = D_div(tmp, tmpl, D_DECIMAL_BASE, tmp, &rem); + tmpl = D_div(tmp, tmpl, largest_pow_of_base, tmp, &rem); if (tmpl == 1 && *tmp == 0) { while(rem) { (*write_func)(arg, (rem % 10)+'0'); n++; @@ -1746,7 +1767,7 @@ static Uint write_big(Wterm x, void (*write_func)(void *, char), void *arg) } break; } else { - int i = D_DECIMAL_EXP; + Uint i = digits_per_Sint; while(i--) { (*write_func)(arg, (rem % 10)+'0'); n++; rem /= 10; @@ -2522,63 +2543,100 @@ int term_equals_2pow32(Eterm x) } } +static ERTS_INLINE int c2int_is_invalid_char(byte ch, int base) { + return (ch < '0' + || (ch > ('0' + base - 1) + && !(base > 10 + && ((ch >= 'a' && ch < ('a' + base - 10)) + || (ch >= 'A' && ch < ('A' + base - 10)))))); +} -#define IS_VALID_CHARACTER(CHAR,BASE) \ - (CHAR < '0' \ - || (CHAR > ('0' + BASE - 1) \ - && !(BASE > 10 \ - && ((CHAR >= 'a' && CHAR < ('a' + BASE - 10)) \ - || (CHAR >= 'A' && CHAR < ('A' + BASE - 10)))))) -#define CHARACTER_FROM_BASE(CHAR) \ - ((CHAR <= '9') ? CHAR - '0' : 10 + ((CHAR <= 'Z') ? CHAR - 'A' : CHAR - 'a')) -#define D_BASE_EXP(BASE) (d_base_exp_lookup[BASE-2]) -#define D_BASE_BASE(BASE) (d_base_base_lookup[BASE-2]) -#define LG2_LOOKUP(BASE) (lg2_lookup[base-2]) +static ERTS_INLINE byte c2int_digit_from_base(byte ch) { + return ch <= '9' ? ch - '0' + : (10 + (ch <= 'Z' ? ch - 'A' : ch - 'a')); +} /* - * for i in 2..64 do - * lg2_lookup[i-2] = log2(i) - * end - * How many bits are needed to store string of size n + * How many bits are needed to store 1 digit of given base in binary + * Wo.Alpha formula: Table [log2[n], {n,2,36}] */ -const double lg2_lookup[] = { 1.0, 1.58496, 2, 2.32193, 2.58496, 2.80735, 3.0, - 3.16993, 3.32193, 3.45943, 3.58496, 3.70044, 3.80735, 3.90689, 4.0, - 4.08746, 4.16993, 4.24793, 4.32193, 4.39232, 4.45943, 4.52356, 4.58496, - 4.64386, 4.70044, 4.75489, 4.80735, 4.85798, 4.90689, 4.9542, 5.0, - 5.04439, 5.08746, 5.12928, 5.16993, 5.20945, 5.24793, 5.2854, 5.32193, - 5.35755, 5.39232, 5.42626, 5.45943, 5.49185, 5.52356, 5.55459, 5.58496, - 5.61471, 5.64386, 5.67243, 5.70044, 5.72792, 5.75489, 5.78136, 5.80735, - 5.83289, 5.85798, 5.88264, 5.90689, 5.93074, 5.9542, 5.97728, 6.0 }; +static const double lg2_lookup[36-1] = { + 1.0, 1.58496, 2.0, 2.32193, 2.58496, 2.80735, 3.0, 3.16993, 3.32193, + 3.45943, 3.58496, 3.70044, 3.80735, 3.90689, 4.0, 4.08746, 4.16993, 4.24793, + 4.32193, 4.39232, 4.45943, 4.52356, 4.58496, 4.64386, 4.70044, 4.75489, + 4.80735, 4.85798, 4.90689, 4.9542, 5.0, 5.04439, 5.08746, 5.12928, 5.16993 +}; +static ERTS_INLINE double lookup_log2(Uint base) { + return lg2_lookup[base - 2]; +} /* - * for i in 2..64 do - * d_base_exp_lookup[i-2] = 31 / lg2_lookup[i-2]; - * end - * How many characters can fit in 31 bits + * How many digits can fit into a signed int (Sint) for given base, we take + * one digit away just to be on the safer side (some corner cases). */ -const byte d_base_exp_lookup[] = { 31, 19, 15, 13, 11, 11, 10, 9, 9, 8, 8, 8, 8, - 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5 }; +static const byte digits_per_sint_lookup[36-1] = { +#if (SIZEOF_VOID_P == 4) + /* Wo.Alpha formula: Table [Trunc[31 / log[2,n]]-1, {n, 2, 36}] */ + 30, 18, 14, 12, 10, 10, 9, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4 +#elif (SIZEOF_VOID_P == 8) + /* Wo.Alpha formula: Table [Trunc[63 / log[2,n]]-1, {n, 2, 36}] */ + 62, 38, 30, 26, 23, 21, 20, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, + 13, 13, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11 +#else + #error "Please produce a lookup table for the new architecture" +#endif +}; /* - * for i in 2..64 do - * d_base_base_lookup[i-2] = pow(i,d_base_exp_lookup[i-2]); - * end - * How much can the characters which fit in 31 bit represent + * How many digits can fit into Erlang Small (SMALL_BITS-1) counting sign bit */ -const Uint d_base_base_lookup[] = { 2147483648u, 1162261467u, 1073741824u, - 1220703125u, 362797056u, 1977326743u, 1073741824u, 387420489u, - 1000000000u, 214358881u, 429981696u, 815730721u, 1475789056u, - 170859375u, 268435456u, 410338673u, 612220032u, 893871739u, 1280000000u, - 1801088541u, 113379904u, 148035889u, 191102976u, 244140625u, 308915776u, - 387420489u, 481890304u, 594823321u, 729000000u, 887503681u, 1073741824u, - 1291467969u, 1544804416u, 1838265625u, 60466176u, 69343957u, 79235168u, - 90224199u, 102400000u, 115856201u, 130691232u, 147008443u, 164916224u, - 184528125u, 205962976u, 229345007u, 254803968u, 282475249u, 312500000u, - 345025251u, 380204032u, 418195493u, 459165024u, 503284375u, 550731776u, - 601692057u, 656356768u, 714924299u, 777600000u, 844596301u, 916132832u, - 992436543u, 1073741824u }; +static const byte digits_per_small_lookup[36-1] = { +#if (SIZEOF_VOID_P == 4) + /* Wo.Alpha formula: Table [Trunc[27 / log[2,n]]-1, {n, 2, 36}] */ + 27, 17, 13, 11, 10, 9, 9, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 +#elif (SIZEOF_VOID_P == 8) + /* Wo.Alpha formula: Table [Trunc[59 / log[2,n]]-1, {n, 2, 36}] */ + 59, 37, 29, 25, 22, 21, 19, 18, 17, 17, 16, 15, 15, 15, 14, 14, 14, 13, 13, + 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11 +#else + #error "Please produce a lookup table for the new architecture" +#endif +}; + +/* + * Largest power of base which can be represented in a signed int (Sint). + * Calculated by base 2..36 to the power of corresponding element from + * digits_per_sint_lookup. + */ +static const Sint largest_power_of_base_lookup[36-1] = { +#if (SIZEOF_VOID_P == 4) + /* Wo.Alpha formula: Table [Pow[n, Trunc[31 / log[2,n]]-1], {n, 2, 36}] */ + 1073741824, 387420489, 268435456, 244140625, 60466176, 282475249, 134217728, + 43046721, 100000000, 19487171, 35831808, 62748517, 105413504, 11390625, + 16777216, 24137569, 34012224, 47045881, 64000000, 85766121, 5153632, + 6436343,7962624, 9765625, 11881376, 14348907, 17210368, 20511149, 24300000, + 28629151, 33554432, 39135393, 45435424, 52521875, 1679616 +#elif (SIZEOF_VOID_P == 8) + /* Wo.Alpha formula: Table [Pow[n, Trunc[63 / log[2,n]]-1], {n, 2, 36}] + * with LL added after each element manually */ + 4611686018427387904LL, 1350851717672992089LL, 1152921504606846976LL, + 1490116119384765625LL, 789730223053602816LL, 558545864083284007LL, + 1152921504606846976LL, 150094635296999121LL, 100000000000000000LL, + 505447028499293771LL, 184884258895036416LL, 665416609183179841LL, + 155568095557812224LL, 437893890380859375LL, 72057594037927936LL, + 168377826559400929LL, 374813367582081024LL, 42052983462257059LL, + 81920000000000000LL, 154472377739119461LL, 282810057883082752LL, + 21914624432020321LL, 36520347436056576LL, 59604644775390625LL, + 95428956661682176LL, 150094635296999121LL, 232218265089212416LL, + 12200509765705829LL, 17714700000000000LL, 25408476896404831LL, + 36028797018963968LL, 50542106513726817LL, 70188843638032384LL, + 96549157373046875LL, 131621703842267136LL +#else + #error "Please produce a lookup table for the new architecture" +#endif +}; Eterm erts_chars_to_integer(Process *BIF_P, char *bytes, Uint size, const int base) { @@ -2588,8 +2646,11 @@ Eterm erts_chars_to_integer(Process *BIF_P, char *bytes, int neg = 0; byte b; Eterm *hp, *hp_end; - int m; + Sint m; int lg2; + const Uint digits_per_small = get_digits_per_small(base); + const Uint digits_per_Sint = get_digits_per_signed_int(base); + const Sint largest_pow_of_base = get_largest_power_of_base(base); if (size == 0) goto bytebuf_to_integer_1_error; @@ -2604,57 +2665,68 @@ Eterm erts_chars_to_integer(Process *BIF_P, char *bytes, size--; } + /* Trim leading zeroes */ + if (size) { + while (*bytes == '0') { + bytes++; + size--; + if (!size) { + /* All zero! */ + res = make_small(0); + goto bytebuf_to_integer_1_done; + } + } + } + if (size == 0) goto bytebuf_to_integer_1_error; - if (size < SMALL_DIGITS && base <= 10) { - /* * - * Take shortcut if we know that all chars are '0' < b < '9' and - * fit in a small. This improves speed by about 10% over the generic - * small case. - * */ - while (size--) { - b = *bytes++; + if (size < digits_per_small) { + if (base <= 10) { + /* * + * Take shortcut if we know that all chars are '0' < b < '9' and + * fit in a small. This improves speed by about 10% over the generic + * small case. + * */ + while (size--) { + b = *bytes++; - if (b < '0' || b > ('0'+base-1)) - goto bytebuf_to_integer_1_error; + if (b < '0' || b > ('0'+base-1)) + goto bytebuf_to_integer_1_error; - i = i * base + b - '0'; - } + i = i * base + b - '0'; + } - if (neg) - i = -i; - res = make_small(i); - goto bytebuf_to_integer_1_done; + if (neg) + i = -i; + res = make_small(i); + goto bytebuf_to_integer_1_done; + } + + /* Take shortcut if we know it will fit in a small. + * This improves speed by about 30%. + */ + while (size) { + b = *bytes++; + size--; + + if (c2int_is_invalid_char(b, base)) + goto bytebuf_to_integer_1_error; + + i = i * base + c2int_digit_from_base(b); + } + + if (neg) + i = -i; + res = make_small(i); + goto bytebuf_to_integer_1_done; } /* * Calculate the maximum number of bits which will * be needed to represent the binary */ - lg2 = ((size+2)*LG2_LOOKUP(base)+1); - - if (lg2 < SMALL_BITS) { - /* Take shortcut if we know it will fit in a small. - * This improves speed by about 30%. - */ - while (size) { - b = *bytes++; - size--; - - if (IS_VALID_CHARACTER(b,base)) - goto bytebuf_to_integer_1_error; - - i = i * base + CHARACTER_FROM_BASE(b); - - } - - if (neg) - i = -i; - res = make_small(i); - goto bytebuf_to_integer_1_done; - - } + lg2 = ((size+2)*lookup_log2(base)+1); /* Start calculating bignum */ m = (lg2 + D_EXP-1)/D_EXP; @@ -2663,8 +2735,8 @@ Eterm erts_chars_to_integer(Process *BIF_P, char *bytes, hp = HAlloc(BIF_P, m); hp_end = hp + m; - if ((i = (size % D_BASE_EXP(base))) == 0) - i = D_BASE_EXP(base); + if ((i = (size % digits_per_Sint)) == 0) + i = digits_per_Sint; n = size - i; m = 0; @@ -2672,34 +2744,34 @@ Eterm erts_chars_to_integer(Process *BIF_P, char *bytes, while (i--) { b = *bytes++; - if (IS_VALID_CHARACTER(b,base)) { + if (c2int_is_invalid_char(b,base)) { HRelease(BIF_P, hp_end, hp); goto bytebuf_to_integer_1_error; } - m = base * m + CHARACTER_FROM_BASE(b); + m = base * m + c2int_digit_from_base(b); } res = small_to_big(m, hp); while (n) { - i = D_BASE_EXP(base); - n -= D_BASE_EXP(base); + i = digits_per_Sint; + n -= digits_per_Sint; m = 0; while (i--) { b = *bytes++; - if (IS_VALID_CHARACTER(b,base)) { + if (c2int_is_invalid_char(b,base)) { HRelease(BIF_P, hp_end, hp); goto bytebuf_to_integer_1_error; } - m = base * m + CHARACTER_FROM_BASE(b); + m = base * m + c2int_digit_from_base(b); } if (is_small(res)) { res = small_to_big(signed_val(res), hp); } - res = big_times_small(res, D_BASE_BASE(base), hp); + res = big_times_small(res, largest_pow_of_base, hp); if (is_small(res)) { res = small_to_big(signed_val(res), hp); } @@ -2730,5 +2802,166 @@ bytebuf_to_integer_1_error: bytebuf_to_integer_1_done: return res; +} +/* Converts list of digits with given 'base' to integer sequentially. Returns + * result in 'integer_out', remaining tail goes to 'tail_out' and returns result + * code if the list was consumed fully or partially or there was an error + */ +LTI_result_t erts_list_to_integer(Process *BIF_P, Eterm orig_list, + const Uint base, + Eterm *integer_out, Eterm *tail_out) +{ + Sint i = 0; + Uint ui = 0; + int skip = 0; + int neg = 0; + Sint n = 0; + Sint m; + int lg2; + Eterm res; + Eterm lst = orig_list; + Eterm tail = lst; + int error_res = LTI_BAD_STRUCTURE; + const Uint digits_per_small = get_digits_per_small(base); + const Uint digits_per_Sint = get_digits_per_signed_int(base); + + if (is_nil(lst)) { + error_res = LTI_NO_INTEGER; + error: + *tail_out = tail; + *integer_out = make_small(0); + return error_res; + } + if (is_not_list(lst)) + goto error; + + /* if first char is a '-' then it is a negative integer */ + if (CAR(list_val(lst)) == make_small('-')) { + neg = 1; + skip = 1; + lst = CDR(list_val(lst)); + if (is_not_list(lst)) { + tail = lst; + error_res = LTI_NO_INTEGER; + goto error; + } + } else if (CAR(list_val(lst)) == make_small('+')) { + /* ignore plus */ + skip = 1; + lst = CDR(list_val(lst)); + if (is_not_list(lst)) { + tail = lst; + error_res = LTI_NO_INTEGER; + goto error; + } + } + + /* Calculate size and do type check */ + + while(1) { + byte ch; + if (is_not_small(CAR(list_val(lst)))) { + break; + } + ch = unsigned_val(CAR(list_val(lst))); + if (c2int_is_invalid_char(ch, base)) { + break; + } + ui = ui * base; + ui = ui + c2int_digit_from_base(ch); + n++; + lst = CDR(list_val(lst)); + if (is_nil(lst)) { + break; + } + if (is_not_list(lst)) { + break; + } + } + + tail = lst; + if (!n) { + error_res = LTI_NO_INTEGER; + goto error; + } + + + /* If length fits inside Sint then we know it's a small int. Else we + * must construct a bignum and let that routine do the checking + */ + + if (n <= digits_per_small) { /* It must be small */ + i = neg ? -(Sint)ui : (Sint)ui; + res = make_small(i); + } else { + const Sint largest_pow_of_base = get_largest_power_of_base(base); + Eterm *hp; + Eterm *hp_end; + + /* Convert from log_base to log2 using lookup table */ + lg2 = ((n+2)*lookup_log2(base)+1); + m = (lg2+D_EXP-1)/D_EXP; /* number of digits */ + m = BIG_NEED_SIZE(m); /* number of words + thing */ + + hp = HAlloc(BIF_P, m); + hp_end = hp + m; + + lst = orig_list; + if (skip) + lst = CDR(list_val(lst)); + + /* load first digits (at least one digit) */ + if ((i = (n % digits_per_Sint)) == 0) + i = digits_per_Sint; + n -= i; + m = 0; + while(i--) { + m *= base; + m += c2int_digit_from_base(unsigned_val(CAR(list_val(lst)))); + lst = CDR(list_val(lst)); + } + res = small_to_big(m, hp); /* load first digits */ + + while(n) { + i = digits_per_Sint; + n -= digits_per_Sint; + m = 0; + while(i--) { + m *= base; + m += c2int_digit_from_base(unsigned_val(CAR(list_val(lst)))); + lst = CDR(list_val(lst)); + } + if (is_small(res)) + res = small_to_big(signed_val(res), hp); + res = big_times_small(res, largest_pow_of_base, hp); + if (is_small(res)) + res = small_to_big(signed_val(res), hp); + res = big_plus_small(res, m, hp); + } + + if (neg) { + if (is_small(res)) + res = make_small(-signed_val(res)); + else { + Uint *big = big_val(res); /* point to thing */ + *big = bignum_header_neg(*big); + } + } + + if (is_not_small(res)) { + res = big_plus_small(res, 0, hp); /* includes conversion to small */ + + if (is_not_small(res)) { + hp += (big_arity(res)+1); + } + } + HRelease(BIF_P, hp_end, hp); + } + *integer_out = res; + *tail_out = tail; + if (tail != NIL) { + return LTI_SOME_INTEGER; + } + return LTI_ALL_INTEGER; } diff --git a/erts/emulator/beam/big.h b/erts/emulator/beam/big.h index 94f9bce10e..9c92de6b55 100644 --- a/erts/emulator/beam/big.h +++ b/erts/emulator/beam/big.h @@ -54,9 +54,6 @@ typedef Uint32 ErtsHalfDigit; #error "can not determine machine size" #endif -#define D_DECIMAL_EXP 9 -#define D_DECIMAL_BASE 1000000000 - typedef Uint dsize_t; /* Vector size type */ #define D_EXP (ERTS_SIZEOF_ETERM*8) @@ -173,4 +170,15 @@ Eterm erts_sint64_to_big(Sint64, Eterm **); Eterm erts_chars_to_integer(Process *, char*, Uint, const int); +/* How list_to_integer classifies the input, was it even a string? */ +typedef enum { + LTI_BAD_STRUCTURE = 0, + LTI_NO_INTEGER = 1, + LTI_SOME_INTEGER = 2, + LTI_ALL_INTEGER = 3 +} LTI_result_t; + +LTI_result_t erts_list_to_integer(Process *BIF_P, Eterm orig_list, + const Uint base, + Eterm *integer_out, Eterm *tail_out); #endif diff --git a/erts/emulator/test/num_bif_SUITE.erl b/erts/emulator/test/num_bif_SUITE.erl index 90b6a36262..d0840fe731 100644 --- a/erts/emulator/test/num_bif_SUITE.erl +++ b/erts/emulator/test/num_bif_SUITE.erl @@ -1,8 +1,8 @@ %% %% %CopyrightBegin% -%% +%% %% Copyright Ericsson AB 1997-2014. All Rights Reserved. -%% +%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,7 +14,7 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% +%% %% %CopyrightEnd% %% @@ -36,22 +36,22 @@ %% integer_to_binary/2 %% binary_to_integer/1 --export([all/0, suite/0, groups/0, init_per_suite/1, end_per_suite/1, +-export([all/0, suite/0, groups/0, init_per_suite/1, end_per_suite/1, init_per_group/2, end_per_group/2, t_abs/1, t_float/1, t_float_to_string/1, t_integer_to_string/1, - t_string_to_integer/1, + t_string_to_integer/1, t_list_to_integer_edge_cases/1, t_string_to_float_safe/1, t_string_to_float_risky/1, t_round/1, t_trunc/1 ]). suite() -> [{ct_hooks,[ts_install_cth]}]. -all() -> +all() -> [t_abs, t_float, t_float_to_string, t_integer_to_string, {group, t_string_to_float}, t_string_to_integer, t_round, - t_trunc]. + t_trunc, t_list_to_integer_edge_cases]. -groups() -> +groups() -> [{t_string_to_float, [], [t_string_to_float_safe, t_string_to_float_risky]}]. @@ -73,7 +73,7 @@ t_abs(Config) when is_list(Config) -> 5.5 = abs(id(5.5)), 0.0 = abs(id(0.0)), 100.0 = abs(id(-100.0)), - + %% Integers. 5 = abs(id(5)), 0 = abs(id(0)), @@ -93,7 +93,7 @@ t_abs(Config) when is_list(Config) -> BigNum = abs(BigNum), BigNum = abs(-BigNum), ok. - + t_float(Config) when is_list(Config) -> 0.0 = float(id(0)), 2.5 = float(id(2.5)), @@ -109,7 +109,7 @@ t_float(Config) when is_list(Config) -> %% Extremly big bignums. Big = id(list_to_integer(id(lists:duplicate(2000, $1)))), {'EXIT', {badarg, _}} = (catch float(Big)), - + ok. @@ -183,7 +183,7 @@ t_float_to_string(Config) when is_list(Config) -> test_fts("1.2300000000e+20",1.23e20, [{scientific, 10}, compact]), test_fts("1.23000000000000000000e+20",1.23e20, []), ok. - + test_fts(Expect, Float) -> Expect = float_to_list(Float), BinExpect = list_to_binary(Expect), @@ -255,7 +255,7 @@ t_round(Config) when is_list(Config) -> 256 = round(id(255.6)), -1033 = round(id(-1033.3)), -1034 = round(id(-1033.6)), - + % OTP-3722: X = id((1 bsl 27) - 1), MX = -X, @@ -345,9 +345,9 @@ t_integer_to_string(Config) when is_list(Config) -> %% Invalid types lists:foreach(fun(Value) -> - {'EXIT', {badarg, _}} = + {'EXIT', {badarg, _}} = (catch erlang:integer_to_binary(Value)), - {'EXIT', {badarg, _}} = + {'EXIT', {badarg, _}} = (catch erlang:integer_to_list(Value)) end,[atom,1.2,0.0,[$1,[$2]]]), @@ -416,27 +416,27 @@ t_string_to_integer(Config) when is_list(Config) -> %% Invalid types lists:foreach(fun(Value) -> - {'EXIT', {badarg, _}} = + {'EXIT', {badarg, _}} = (catch binary_to_integer(Value)), - {'EXIT', {badarg, _}} = + {'EXIT', {badarg, _}} = (catch erlang:list_to_integer(Value)) end,[atom,1.2,0.0,[$1,[$2]]]), - + % Default base error cases lists:foreach(fun(Value) -> - {'EXIT', {badarg, _}} = + {'EXIT', {badarg, _}} = (catch erlang:binary_to_integer( list_to_binary(Value))), - {'EXIT', {badarg, _}} = + {'EXIT', {badarg, _}} = (catch erlang:list_to_integer(Value)) end,["1.0"," 1"," -1","","+"]), - + % Custom base error cases lists:foreach(fun({Value,Base}) -> - {'EXIT', {badarg, _}} = + {'EXIT', {badarg, _}} = (catch binary_to_integer( list_to_binary(Value),Base)), - {'EXIT', {badarg, _}} = + {'EXIT', {badarg, _}} = (catch erlang:list_to_integer(Value,Base)) end,[{" 1",1},{" 1",37},{"2",2},{"C",11}, {"1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111z",16}, @@ -449,10 +449,61 @@ t_string_to_integer(Config) when is_list(Config) -> ok. +%% Tests edge cases for list_to_integer; compares with known good values + +t_list_to_integer_edge_cases(Config) when is_list(Config) -> + %% Take integer literals and compare to their representation in ExtTerm + T = [ + {16, "0", <<131,97,0>>}, + {16, "-0", <<131,97,0>>}, + + {16, "f", <<131,97,15>>}, + {16, "-f", <<131,98,255,255,255,241>>}, + + {16, "0000000000000000000000000000000000000000000000000f", + <<131,97,15>>}, + {16, "-0000000000000000000000000000000000000000000000000f", + <<131,98,255,255,255,241>>}, + + {16, "ffffffff", <<131,110,4,0,255,255,255,255>>}, + {16, "-ffffffff", <<131,110,4,1,255,255,255,255>>}, + + {16, "7fffffff", <<131,110,4,0,255,255,255,127>>}, + {16, "-7fffffff", <<131,98,128,0,0,1>>}, + + {16, "ffffffffffffffff", + <<131,110,8,0,255,255,255,255,255,255,255,255>>}, + {16, "-ffffffffffffffff", + <<131,110,8,1,255,255,255,255,255,255,255,255>>}, + + {16, "7fffffffffffffff", + <<131,110,8,0,255,255,255,255,255,255,255,127>>}, + {16, "-7fffffffffffffff", + <<131,110,8,1,255,255,255,255,255,255,255,127>>}, + + %% Alleged 32-bit corner case (should not happen on 64-bit). At 32-4 + %% bits we may corrupt sign bit and fall out of SMALL_INT range. + {2, "1000000000000000000000000000", <<131,98,8,0,0,0>>}, + {2, "-1000000000000000000000000000", <<131,98,248,0,0,0>>}, + + %% 64-bit corner case (should not happen on 32-bit) at 64-4 bits we + %% corrupt sign bit and fall out of SMALL_INT range (bam! all dead) + {2, "100000000000000000000000000000000000000000000000000000000000", + <<131,110,8,0,0,0,0,0,0,0,0,8>>}, + {2, "-100000000000000000000000000000000000000000000000000000000000", + <<131,110,8,1,0,0,0,0,0,0,0,8>>} + ], + [begin + io:format("~s base ~p vs ~p~n", [Str, Base, Bin]), + FromStr = list_to_integer(Str, Base), + FromStr = binary_to_term(Bin) + end || {Base, Str, Bin} <- T], + ok. + test_sti(Num) -> [begin io:format("Testing ~p:~p",[Num,Base]), - test_sti(Num,Base) + test_sti(Num,Base) end|| Base <- lists:seq(2,36)]. test_sti(Num,Base) -> |