From b074099cc6bdb81285a17e0248373f199c695719 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Wed, 5 Sep 2012 20:43:03 +0200 Subject: Add new binary conversion bifs Added: binary_to_integer/1,2, integer_to_binary/1,2 --- erts/emulator/beam/bif.c | 50 ++++++++- erts/emulator/beam/bif.tab | 4 + erts/emulator/beam/big.c | 247 +++++++++++++++++++++++++++++++++++++++++--- erts/emulator/beam/big.h | 3 + erts/emulator/beam/binary.c | 92 +++++++++++++++++ 5 files changed, 380 insertions(+), 16 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c index df084e1185..a596068527 100644 --- a/erts/emulator/beam/bif.c +++ b/erts/emulator/beam/bif.c @@ -2895,20 +2895,64 @@ BIF_RETTYPE string_to_integer_1(BIF_ALIST_1) BIF_RET(TUPLE2(hp, res, tail)); } } - BIF_RETTYPE list_to_integer_1(BIF_ALIST_1) -{ + { + /* Using do_list_to_integer is about twice as fast as using + erts_chars_to_integer because we do not have to copy the + entire list */ Eterm res; Eterm dummy; /* must be a list */ - if (do_list_to_integer(BIF_P,BIF_ARG_1,&res,&dummy) != LTI_ALL_INTEGER) { BIF_ERROR(BIF_P,BADARG); } BIF_RET(res); } +BIF_RETTYPE list_to_integer_2(BIF_ALIST_2) +{ + + /* Bif implementation is about 50% faster than pure erlang, + and since we have erts_chars_to_integer now it is simpler + as well. This could be optmized further if we did not have to + copy the list to buf. */ + int i; + Eterm res; + char *buf = NULL; + int base; + + i = list_length(BIF_ARG_1); + if (i < 0) + BIF_ERROR(BIF_P, BADARG); + + base = signed_val(BIF_ARG_2); + + if (base < 2 || base > 36) + BIF_ERROR(BIF_P, BADARG); + + /* Take fast path if base it 10 */ + if (base == 10) + return list_to_integer_1(BIF_P,&BIF_ARG_1); + + buf = (char *) erts_alloc(ERTS_ALC_T_TMP, i + 1); + + if (intlist_to_buf(BIF_ARG_1, buf, i) < 0) + goto list_to_integer_1_error; + buf[i] = '\0'; /* null terminal */ + + if ((res = erts_chars_to_integer(BIF_P,buf,i,base)) == THE_NON_VALUE) + goto list_to_integer_1_error; + + erts_free(ERTS_ALC_T_TMP, (void *) buf); + BIF_RET(res); + + list_to_integer_1_error: + erts_free(ERTS_ALC_T_TMP, (void *) buf); + BIF_ERROR(BIF_P, BADARG); + + } + /**********************************************************************/ /* convert a float to a list of ascii characters */ diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index e313188901..aa6105de73 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -561,6 +561,10 @@ bif erlang:prepare_loading/2 bif erlang:finish_loading/1 bif erlang:insert_element/3 bif erlang:delete_element/2 +bif erlang:binary_to_integer/1 +bif erlang:binary_to_integer/2 +bif erlang:integer_to_binary/1 +bif erlang:list_to_integer/2 # # Obsolete diff --git a/erts/emulator/beam/big.c b/erts/emulator/beam/big.c index 5a5b162b9c..acfcc845e4 100644 --- a/erts/emulator/beam/big.c +++ b/erts/emulator/beam/big.c @@ -1674,26 +1674,26 @@ int big_decimal_estimate(Wterm x) ** Convert a bignum into a string of decimal numbers */ -static void write_big(Wterm x, void (*write_func)(void *, char), void *arg) +static Uint write_big(Wterm x, void (*write_func)(void *, char), void *arg) { Eterm* xp = big_val(x); ErtsDigit* dx = BIG_V(xp); dsize_t xl = BIG_SIZE(xp); short sign = BIG_SIGN(xp); ErtsDigit rem; + Uint n = 0; if (xl == 1 && *dx < D_DECIMAL_BASE) { rem = *dx; - if (rem == 0) - (*write_func)(arg, '0'); - else { + if (rem == 0) { + (*write_func)(arg, '0'); n++; + } else { while(rem) { - (*write_func)(arg, (rem % 10) + '0'); + (*write_func)(arg, (rem % 10) + '0'); n++; rem /= 10; } } - } - else { + } else { ErtsDigit* tmp = (ErtsDigit*) erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsDigit)*xl); dsize_t tmpl = xl; @@ -1704,15 +1704,14 @@ static void write_big(Wterm x, void (*write_func)(void *, char), void *arg) tmpl = D_div(tmp, tmpl, D_DECIMAL_BASE, tmp, &rem); if (tmpl == 1 && *tmp == 0) { while(rem) { - (*write_func)(arg, (rem % 10)+'0'); + (*write_func)(arg, (rem % 10)+'0'); n++; rem /= 10; } break; - } - else { + } else { int i = D_DECIMAL_EXP; while(i--) { - (*write_func)(arg, (rem % 10)+'0'); + (*write_func)(arg, (rem % 10)+'0'); n++; rem /= 10; } } @@ -1720,8 +1719,10 @@ static void write_big(Wterm x, void (*write_func)(void *, char), void *arg) erts_free(ERTS_ALC_T_TMP, (void *) tmp); } - if (sign) - (*write_func)(arg, '-'); + if (sign) { + (*write_func)(arg, '-'); n++; + } + return n; } struct big_list__ { @@ -1762,6 +1763,20 @@ char *erts_big_to_string(Wterm x, char *buf, Uint buf_sz) return big_str; } +/* Bignum to binary bytes + * e.g. 1 bsl 64 -> "18446744073709551616" + */ + +Uint erts_big_to_binary_bytes(Eterm x, char *buf, Uint buf_sz) +{ + char *big_str = buf + buf_sz; + Uint n; + n = write_big(x, write_string, (void *) &big_str); + ASSERT(buf <= big_str && big_str <= buf + buf_sz); + return n; +} + + /* ** Normalize a bignum given thing pointer length in digits and a sign ** patch zero if odd length @@ -2467,3 +2482,209 @@ int term_equals_2pow32(Eterm x) return 0; } } + + +#define IS_VALID_CHARACTER(CHAR,BASE) \ + (CHAR < '0' \ + || (CHAR > ('0' + BASE - 1) \ + && !(BASE > 10 \ + && ((CHAR >= 'a' && CHAR < ('a' + BASE - 10)) \ + || (CHAR >= 'A' && CHAR < ('A' + BASE - 10)))))) +#define CHARACTER_FROM_BASE(CHAR) \ + ((CHAR <= '9') ? CHAR - '0' : 10 + ((CHAR <= 'Z') ? CHAR - 'A' : CHAR - 'a')) +#define D_BASE_EXP(BASE) (d_base_exp_lookup[BASE-2]) +#define D_BASE_BASE(BASE) (d_base_base_lookup[BASE-2]) +#define LG2_LOOKUP(BASE) (lg2_lookup[base-2]) + +/* + * for i in 2..64 do + * lg2_lookup[i-2] = log2(i) + * end + * How many bits are needed to store string of size n + */ +const double lg2_lookup[] = { 1.0, 1.58496, 2, 2.32193, 2.58496, 2.80735, 3.0, + 3.16993, 3.32193, 3.45943, 3.58496, 3.70044, 3.80735, 3.90689, 4.0, + 4.08746, 4.16993, 4.24793, 4.32193, 4.39232, 4.45943, 4.52356, 4.58496, + 4.64386, 4.70044, 4.75489, 4.80735, 4.85798, 4.90689, 4.9542, 5.0, + 5.04439, 5.08746, 5.12928, 5.16993, 5.20945, 5.24793, 5.2854, 5.32193, + 5.35755, 5.39232, 5.42626, 5.45943, 5.49185, 5.52356, 5.55459, 5.58496, + 5.61471, 5.64386, 5.67243, 5.70044, 5.72792, 5.75489, 5.78136, 5.80735, + 5.83289, 5.85798, 5.88264, 5.90689, 5.93074, 5.9542, 5.97728, 6.0 }; + +/* + * for i in 2..64 do + * d_base_exp_lookup[i-2] = 31 / lg2_lookup[i-2]; + * end + * How many characters can fit in 31 bits + */ +const byte d_base_exp_lookup[] = { 31, 19, 15, 13, 11, 11, 10, 9, 9, 8, 8, 8, 8, + 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5 }; + +/* + * for i in 2..64 do + * d_base_base_lookup[i-2] = pow(i,d_base_exp_lookup[i-2]); + * end + * How much can the characters which fit in 31 bit represent + */ +const Uint d_base_base_lookup[] = { 2147483648, 1162261467, 1073741824, + 1220703125, 362797056, 1977326743, 1073741824, 387420489, 1000000000, + 214358881, 429981696, 815730721, 1475789056, 170859375, 268435456, + 410338673, 612220032, 893871739, 1280000000, 1801088541, 113379904, + 148035889, 191102976, 244140625, 308915776, 387420489, 481890304, + 594823321, 729000000, 887503681, 1073741824, 1291467969, 1544804416, + 1838265625, 60466176, 69343957, 79235168, 90224199, 102400000, + 115856201, 130691232, 147008443, 164916224, 184528125, 205962976, + 229345007, 254803968, 282475249, 312500000, 345025251, 380204032, + 418195493, 459165024, 503284375, 550731776, 601692057, 656356768, + 714924299, 777600000, 844596301, 916132832, 992436543, 1073741824 }; + +Eterm erts_chars_to_integer(Process *BIF_P, char *bytes, + Uint size, const int base) { + Eterm res; + Sint i = 0; + int n = 0; + int neg = 0; + byte b; + Eterm *hp, *hp_end; + int m; + int lg2; + + if (size == 0) + goto bytebuf_to_integer_1_error; + + if (bytes[0] == '-') { + neg = 1; + bytes++; + size--; + + } else if (bytes[0] == '+') { + bytes++; + size--; + } + + if (size < SMALL_DIGITS && base <= 10) { + /* * + * Take shortcut if we know that all chars are '0' < b < '9' and + * fit in a small. This improves speed by about 10% over the generic + * small case. + * */ + while (size--) { + b = *bytes++; + + if (b < '0' || b > ('0'+base-1)) + goto bytebuf_to_integer_1_error; + + i = i * base + b - '0'; + } + + if (neg) + i = -i; + res = make_small(i); + goto bytebuf_to_integer_1_done; + } + + /* + * Calculate the maximum number of bits which will + * be needed to represent the binary + */ + lg2 = ((size+2)*LG2_LOOKUP(base)+1); + + if (lg2 < SMALL_BITS) { + /* Take shortcut if we know it will fit in a small. + * This improves speed by about 30%. + */ + while (size) { + b = *bytes++; + size--; + + if (IS_VALID_CHARACTER(b,base)) + goto bytebuf_to_integer_1_error; + + i = i * base + CHARACTER_FROM_BASE(b); + + } + + if (neg) + i = -i; + res = make_small(i); + goto bytebuf_to_integer_1_done; + + } + + /* Start calculating bignum */ + m = (lg2 + D_EXP-1)/D_EXP; + m = BIG_NEED_SIZE(m); + + hp = HAlloc(BIF_P, m); + hp_end = hp + m; + + if ((i = (size % D_BASE_EXP(base))) == 0) + i = D_BASE_EXP(base); + + n = size - i; + m = 0; + + while (i--) { + b = *bytes++; + + if (IS_VALID_CHARACTER(b,base)) { + HRelease(BIF_P, hp_end, hp); + goto bytebuf_to_integer_1_error; + } + + m = base * m + CHARACTER_FROM_BASE(b); + } + + res = small_to_big(m, hp); + + while (n) { + i = D_BASE_EXP(base); + n -= D_BASE_EXP(base); + m = 0; + while (i--) { + b = *bytes++; + + if (IS_VALID_CHARACTER(b,base)) { + HRelease(BIF_P, hp_end, hp); + goto bytebuf_to_integer_1_error; + } + + m = base * m + CHARACTER_FROM_BASE(b); + } + if (is_small(res)) { + res = small_to_big(signed_val(res), hp); + } + res = big_times_small(res, D_BASE_BASE(base), hp); + if (is_small(res)) { + res = small_to_big(signed_val(res), hp); + } + res = big_plus_small(res, m, hp); + } + + if (is_big(res)) /* check if small */ + res = big_plus_small(res, 0, hp); /* includes conversion to small */ + + if (neg) { + if (is_small(res)) + res = make_small(-signed_val(res)); + else { + Uint *big = big_val(res); /* point to thing */ + *big = bignum_header_neg(*big); + } + } + + if (is_big(res)) { + hp += (big_arity(res) + 1); + } + HRelease(BIF_P, hp_end, hp); + goto bytebuf_to_integer_1_done; + +bytebuf_to_integer_1_error: + return THE_NON_VALUE; + +bytebuf_to_integer_1_done: + return res; + +} diff --git a/erts/emulator/beam/big.h b/erts/emulator/beam/big.h index 7eb1e5afe2..c74283b9e5 100644 --- a/erts/emulator/beam/big.h +++ b/erts/emulator/beam/big.h @@ -117,6 +117,7 @@ typedef Uint dsize_t; /* Vector size type */ int big_decimal_estimate(Wterm); Eterm erts_big_to_list(Eterm, Eterm**); char *erts_big_to_string(Wterm x, char *buf, Uint buf_sz); +Uint erts_big_to_binary_bytes(Eterm x, char *buf, Uint buf_sz); Eterm small_times(Sint, Sint, Eterm*); @@ -165,5 +166,7 @@ int term_equals_2pow32(Eterm); Eterm erts_uint64_to_big(Uint64, Eterm **); Eterm erts_sint64_to_big(Sint64, Eterm **); +Eterm erts_chars_to_integer(Process *, char*, Uint, const int); + #endif diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index dad13f1067..33abac2f3d 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -240,6 +240,98 @@ erts_bin_bytes_to_list(Eterm previous, Eterm* hp, byte* bytes, Uint size, Uint b return previous; } +BIF_RETTYPE binary_to_integer_1(BIF_ALIST_1) +{ + byte *temp_alloc = NULL; + char *bytes; + Uint size; + Eterm res; + + if ((bytes = (char*)erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc)) + == NULL ) + goto binary_to_integer_1_error; + + size = binary_size(BIF_ARG_1); + + if ((res = erts_chars_to_integer(BIF_P,bytes,size,10)) != THE_NON_VALUE) { + erts_free_aligned_binary_bytes(temp_alloc); + return res; + } + + binary_to_integer_1_error: + erts_free_aligned_binary_bytes(temp_alloc); + BIF_ERROR(BIF_P, BADARG); +} + +BIF_RETTYPE binary_to_integer_2(BIF_ALIST_2) +{ + byte *temp_alloc = NULL; + char *bytes; + Uint size; + int base; + Eterm res; + + if (!is_small(BIF_ARG_2)) + BIF_ERROR(BIF_P, BADARG); + + base = signed_val(BIF_ARG_2); + + if (base < 2 || base > 36) + BIF_ERROR(BIF_P, BADARG); + + if ((bytes = (char*)erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc)) + == NULL ) + goto binary_to_integer_2_error; + + size = binary_size(BIF_ARG_1); + + if ((res = erts_chars_to_integer(BIF_P,bytes,size,base)) != THE_NON_VALUE) { + erts_free_aligned_binary_bytes(temp_alloc); + return res; + } + + binary_to_integer_2_error: + + erts_free_aligned_binary_bytes(temp_alloc); + BIF_ERROR(BIF_P, BADARG); + +} + +BIF_RETTYPE integer_to_binary_1(BIF_ALIST_1) +{ + Uint size; + Eterm res; + + if (is_not_integer(BIF_ARG_1)) { + BIF_ERROR(BIF_P, BADARG); + } + + if (is_small(BIF_ARG_1)) { + char *c; + struct Sint_buf ibuf; + + /* Enhancement: If we can calculate the buffer size exactly + * we could avoid an unnecessary copy of buffers. + * Useful if size determination is faster than a copy. + */ + c = Sint_to_buf(signed_val(BIF_ARG_1), &ibuf); + size = sys_strlen(c); + res = new_binary(BIF_P, (byte *)c, size); + } else { + byte* bytes; + Uint n = 0; + + /* Here we also have multiple copies of buffers + * due to new_binary interface + */ + size = big_decimal_estimate(BIF_ARG_1) - 1; /* remove null */ + bytes = (byte*) erts_alloc(ERTS_ALC_T_TMP, sizeof(byte)*size); + n = erts_big_to_binary_bytes(BIF_ARG_1, (char *)bytes, size); + res = new_binary(BIF_P, bytes + size - n, n); + erts_free(ERTS_ALC_T_TMP, (void *) bytes); + } + BIF_RET(res); +} BIF_RETTYPE binary_to_list_1(BIF_ALIST_1) { -- cgit v1.2.3