aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/beam/erl_unicode.c
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator/beam/erl_unicode.c')
-rw-r--r--erts/emulator/beam/erl_unicode.c181
1 files changed, 83 insertions, 98 deletions
diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c
index 3a968594f3..2d1d1443a7 100644
--- a/erts/emulator/beam/erl_unicode.c
+++ b/erts/emulator/beam/erl_unicode.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2008-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2008-2017. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -54,7 +55,7 @@ static BIF_RETTYPE finalize_list_to_list(Process *p,
Uint num_processed_bytes,
Uint num_bytes_to_process,
Uint num_resulting_chars,
- int state, int left,
+ int state, Sint left,
Eterm tail);
static BIF_RETTYPE characters_to_utf8_trap(BIF_ALIST_3);
static BIF_RETTYPE characters_to_list_trap_1(BIF_ALIST_3);
@@ -122,19 +123,16 @@ static void cleanup_restart_context(RestartContext *rc)
}
}
-static void cleanup_restart_context_bin(Binary *bp)
+static int cleanup_restart_context_bin(Binary *bp)
{
RestartContext *rc = ERTS_MAGIC_BIN_DATA(bp);
cleanup_restart_context(rc);
+ return 1;
}
-static RestartContext *get_rc_from_bin(Eterm bin)
+static RestartContext *get_rc_from_bin(Eterm mref)
{
- Binary *mbp;
- ASSERT(ERTS_TERM_IS_MAGIC_BINARY(bin));
-
- mbp = ((ProcBin *) binary_val(bin))->val;
-
+ Binary *mbp = erts_magic_ref2bin(mref);
ASSERT(ERTS_MAGIC_BIN_DESTRUCTOR(mbp)
== cleanup_restart_context_bin);
return (RestartContext *) ERTS_MAGIC_BIN_DATA(mbp);
@@ -147,8 +145,8 @@ static Eterm make_magic_bin_for_restart(Process *p, RestartContext *rc)
RestartContext *restartp = ERTS_MAGIC_BIN_DATA(mbp);
Eterm *hp;
memcpy(restartp,rc,sizeof(RestartContext));
- hp = HAlloc(p, PROC_BIN_SIZE);
- return erts_mk_magic_binary_term(&hp, &MSO(p), mbp);
+ hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE);
+ return erts_mk_magic_ref(&hp, &MSO(p), mbp);
}
@@ -172,12 +170,13 @@ static ERTS_INLINE int allowed_iterations(Process *p)
else
return tmp;
}
-static ERTS_INLINE int cost_to_proc(Process *p, int cost)
+
+static ERTS_INLINE void cost_to_proc(Process *p, Sint cost)
{
- int x = (cost / LOOP_FACTOR);
+ Sint x = (cost / LOOP_FACTOR);
BUMP_REDS(p,x);
- return x;
}
+
static ERTS_INLINE int simple_loops_to_common(int cost)
{
int factor = (LOOP_FACTOR_SIMPLE / LOOP_FACTOR);
@@ -242,14 +241,15 @@ static int utf8_len(byte first)
return -1;
}
-static int copy_utf8_bin(byte *target, byte *source, Uint size,
- byte *leftover, int *num_leftovers,
- byte **err_pos, Uint *characters) {
- int copied = 0;
+static Uint copy_utf8_bin(byte *target, byte *source, Uint size,
+ byte *leftover, int *num_leftovers,
+ byte **err_pos, Uint *characters)
+{
+ Uint copied = 0;
if (leftover != NULL && *num_leftovers) {
int need = utf8_len(leftover[0]);
int from_source = need - (*num_leftovers);
- int c;
+ Uint c;
byte *tmp_err_pos = NULL;
ASSERT(need > 0);
ASSERT(from_source > 0);
@@ -501,8 +501,8 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */
}
-static Eterm do_build_utf8(Process *p, Eterm ioterm, int *left, int latin1,
- byte *target, int *pos, Uint *characters, int *err,
+static Eterm do_build_utf8(Process *p, Eterm ioterm, Sint *left, int latin1,
+ byte *target, Uint *pos, Uint *characters, int *err,
byte *leftover, int *num_leftovers)
{
int c;
@@ -572,7 +572,7 @@ static Eterm do_build_utf8(Process *p, Eterm ioterm, int *left, int latin1,
}
if (!latin1) {
- int num;
+ Uint num;
byte *err_pos = NULL;
num = copy_utf8_bin(target + (*pos), bytes,
size, leftover, num_leftovers,&err_pos,characters);
@@ -803,7 +803,7 @@ static int check_leftovers(byte *source, int size)
-static BIF_RETTYPE build_utf8_return(Process *p,Eterm bin,int pos,
+static BIF_RETTYPE build_utf8_return(Process *p,Eterm bin,Uint pos,
Eterm rest_term,int err,
byte *leftover,int num_leftovers,Eterm latin1)
{
@@ -858,8 +858,8 @@ static BIF_RETTYPE characters_to_utf8_trap(BIF_ALIST_3)
#endif
byte* bytes;
Eterm rest_term;
- int left, sleft;
- int pos;
+ Sint left, sleft;
+ Uint pos;
int err;
byte leftover[4]; /* used for temp buffer too,
otherwise 3 bytes would have been enough */
@@ -873,7 +873,7 @@ static BIF_RETTYPE characters_to_utf8_trap(BIF_ALIST_3)
real_bin = binary_val(BIF_ARG_1);
ASSERT(*real_bin == HEADER_PROC_BIN);
#endif
- pos = (int) binary_size(BIF_ARG_1);
+ pos = binary_size(BIF_ARG_1);
bytes = binary_bytes(BIF_ARG_1);
sleft = left = allowed_iterations(BIF_P);
err = 0;
@@ -933,9 +933,9 @@ BIF_RETTYPE unicode_characters_to_binary_2(BIF_ALIST_2)
int latin1;
Eterm bin;
byte *bytes;
- int pos;
+ Uint pos;
int err;
- int left, sleft;
+ Sint left, sleft;
Eterm rest_term, subject;
byte leftover[4]; /* used for temp buffer too, o
therwise 3 bytes would have been enough */
@@ -998,7 +998,7 @@ BIF_RETTYPE unicode_characters_to_binary_2(BIF_ALIST_2)
byte *t = NULL;
Uint sz = binary_size(bin);
byte *by = erts_get_aligned_binary_bytes(bin,&t);
- int i;
+ Uint i;
erts_printf("<<");
for (i = 0;i < sz; ++i) {
erts_printf((i == sz -1) ? "0x%X" : "0x%X, ", (unsigned) by[i]);
@@ -1006,7 +1006,7 @@ BIF_RETTYPE unicode_characters_to_binary_2(BIF_ALIST_2)
erts_printf(">>: ");
erts_free_aligned_binary_bytes(t);
}
- erts_printf("%d - %d = %d\n",sleft,left,sleft - left);
+ erts_printf("%ld - %ld = %ld\n", sleft, left, sleft - left);
}
#endif
cost_to_proc(BIF_P, sleft - left);
@@ -1014,10 +1014,10 @@ BIF_RETTYPE unicode_characters_to_binary_2(BIF_ALIST_2)
leftover,num_leftovers,BIF_ARG_2);
}
-static BIF_RETTYPE build_list_return(Process *p, byte *bytes, int pos, Uint characters,
+static BIF_RETTYPE build_list_return(Process *p, byte *bytes, Uint pos, Uint characters,
Eterm rest_term, int err,
byte *leftover, int num_leftovers,
- Eterm latin1, int left)
+ Eterm latin1, Sint left)
{
Eterm *hp;
@@ -1069,11 +1069,11 @@ static BIF_RETTYPE characters_to_list_trap_1(BIF_ALIST_3)
{
RestartContext *rc;
byte* bytes;
- int pos;
+ Uint pos;
Uint characters;
int err;
Eterm rest_term;
- int left, sleft;
+ Sint left, sleft;
int latin1 = 0;
byte leftover[4]; /* used for temp buffer too,
@@ -1106,9 +1106,9 @@ BIF_RETTYPE unicode_characters_to_list_2(BIF_ALIST_2)
int latin1;
Uint characters = 0;
byte *bytes;
- int pos;
+ Uint pos;
int err;
- int left, sleft;
+ Sint left, sleft;
Eterm rest_term;
byte leftover[4]; /* used for temp buffer too, o
therwise 3 bytes would have been enough */
@@ -1540,7 +1540,7 @@ static BIF_RETTYPE finalize_list_to_list(Process *p,
Uint num_processed_bytes,
Uint num_bytes_to_process,
Uint num_resulting_chars,
- int state, int left,
+ int state, Sint left,
Eterm tail)
{
Uint num_built; /* characters */
@@ -1887,74 +1887,57 @@ binary_to_atom(Process* proc, Eterm bin, Eterm enc, int must_exist)
byte* bytes;
byte *temp_alloc = NULL;
Uint bin_size;
+ Eterm a;
if ((bytes = erts_get_aligned_binary_bytes(bin, &temp_alloc)) == 0) {
BIF_ERROR(proc, BADARG);
}
bin_size = binary_size(bin);
+
if (enc == am_latin1) {
- Eterm a;
- if (bin_size > MAX_ATOM_CHARACTERS) {
- system_limit:
- erts_free_aligned_binary_bytes(temp_alloc);
- BIF_ERROR(proc, SYSTEM_LIMIT);
- }
if (!must_exist) {
- a = erts_atom_put((byte *) bytes,
- bin_size,
- ERTS_ATOM_ENC_LATIN1,
- 0);
- erts_free_aligned_binary_bytes(temp_alloc);
- if (is_non_value(a))
- goto badarg;
- BIF_RET(a);
- } else if (erts_atom_get((char *)bytes, bin_size, &a, ERTS_ATOM_ENC_LATIN1)) {
- erts_free_aligned_binary_bytes(temp_alloc);
- BIF_RET(a);
- } else {
+ int lix = erts_atom_put_index((byte *) bytes,
+ bin_size,
+ ERTS_ATOM_ENC_LATIN1,
+ 0);
+ if (lix == ATOM_BAD_ENCODING_ERROR) {
+ badarg:
+ erts_free_aligned_binary_bytes(temp_alloc);
+ BIF_ERROR(proc, BADARG);
+ } else if (lix == ATOM_MAX_CHARS_ERROR) {
+ system_limit:
+ erts_free_aligned_binary_bytes(temp_alloc);
+ BIF_ERROR(proc, SYSTEM_LIMIT);
+ }
+
+ a = make_atom(lix);
+ } else if (!erts_atom_get((char *)bytes, bin_size, &a, ERTS_ATOM_ENC_LATIN1)) {
goto badarg;
}
- } else if (enc == am_utf8 || enc == am_unicode) {
- Eterm res;
- Uint num_chars = 0;
- const byte* p = bytes;
- Uint left = bin_size;
- while (left) {
- if (++num_chars > MAX_ATOM_CHARACTERS) {
+ } else if (enc == am_utf8 || enc == am_unicode) {
+ if (!must_exist) {
+ int uix = erts_atom_put_index((byte *) bytes,
+ bin_size,
+ ERTS_ATOM_ENC_UTF8,
+ 0);
+ if (uix == ATOM_BAD_ENCODING_ERROR) {
+ goto badarg;
+ } else if (uix == ATOM_MAX_CHARS_ERROR) {
goto system_limit;
}
- if ((p[0] & 0x80) == 0) {
- ++p;
- --left;
- }
- else if (left >= 2
- && (p[0] & 0xFE) == 0xC2 /* only allow latin1 subset */
- && (p[1] & 0xC0) == 0x80) {
- p += 2;
- left -= 2;
- }
- else goto badarg;
- }
- if (!must_exist) {
- res = erts_atom_put((byte *) bytes,
- bin_size,
- ERTS_ATOM_ENC_UTF8,
- 0);
+ a = make_atom(uix);
}
- else if (!erts_atom_get((char*)bytes, bin_size, &res, ERTS_ATOM_ENC_UTF8)) {
+ else if (!erts_atom_get((char*)bytes, bin_size, &a, ERTS_ATOM_ENC_UTF8)) {
goto badarg;
}
- erts_free_aligned_binary_bytes(temp_alloc);
- if (is_non_value(res))
- goto badarg;
- BIF_RET(res);
} else {
- badarg:
- erts_free_aligned_binary_bytes(temp_alloc);
- BIF_ERROR(proc, BADARG);
+ goto badarg;
}
+
+ erts_free_aligned_binary_bytes(temp_alloc);
+ BIF_RET(a);
}
BIF_RETTYPE binary_to_atom_2(BIF_ALIST_2)
@@ -2015,7 +1998,7 @@ char *erts_convert_filename_to_encoding(Eterm name, char *statbuf, size_t statbu
++need;
}
if (used)
- *used = (Sint) need;
+ *used = need;
if (need+extra > statbuf_size) {
name_buf = (char *) erts_alloc(alloc_type, need+extra);
} else {
@@ -2126,6 +2109,8 @@ Eterm erts_convert_native_to_filename(Process *p, byte *bytes)
mac = 1;
case ERL_FILENAME_UTF8:
size = strlen((char *) bytes);
+ if (size == 0)
+ return NIL;
if (erts_analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != ERTS_UTF8_OK) {
goto noconvert;
}
@@ -2504,7 +2489,7 @@ void erts_copy_utf8_to_utf16_little(byte *target, byte *bytes, int num_chars)
((Uint) (bytes[3] & ((byte) 0x3F)));
bytes += 4;
} else {
- erl_exit(1,"Internal unicode error in prim_file:internal_name2native/1");
+ erts_exit(ERTS_ERROR_EXIT,"Internal unicode error in prim_file:internal_name2native/1");
}
*target++ = (byte) (unipoint & 0xFF);
*target++ = (byte) ((unipoint >> 8) & 0xFF);