From e81e9e367eb0f6bae1c818f5351a7118f832c31c Mon Sep 17 00:00:00 2001
From: Sverker Eriksson <sverker@erlang.org>
Date: Wed, 16 Jan 2013 20:44:09 +0100
Subject: atom fixes for NIFs and atom_to_binary

---
 erts/emulator/beam/erl_nif.c     | 24 +++++++++-----
 erts/emulator/beam/erl_unicode.c | 67 +++++-----------------------------------
 erts/emulator/beam/global.h      |  2 --
 3 files changed, 24 insertions(+), 69 deletions(-)

(limited to 'erts/emulator')

diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c
index 185ac75d73..f00d5f86ce 100644
--- a/erts/emulator/beam/erl_nif.c
+++ b/erts/emulator/beam/erl_nif.c
@@ -743,16 +743,23 @@ int enif_get_atom(ErlNifEnv* env, Eterm atom, char* buf, unsigned len,
 {
     Atom* ap;
     ASSERT(encoding == ERL_NIF_LATIN1);
-    if (is_not_atom(atom)) {
+    if (is_not_atom(atom) || len==0) {
 	return 0;
     }
     ap = atom_tab(atom_val(atom));
-    if (ap->len+1 > len) {
+
+    if (ap->latin1_chars < 0 || ap->latin1_chars >= len) {
 	return 0;
     }
-    sys_memcpy(buf, ap->name, ap->len);
-    buf[ap->len] = '\0';
-    return ap->len + 1;
+    if (ap->latin1_chars == ap->len) {
+	sys_memcpy(buf, ap->name, ap->len);
+    }
+    else {
+	int dlen = erts_utf8_to_latin1((byte*)buf, ap->name, ap->len);
+	ASSERT(dlen == ap->latin1_chars); (void)dlen;
+    }
+    buf[ap->latin1_chars] = '\0';
+    return ap->latin1_chars + 1;
 }
 
 int enif_get_int(ErlNifEnv* env, Eterm term, int* ip)
@@ -854,7 +861,10 @@ int enif_get_atom_length(ErlNifEnv* env, Eterm atom, unsigned* len,
     ASSERT(enc == ERL_NIF_LATIN1);
     if (is_not_atom(atom)) return 0;
     ap = atom_tab(atom_val(atom));
-    *len = ap->len;
+    if (ap->latin1_chars < 0) {
+	return 0;
+    }
+    *len = ap->latin1_chars;
     return 1;
 }
 
@@ -961,7 +971,7 @@ ERL_NIF_TERM enif_make_atom(ErlNifEnv* env, const char* name)
 
 ERL_NIF_TERM enif_make_atom_len(ErlNifEnv* env, const char* name, size_t len)
 {
-    return am_atom_put(name, len);
+    return erts_atom_put((byte*)name, len, ERTS_ATOM_ENC_LATIN1, 1);
 }
 
 int enif_make_existing_atom(ErlNifEnv* env, const char* name, ERL_NIF_TERM* atom,
diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c
index 6600ce4a4a..c00293de89 100644
--- a/erts/emulator/beam/erl_unicode.c
+++ b/erts/emulator/beam/erl_unicode.c
@@ -1853,32 +1853,21 @@ BIF_RETTYPE atom_to_binary_2(BIF_ALIST_2)
     ap = atom_tab(atom_val(BIF_ARG_1));
 
     if (BIF_ARG_2 == am_latin1) {
-	int i;
 	Eterm bin_term;
-	int bin_size = ap->len;
-
-	for (i = 0; i < ap->len; ) {  
-	    if (ap->name[i] < 0x80) i++;
-	    else {
-		ASSERT(ap->name[i] >= 0xC0);
-		if (ap->name[i] < 0xE0) {
-		    ASSERT(i+1 < ap->len && (ap->name[i+1] & 0xC0) == 0x80);
-		    i += 2;
-		    bin_size -= 1;
-		}
-		else goto error;
-	    }
+
+	if (ap->latin1_chars < 0) {
+	    goto error;
 	}
-	if (bin_size == ap->len) {
+	if (ap->latin1_chars == ap->len) {
 	    bin_term = new_binary(BIF_P, ap->name, ap->len);
 	}
 	else {
 	    byte* bin_p;
 	    int dbg_sz;
-	    bin_term = new_binary(BIF_P, 0, bin_size);
+	    bin_term = new_binary(BIF_P, 0, ap->latin1_chars);
 	    bin_p = binary_bytes(bin_term);
 	    dbg_sz = erts_utf8_to_latin1(bin_p, ap->name, ap->len);
-	    ASSERT(dbg_sz == bin_size); (void)dbg_sz; 
+	    ASSERT(dbg_sz == ap->latin1_chars); (void)dbg_sz; 
 	}
 	BIF_RET(bin_term);
     } else if (BIF_ARG_2 == am_utf8 || BIF_ARG_2 == am_unicode) {
@@ -2676,23 +2665,6 @@ BIF_RETTYPE file_native_name_encoding_0(BIF_ALIST_0)
     }
 }
 
-int erts_utf8_is_latin1_string(const byte *string, int len)
-{
-    /* Assumes string is encoded in valid UTF-8 */
-    int i;
-    while (i < len) {
-	if ((string[i] & 0x80) == 0)
-	    i++;
-	else if (i+1 < len
-		 && (string[i] & 0xFE) == 0xC2
-		 && (string[i+1] & 0xC0) == 0x80)
-	    i +=2;
-	else
-	    return 0;
-    }
-    return 1;
-}
-
 int erts_utf8_to_latin1(byte* dest, const byte* source, int slen)
 {
     /*
@@ -2700,6 +2672,7 @@ int erts_utf8_to_latin1(byte* dest, const byte* source, int slen)
      * and that dest has enough room.
      */
     byte* dp = dest;
+
     while (slen > 0) {
 	if ((source[0] & 0x80) == 0) {
 	    *dp++ = *source++;
@@ -2717,29 +2690,3 @@ int erts_utf8_to_latin1(byte* dest, const byte* source, int slen)
     return dp - dest;
 }
 
-int erts_utf8_to_latin1_backwards(byte *dest, const byte *source, int slen)
-{
-    /*
-     * Assumes source contains valid utf8 that can be encoded as latin1,
-     * and that dest has enough room.
-     */
-    int dix = 0;
-    int six = slen;
-    while (six > 0) {
-	six--;
-	dix--;
-	if ((source[six] & 0x80) == 0)
-	    dest[dix] = source[six];
-	else {
-	    byte c;
-	    ASSERT(six > 0);
-	    ASSERT((source[six] & 0xC0) == 0x80);
-	    ASSERT((source[six-1] & 0xFE) == 0xC2);
- 	    c = source[six] & 0x3F;
-	    six--;
-	    c |= source[six] << 6;
-	    dest[dix] = c;
-	}
-    }
-    return -dix;
-}
diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h
index eccdf10c75..649352ca91 100755
--- a/erts/emulator/beam/global.h
+++ b/erts/emulator/beam/global.h
@@ -1530,9 +1530,7 @@ char *erts_convert_filename_to_native(Eterm name, char *statbuf,
 Eterm erts_convert_native_to_filename(Process *p, byte *bytes);
 Eterm erts_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, Uint left,
 			Uint *num_built, Uint *num_eaten, Eterm tail);
-int erts_utf8_is_latin1_string(const byte *string, int len);
 int erts_utf8_to_latin1(byte* dest, const byte* source, int slen);
-int erts_utf8_to_latin1_backwards(byte* dest, const byte* source, int slen);
 #define ERTS_UTF8_OK 0
 #define ERTS_UTF8_INCOMPLETE 1
 #define ERTS_UTF8_ERROR 2
-- 
cgit v1.2.3