From 5d79f55ca441727578d34b78ee0d6d8aa80976ee Mon Sep 17 00:00:00 2001
From: Rickard Green <rickard@erlang.org>
Date: Sat, 5 Jan 2013 03:07:14 +0100
Subject: Implement UTF-8 atom support for jinterface

---
 .../com/ericsson/otp/erlang/AbstractNode.java      |  4 +-
 .../com/ericsson/otp/erlang/OtpErlangAtom.java     |  2 +-
 .../com/ericsson/otp/erlang/OtpExternal.java       |  6 ++
 .../com/ericsson/otp/erlang/OtpInputStream.java    | 64 ++++++++++++++++++----
 .../com/ericsson/otp/erlang/OtpOutputStream.java   | 60 +++++++++++++++++++-
 5 files changed, 119 insertions(+), 17 deletions(-)

(limited to 'lib/jinterface/java_src')

diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java
index 16cb544a16..c76fad5e45 100644
--- a/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java
+++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java
@@ -90,6 +90,8 @@ public class AbstractNode {
     static final int dFlagExportPtrTag = 0x200; // NOT SUPPORTED
     static final int dFlagBitBinaries = 0x400;
     static final int dFlagNewFloats = 0x800;
+    static final int dFlagUnicodeIo = 0x1000; // defined here, not part of the default flags below
+    static final int dFlagUtf8Atoms = 0x10000; // OR-ed into the default flags below
 
     int ntype = NTYPE_R6;
     int proto = 0; // tcp/ip
@@ -98,7 +100,7 @@ public class AbstractNode {
     int creation = 0;
     int flags = dFlagExtendedReferences | dFlagExtendedPidsPorts
 	    | dFlagBitBinaries | dFlagNewFloats | dFlagFunTags
-	    | dflagNewFunTags;
+	    | dflagNewFunTags | dFlagUtf8Atoms;
 
     /* initialize hostname and default cookie */
     static {
diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java
index ced4dbb8c2..2768edc6fa 100644
--- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java
+++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java
@@ -51,7 +51,7 @@ public class OtpErlangAtom extends OtpErlangObject implements Serializable,
 		    "null string value");
 	}
 
-	if (atom.length() > maxAtomLength) {
+	if (atom.codePointCount(0, atom.length()) > maxAtomLength) {
 	    throw new java.lang.IllegalArgumentException("Atom may not exceed "
 		    + maxAtomLength + " characters: " + atom);
 	}
diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java
index e70b9a786b..2a4cd4fa2d 100644
--- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java
+++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java
@@ -88,6 +88,12 @@ public class OtpExternal {
     /** The tag used for old Funs */
     public static final int funTag = 117;
 
+    /** The tag used for UTF-8 encoded atoms (two-byte length field) */
+    public static final int atomUtf8Tag = 118;
+
+    /** The tag used for small UTF-8 encoded atoms (one-byte length field) */
+    public static final int smallAtomUtf8Tag = 119;
+
     /** The tag used for compressed terms */
     public static final int compressedTag = 80;
 
diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java
index ae5f4ee072..c2a79af841 100644
--- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java
+++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java
@@ -351,26 +351,64 @@ public class OtpInputStream extends ByteArrayInputStream {
      */
     public String read_atom() throws OtpErlangDecodeException {
 	int tag;
-	int len;
+	int len = -1;
 	byte[] strbuf;
 	String atom;
 
 	tag = read1skip_version();
 
-	if (tag != OtpExternal.atomTag) {
-	    throw new OtpErlangDecodeException(
-		    "wrong tag encountered, expected " + OtpExternal.atomTag
-			    + ", got " + tag);
-	}
+	switch (tag) {
 
-	len = read2BE();
+	case OtpExternal.atomTag:
+	    len = read2BE();
+	    strbuf = new byte[len];
+	    this.readN(strbuf);
+	    try {
+		atom = new String(strbuf, "ISO-8859-1");
+	    } catch (final java.io.UnsupportedEncodingException e) {
+		throw new OtpErlangDecodeException(
+		    "Failed to decode ISO-8859-1 atom");
+	    }
+	    if (atom.length() > OtpExternal.maxAtomLength) {
+		/*
+		 * Throwing an exception would be better I think,
+		 * but truncation seems to be the way it has
+		 * been done in other parts of OTP...
+		 */
+		atom = atom.substring(0, OtpExternal.maxAtomLength);
+	    }
+	    break;
 
-	strbuf = new byte[len];
-	this.readN(strbuf);
-	atom = OtpErlangString.newString(strbuf);
+	case OtpExternal.smallAtomUtf8Tag:
+	    len = read1();
+	    /* fall through */
+	case OtpExternal.atomUtf8Tag:
+	    if (len < 0) {
+		len = read2BE();
+	    }
+	    strbuf = new byte[len];
+	    this.readN(strbuf);
+	    try {
+		atom = new String(strbuf, "UTF-8");
+	    } catch (final java.io.UnsupportedEncodingException e) {
+		throw new OtpErlangDecodeException(
+		    "Failed to decode UTF-8 atom");
+	    }
+	    if (atom.codePointCount(0, atom.length()) > OtpExternal.maxAtomLength) {
+		/*
+		 * Throwing an exception would be better I think,
+		 * but truncation seems to be the way it has
+		 * been done in other parts of OTP...
+		 */
+		final int[] cps = OtpErlangString.stringToCodePoints(atom);
+		atom = new String(cps, 0, OtpExternal.maxAtomLength);
+	    }
+	    break;
 
-	if (atom.length() > OtpExternal.maxAtomLength) {
-	    atom = atom.substring(0, OtpExternal.maxAtomLength);
+	default:
+	    throw new OtpErlangDecodeException(
+		"wrong tag encountered, expected " + OtpExternal.atomTag
+		+ ", or "  + OtpExternal.atomUtf8Tag + ", got " + tag);
 	}
 
 	return atom;
@@ -1152,6 +1190,8 @@ public class OtpInputStream extends ByteArrayInputStream {
 	    return new OtpErlangLong(this);
 
 	case OtpExternal.atomTag:
+	case OtpExternal.smallAtomUtf8Tag:
+	case OtpExternal.atomUtf8Tag:
 	    return new OtpErlangAtom(this);
 
 	case OtpExternal.floatTag:
diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java
index 22ebb4688a..10bdf389cd 100644
--- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java
+++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java
@@ -343,9 +343,63 @@ public class OtpOutputStream extends ByteArrayOutputStream {
      *            the string to write.
      */
     public void write_atom(final String atom) {
-	write1(OtpExternal.atomTag);
-	write2BE(atom.length());
-	writeN(atom.getBytes());
+	String enc_atom;
+	byte[] bytes;
+	boolean isLatin1 = true; /* cleared when a code point above 0xFF is found */
+
+	if (atom.codePointCount(0, atom.length()) <= OtpExternal.maxAtomLength) {
+	    enc_atom = atom;
+	}
+	else {
+	    /*
+	     * Too long: keep only the first maxAtomLength code points.
+	     * Throwing an exception would arguably be better, but
+	     * truncation seems to be what other parts of OTP do.
+	     */
+	    enc_atom = new String(OtpErlangString.stringToCodePoints(atom),
+				  0, OtpExternal.maxAtomLength);
+	}
+
+	for (int offset = 0; offset < enc_atom.length();) {
+	    final int cp = enc_atom.codePointAt(offset);
+	    if ((cp & ~0xFF) != 0) {
+		isLatin1 = false;
+		break;
+	    }
+	    offset += Character.charCount(cp);
+	}
+	try {
+	    if (isLatin1) {
+		bytes = enc_atom.getBytes("ISO-8859-1");
+		write1(OtpExternal.atomTag);
+		write2BE(bytes.length);
+	    }
+	    else {
+		bytes = enc_atom.getBytes("UTF-8");
+		final int length = bytes.length;
+		if (length < 256) { /* small form: length is written with write1 */
+		    write1(OtpExternal.smallAtomUtf8Tag);
+		    write1(length);
+		}
+		else {
+		    write1(OtpExternal.atomUtf8Tag);
+		    write2BE(length);
+		}
+	    }
+	    writeN(bytes);
+	} catch (final java.io.UnsupportedEncodingException e) {
+	    /*
+	     * This method declares no checked exception, so the
+	     * failure cannot be reported to the caller without
+	     * changing the API (an OtpErlangEncodeException would
+	     * have been the natural choice). Instead we write a
+	     * deliberately invalid small UTF-8 atom (length 2,
+	     * payload 0xffff) and let it fail for whoever tries
+	     * to decode it.
+	     */
+	    write1(OtpExternal.smallAtomUtf8Tag);
+	    write1(2);
+	    write2BE(0xffff); /* Invalid UTF-8 */
+	}
     }
 
     /**
-- 
cgit v1.2.3