From 5d79f55ca441727578d34b78ee0d6d8aa80976ee Mon Sep 17 00:00:00 2001
From: Rickard Green <rickard@erlang.org>
Date: Sat, 5 Jan 2013 03:07:14 +0100
Subject: Implement UTF-8 atom support for jinterface

---
 .../com/ericsson/otp/erlang/OtpOutputStream.java   | 60 ++++++++++++++++++++--
 1 file changed, 57 insertions(+), 3 deletions(-)

(limited to 'lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java')

diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java
index 22ebb4688a..10bdf389cd 100644
--- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java
+++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java
@@ -343,9 +343,63 @@ public class OtpOutputStream extends ByteArrayOutputStream {
      *            the string to write.
      */
     public void write_atom(final String atom) {
-	write1(OtpExternal.atomTag);
-	write2BE(atom.length());
-	writeN(atom.getBytes());
+	String enc_atom;
+	byte[] bytes;
+	boolean isLatin1 = true;
+
+	if (atom.codePointCount(0, atom.length()) <= OtpExternal.maxAtomLength) {
+	    enc_atom = atom;
+	}
+	else {
+	    /*
+	     * Throwing an exception would be better I think,
+	     * but truncation seems to be the way it has
+	     * been done in other parts of OTP...
+	     */
+	    enc_atom = new String(OtpErlangString.stringToCodePoints(atom),
+				  0, OtpExternal.maxAtomLength);
+	}
+
+	for (int offset = 0; offset < enc_atom.length();) {
+	    final int cp = enc_atom.codePointAt(offset);
+	    if ((cp & ~0xFF) != 0) {
+		isLatin1 = false;
+		break;
+	    }
+	    offset += Character.charCount(cp);
+	}
+	try {
+	    if (isLatin1) {
+		bytes = enc_atom.getBytes("ISO-8859-1");
+		write1(OtpExternal.atomTag);
+		write2BE(bytes.length);
+	    }
+	    else {
+		bytes = enc_atom.getBytes("UTF-8");
+		final int length = bytes.length;
+		if (length < 256) {
+		    write1(OtpExternal.smallAtomUtf8Tag);
+		    write1(length);
+		}
+		else {
+		    write1(OtpExternal.atomUtf8Tag);
+		    write2BE(length);
+		}
+	    }
+	    writeN(bytes);
+	} catch (final java.io.UnsupportedEncodingException e) {
+	    /*
+	     * Sigh, why didn't the API designer add an
+	     * OtpErlangEncodeException to these encoding
+	     * functions?!? Instead of changing the API we
+	     * write an invalid atom and let it fail for
+	     * whoever trying to decode this... Sigh,
+	     * again...
+	     */
+	    write1(OtpExternal.smallAtomUtf8Tag);
+	    write1(2);
+	    write2BE(0xffff); /* Invalid UTF-8 */
+	}
     }
 
     /**
-- 
cgit v1.2.3