From 5d79f55ca441727578d34b78ee0d6d8aa80976ee Mon Sep 17 00:00:00 2001 From: Rickard Green Date: Sat, 5 Jan 2013 03:07:14 +0100 Subject: Implement UTF-8 atom support for jinterface --- .../com/ericsson/otp/erlang/AbstractNode.java | 4 +- .../com/ericsson/otp/erlang/OtpErlangAtom.java | 2 +- .../com/ericsson/otp/erlang/OtpExternal.java | 6 ++ .../com/ericsson/otp/erlang/OtpInputStream.java | 64 ++++++++++++++++++---- .../com/ericsson/otp/erlang/OtpOutputStream.java | 60 +++++++++++++++++++- 5 files changed, 119 insertions(+), 17 deletions(-) (limited to 'lib/jinterface/java_src') diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java index 16cb544a16..c76fad5e45 100644 --- a/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java +++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java @@ -90,6 +90,8 @@ public class AbstractNode { static final int dFlagExportPtrTag = 0x200; // NOT SUPPORTED static final int dFlagBitBinaries = 0x400; static final int dFlagNewFloats = 0x800; + static final int dFlagUnicodeIo = 0x1000; + static final int dFlagUtf8Atoms = 0x10000; int ntype = NTYPE_R6; int proto = 0; // tcp/ip @@ -98,7 +100,7 @@ public class AbstractNode { int creation = 0; int flags = dFlagExtendedReferences | dFlagExtendedPidsPorts | dFlagBitBinaries | dFlagNewFloats | dFlagFunTags - | dflagNewFunTags; + | dflagNewFunTags | dFlagUtf8Atoms; /* initialize hostname and default cookie */ static { diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java index ced4dbb8c2..2768edc6fa 100644 --- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java +++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java @@ -51,7 +51,7 @@ public class OtpErlangAtom extends OtpErlangObject implements Serializable, "null string value"); } - if (atom.length() > maxAtomLength) { + if (atom.codePointCount(0, atom.length()) > maxAtomLength) { throw new java.lang.IllegalArgumentException("Atom may not exceed " + maxAtomLength + " characters: " + atom); } diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java index e70b9a786b..2a4cd4fa2d 100644 --- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java +++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java @@ -88,6 +88,12 @@ public class OtpExternal { /** The tag used for old Funs */ public static final int funTag = 117; + /** The tag used for unicode atoms */ + public static final int atomUtf8Tag = 118; + + /** The tag used for small unicode atoms */ + public static final int smallAtomUtf8Tag = 119; + /** The tag used for compressed terms */ public static final int compressedTag = 80; diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java index ae5f4ee072..c2a79af841 100644 --- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java +++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java @@ -351,26 +351,64 @@ public class OtpInputStream extends ByteArrayInputStream { */ public String read_atom() throws OtpErlangDecodeException { int tag; - int len; + int len = -1; byte[] strbuf; String atom; tag = read1skip_version(); - if (tag != OtpExternal.atomTag) { - throw new OtpErlangDecodeException( - "wrong tag encountered, expected " + OtpExternal.atomTag - + ", got " + tag); - } + switch (tag) { - len = read2BE(); + case OtpExternal.atomTag: + len = read2BE(); + strbuf = new byte[len]; + this.readN(strbuf); + try { + atom = new String(strbuf, "ISO-8859-1"); + } catch (final java.io.UnsupportedEncodingException e) { + throw new OtpErlangDecodeException( + "Failed to decode ISO-8859-1 atom"); + } + if (atom.length() > OtpExternal.maxAtomLength) { + /* + * Throwing an exception would be better I think, + * but truncation seems to be the way it has + * been done in other parts of OTP... + */ + atom = atom.substring(0, OtpExternal.maxAtomLength); + } + break; - strbuf = new byte[len]; - this.readN(strbuf); - atom = OtpErlangString.newString(strbuf); + case OtpExternal.smallAtomUtf8Tag: + len = read1(); + /* fall through */ + case OtpExternal.atomUtf8Tag: + if (len < 0) { + len = read2BE(); + } + strbuf = new byte[len]; + this.readN(strbuf); + try { + atom = new String(strbuf, "UTF-8"); + } catch (final java.io.UnsupportedEncodingException e) { + throw new OtpErlangDecodeException( + "Failed to decode UTF-8 atom"); + } + if (atom.codePointCount(0, atom.length()) > OtpExternal.maxAtomLength) { + /* + * Throwing an exception would be better I think, + * but truncation seems to be the way it has + * been done in other parts of OTP... + */ + final int[] cps = OtpErlangString.stringToCodePoints(atom); + atom = new String(cps, 0, OtpExternal.maxAtomLength); + } + break; - if (atom.length() > OtpExternal.maxAtomLength) { - atom = atom.substring(0, OtpExternal.maxAtomLength); + default: + throw new OtpErlangDecodeException( + "wrong tag encountered, expected " + OtpExternal.atomTag + + ", or " + OtpExternal.atomUtf8Tag + ", got " + tag); } return atom; @@ -1152,6 +1190,8 @@ public class OtpInputStream extends ByteArrayInputStream { return new OtpErlangLong(this); case OtpExternal.atomTag: + case OtpExternal.smallAtomUtf8Tag: + case OtpExternal.atomUtf8Tag: return new OtpErlangAtom(this); case OtpExternal.floatTag: diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java index 22ebb4688a..10bdf389cd 100644 --- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java +++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java @@ -343,9 +343,63 @@ public class OtpOutputStream extends ByteArrayOutputStream { * the string to write. */ public void write_atom(final String atom) { - write1(OtpExternal.atomTag); - write2BE(atom.length()); - writeN(atom.getBytes()); + String enc_atom; + byte[] bytes; + boolean isLatin1 = true; + + if (atom.codePointCount(0, atom.length()) <= OtpExternal.maxAtomLength) { + enc_atom = atom; + } + else { + /* + * Throwing an exception would be better I think, + * but truncation seems to be the way it has + * been done in other parts of OTP... + */ + enc_atom = new String(OtpErlangString.stringToCodePoints(atom), + 0, OtpExternal.maxAtomLength); + } + + for (int offset = 0; offset < enc_atom.length();) { + final int cp = enc_atom.codePointAt(offset); + if ((cp & ~0xFF) != 0) { + isLatin1 = false; + break; + } + offset += Character.charCount(cp); + } + try { + if (isLatin1) { + bytes = enc_atom.getBytes("ISO-8859-1"); + write1(OtpExternal.atomTag); + write2BE(bytes.length); + } + else { + bytes = enc_atom.getBytes("UTF-8"); + final int length = bytes.length; + if (length < 256) { + write1(OtpExternal.smallAtomUtf8Tag); + write1(length); + } + else { + write1(OtpExternal.atomUtf8Tag); + write2BE(length); + } + } + writeN(bytes); + } catch (final java.io.UnsupportedEncodingException e) { + /* + * Sigh, why didn't the API designer add an + * OtpErlangEncodeException to these encoding + * functions?!? Instead of changing the API we + * write an invalid atom and let it fail for + * whoever trying to decode this... Sigh, + * again... + */ + write1(OtpExternal.smallAtomUtf8Tag); + write1(2); + write2BE(0xffff); /* Invalid UTF-8 */ + } } /** -- cgit v1.2.3