aboutsummaryrefslogtreecommitdiffstats
path: root/lib/jinterface/java_src
diff options
context:
space:
mode:
authorSverker Eriksson <[email protected]>2013-01-23 18:09:35 +0100
committerSverker Eriksson <[email protected]>2013-01-23 18:09:35 +0100
commitb8e623410d1c22fe6d5fdeb8ccb0b2305533f033 (patch)
tree708d64e36e18b61ae1801c02ec3aeef42a697be3 /lib/jinterface/java_src
parente99df74bee7c245ec76678e336fcd09d4b51a089 (diff)
parentd6e3e256b850050b7a86323b2948009d5fcc30a9 (diff)
downloadotp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.tar.gz
otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.tar.bz2
otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.zip
Merge branch 'sverk/r16/utf8-atoms'
* sverk/r16/utf8-atoms: erl_interface: Fix bug when transcoding atoms from and to UTF8 erl_interface: Changed erlang_char_encoding interface erts: Testcase doing unicode atom printout with ~w erl_interface: even more utf8 atom stuff erts: Fix bug in analyze_utf8 causing faulty latin1 detection Add UTF-8 node name support for epmd workaround... Fix merge conflict with hasse UTF-8 atom documentation test case erl_interface: utf8 atoms continued Add utf8 atom distribution test cases atom fixes for NIFs and atom_to_binary UTF-8 support for distribution Implement UTF-8 atom support for jinterface erl_interface: Enable decode of unicode atoms stdlib: Fix printing of unicode atoms erts: Change internal representation of atoms to utf8 erts: Refactor rename DFLAG(S)_INTERNAL_TAGS for conformity Conflicts: erts/emulator/beam/io.c OTP-10753
Diffstat (limited to 'lib/jinterface/java_src')
-rw-r--r--lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java4
-rw-r--r--lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java2
-rw-r--r--lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java6
-rw-r--r--lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java64
-rw-r--r--lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java60
5 files changed, 119 insertions, 17 deletions
diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java
index 16cb544a16..c76fad5e45 100644
--- a/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java
+++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java
@@ -90,6 +90,8 @@ public class AbstractNode {
static final int dFlagExportPtrTag = 0x200; // NOT SUPPORTED
static final int dFlagBitBinaries = 0x400;
static final int dFlagNewFloats = 0x800;
+ static final int dFlagUnicodeIo = 0x1000;
+ static final int dFlagUtf8Atoms = 0x10000;
int ntype = NTYPE_R6;
int proto = 0; // tcp/ip
@@ -98,7 +100,7 @@ public class AbstractNode {
int creation = 0;
int flags = dFlagExtendedReferences | dFlagExtendedPidsPorts
| dFlagBitBinaries | dFlagNewFloats | dFlagFunTags
- | dflagNewFunTags;
+ | dflagNewFunTags | dFlagUtf8Atoms;
/* initialize hostname and default cookie */
static {
diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java
index ced4dbb8c2..2768edc6fa 100644
--- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java
+++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java
@@ -51,7 +51,7 @@ public class OtpErlangAtom extends OtpErlangObject implements Serializable,
"null string value");
}
- if (atom.length() > maxAtomLength) {
+ if (atom.codePointCount(0, atom.length()) > maxAtomLength) {
throw new java.lang.IllegalArgumentException("Atom may not exceed "
+ maxAtomLength + " characters: " + atom);
}
diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java
index e70b9a786b..2a4cd4fa2d 100644
--- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java
+++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java
@@ -88,6 +88,12 @@ public class OtpExternal {
/** The tag used for old Funs */
public static final int funTag = 117;
+ /** The tag used for unicode atoms */
+ public static final int atomUtf8Tag = 118;
+
+ /** The tag used for small unicode atoms */
+ public static final int smallAtomUtf8Tag = 119;
+
/** The tag used for compressed terms */
public static final int compressedTag = 80;
diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java
index ae5f4ee072..c2a79af841 100644
--- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java
+++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java
@@ -351,26 +351,64 @@ public class OtpInputStream extends ByteArrayInputStream {
*/
public String read_atom() throws OtpErlangDecodeException {
int tag;
- int len;
+ int len = -1;
byte[] strbuf;
String atom;
tag = read1skip_version();
- if (tag != OtpExternal.atomTag) {
- throw new OtpErlangDecodeException(
- "wrong tag encountered, expected " + OtpExternal.atomTag
- + ", got " + tag);
- }
+ switch (tag) {
- len = read2BE();
+ case OtpExternal.atomTag:
+ len = read2BE();
+ strbuf = new byte[len];
+ this.readN(strbuf);
+ try {
+ atom = new String(strbuf, "ISO-8859-1");
+ } catch (final java.io.UnsupportedEncodingException e) {
+ throw new OtpErlangDecodeException(
+ "Failed to decode ISO-8859-1 atom");
+ }
+ if (atom.length() > OtpExternal.maxAtomLength) {
+ /*
+ * Throwing an exception would be better I think,
+ * but truncation seems to be the way it has
+ * been done in other parts of OTP...
+ */
+ atom = atom.substring(0, OtpExternal.maxAtomLength);
+ }
+ break;
- strbuf = new byte[len];
- this.readN(strbuf);
- atom = OtpErlangString.newString(strbuf);
+ case OtpExternal.smallAtomUtf8Tag:
+ len = read1();
+ /* fall through */
+ case OtpExternal.atomUtf8Tag:
+ if (len < 0) {
+ len = read2BE();
+ }
+ strbuf = new byte[len];
+ this.readN(strbuf);
+ try {
+ atom = new String(strbuf, "UTF-8");
+ } catch (final java.io.UnsupportedEncodingException e) {
+ throw new OtpErlangDecodeException(
+ "Failed to decode UTF-8 atom");
+ }
+ if (atom.codePointCount(0, atom.length()) > OtpExternal.maxAtomLength) {
+ /*
+ * Throwing an exception would be better I think,
+ * but truncation seems to be the way it has
+ * been done in other parts of OTP...
+ */
+ final int[] cps = OtpErlangString.stringToCodePoints(atom);
+ atom = new String(cps, 0, OtpExternal.maxAtomLength);
+ }
+ break;
- if (atom.length() > OtpExternal.maxAtomLength) {
- atom = atom.substring(0, OtpExternal.maxAtomLength);
+ default:
+ throw new OtpErlangDecodeException(
+ "wrong tag encountered, expected " + OtpExternal.atomTag
+ + ", or " + OtpExternal.atomUtf8Tag + ", got " + tag);
}
return atom;
@@ -1152,6 +1190,8 @@ public class OtpInputStream extends ByteArrayInputStream {
return new OtpErlangLong(this);
case OtpExternal.atomTag:
+ case OtpExternal.smallAtomUtf8Tag:
+ case OtpExternal.atomUtf8Tag:
return new OtpErlangAtom(this);
case OtpExternal.floatTag:
diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java
index 22ebb4688a..10bdf389cd 100644
--- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java
+++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java
@@ -343,9 +343,63 @@ public class OtpOutputStream extends ByteArrayOutputStream {
* the string to write.
*/
public void write_atom(final String atom) {
- write1(OtpExternal.atomTag);
- write2BE(atom.length());
- writeN(atom.getBytes());
+ String enc_atom;
+ byte[] bytes;
+ boolean isLatin1 = true;
+
+ if (atom.codePointCount(0, atom.length()) <= OtpExternal.maxAtomLength) {
+ enc_atom = atom;
+ }
+ else {
+ /*
+ * Throwing an exception would be better I think,
+ * but truncation seems to be the way it has
+ * been done in other parts of OTP...
+ */
+ enc_atom = new String(OtpErlangString.stringToCodePoints(atom),
+ 0, OtpExternal.maxAtomLength);
+ }
+
+ for (int offset = 0; offset < enc_atom.length();) {
+ final int cp = enc_atom.codePointAt(offset);
+ if ((cp & ~0xFF) != 0) {
+ isLatin1 = false;
+ break;
+ }
+ offset += Character.charCount(cp);
+ }
+ try {
+ if (isLatin1) {
+ bytes = enc_atom.getBytes("ISO-8859-1");
+ write1(OtpExternal.atomTag);
+ write2BE(bytes.length);
+ }
+ else {
+ bytes = enc_atom.getBytes("UTF-8");
+ final int length = bytes.length;
+ if (length < 256) {
+ write1(OtpExternal.smallAtomUtf8Tag);
+ write1(length);
+ }
+ else {
+ write1(OtpExternal.atomUtf8Tag);
+ write2BE(length);
+ }
+ }
+ writeN(bytes);
+ } catch (final java.io.UnsupportedEncodingException e) {
+ /*
+ * Sigh, why didn't the API designer add an
+ * OtpErlangEncodeException to these encoding
+ * functions?!? Instead of changing the API we
+ * write an invalid atom and let it fail for
+ * whoever trying to decode this... Sigh,
+ * again...
+ */
+ write1(OtpExternal.smallAtomUtf8Tag);
+ write1(2);
+ write2BE(0xffff); /* Invalid UTF-8 */
+ }
}
/**