diff options
| author | Sverker Eriksson <[email protected]> | 2013-01-23 18:09:35 +0100 | 
|---|---|---|
| committer | Sverker Eriksson <[email protected]> | 2013-01-23 18:09:35 +0100 | 
| commit | b8e623410d1c22fe6d5fdeb8ccb0b2305533f033 (patch) | |
| tree | 708d64e36e18b61ae1801c02ec3aeef42a697be3 /lib/jinterface/java_src/com | |
| parent | e99df74bee7c245ec76678e336fcd09d4b51a089 (diff) | |
| parent | d6e3e256b850050b7a86323b2948009d5fcc30a9 (diff) | |
| download | otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.tar.gz otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.tar.bz2 otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.zip | |
Merge branch 'sverk/r16/utf8-atoms'
* sverk/r16/utf8-atoms:
  erl_interface: Fix bug when transcoding atoms from and to UTF8
  erl_interface: Changed erlang_char_encoding interface
  erts: Testcase doing unicode atom printout with ~w
  erl_interface: even more utf8 atom stuff
  erts: Fix bug in analyze_utf8 causing faulty latin1 detection
  Add UTF-8 node name support for epmd
  workaround...
  Fix merge conflict with hasse
  UTF-8 atom documentation
  test case
  erl_interface: utf8 atoms continued
  Add utf8 atom distribution test cases
  atom fixes for NIFs and atom_to_binary
  UTF-8 support for distribution
  Implement UTF-8 atom support for jinterface
  erl_interface: Enable decode of unicode atoms
  stdlib: Fix printing of unicode atoms
  erts: Change internal representation of atoms to utf8
  erts: Refactor rename DFLAG(S)_INTERNAL_TAGS for conformity
Conflicts:
	erts/emulator/beam/io.c
OTP-10753
Diffstat (limited to 'lib/jinterface/java_src/com')
5 files changed, 119 insertions, 17 deletions
| diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java index 16cb544a16..c76fad5e45 100644 --- a/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java +++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java @@ -90,6 +90,8 @@ public class AbstractNode {      static final int dFlagExportPtrTag = 0x200; // NOT SUPPORTED      static final int dFlagBitBinaries = 0x400;      static final int dFlagNewFloats = 0x800; +    static final int dFlagUnicodeIo = 0x1000; +    static final int dFlagUtf8Atoms = 0x10000;      int ntype = NTYPE_R6;      int proto = 0; // tcp/ip @@ -98,7 +100,7 @@ public class AbstractNode {      int creation = 0;      int flags = dFlagExtendedReferences | dFlagExtendedPidsPorts  	    | dFlagBitBinaries | dFlagNewFloats | dFlagFunTags -	    | dflagNewFunTags; +	    | dflagNewFunTags | dFlagUtf8Atoms;      /* initialize hostname and default cookie */      static { diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java index ced4dbb8c2..2768edc6fa 100644 --- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java +++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java @@ -51,7 +51,7 @@ public class OtpErlangAtom extends OtpErlangObject implements Serializable,  		    "null string value");  	} -	if (atom.length() > maxAtomLength) { +	if (atom.codePointCount(0, atom.length()) > maxAtomLength) {  	    throw new java.lang.IllegalArgumentException("Atom may not exceed "  		    + maxAtomLength + " characters: " + atom);  	} diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java index e70b9a786b..2a4cd4fa2d 100644 --- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java +++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java @@ -88,6 +88,12 @@ public class OtpExternal {      /** The tag used for old Funs */      public static final int funTag = 117; +    /** The tag used for unicode atoms */ +    public static final int atomUtf8Tag = 118; + +    /** The tag used for small unicode atoms */ +    public static final int smallAtomUtf8Tag = 119; +      /** The tag used for compressed terms */      public static final int compressedTag = 80; diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java index ae5f4ee072..c2a79af841 100644 --- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java +++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java @@ -351,26 +351,64 @@ public class OtpInputStream extends ByteArrayInputStream {       */      public String read_atom() throws OtpErlangDecodeException {  	int tag; -	int len; +	int len = -1;  	byte[] strbuf;  	String atom;  	tag = read1skip_version(); -	if (tag != OtpExternal.atomTag) { -	    throw new OtpErlangDecodeException( -		    "wrong tag encountered, expected " + OtpExternal.atomTag -			    + ", got " + tag); -	} +	switch (tag) { -	len = read2BE(); +	case OtpExternal.atomTag: +	    len = read2BE(); +	    strbuf = new byte[len]; +	    this.readN(strbuf); +	    try { +		atom = new String(strbuf, "ISO-8859-1"); +	    } catch (final java.io.UnsupportedEncodingException e) { +		throw new OtpErlangDecodeException( +		    "Failed to decode ISO-8859-1 atom"); +	    } +	    if (atom.length() > OtpExternal.maxAtomLength) { +		/* +		 * Throwing an exception would be better I think, +		 * but truncation seems to be the way it has +		 * been done in other parts of OTP... +		 */ +		atom = atom.substring(0, OtpExternal.maxAtomLength); +	    } +	    break; -	strbuf = new byte[len]; -	this.readN(strbuf); -	atom = OtpErlangString.newString(strbuf); +	case OtpExternal.smallAtomUtf8Tag: +	    len = read1(); +	    /* fall through */ +	case OtpExternal.atomUtf8Tag: +	    if (len < 0) { +		len = read2BE(); +	    } +	    strbuf = new byte[len]; +	    this.readN(strbuf); +	    try { +		atom = new String(strbuf, "UTF-8"); +	    } catch (final java.io.UnsupportedEncodingException e) { +		throw new OtpErlangDecodeException( +		    "Failed to decode UTF-8 atom"); +	    } +	    if (atom.codePointCount(0, atom.length()) > OtpExternal.maxAtomLength) { +		/* +		 * Throwing an exception would be better I think, +		 * but truncation seems to be the way it has +		 * been done in other parts of OTP... +		 */ +		final int[] cps = OtpErlangString.stringToCodePoints(atom); +		atom = new String(cps, 0, OtpExternal.maxAtomLength); +	    } +	    break; -	if (atom.length() > OtpExternal.maxAtomLength) { -	    atom = atom.substring(0, OtpExternal.maxAtomLength); +	default: +	    throw new OtpErlangDecodeException( +		"wrong tag encountered, expected " + OtpExternal.atomTag +		+ ", or "  + OtpExternal.atomUtf8Tag + ", got " + tag);  	}  	return atom; @@ -1152,6 +1190,8 @@ public class OtpInputStream extends ByteArrayInputStream {  	    return new OtpErlangLong(this);  	case OtpExternal.atomTag: +	case OtpExternal.smallAtomUtf8Tag: +	case OtpExternal.atomUtf8Tag:  	    return new OtpErlangAtom(this);  	case OtpExternal.floatTag: diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java index 22ebb4688a..10bdf389cd 100644 --- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java +++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java @@ -343,9 +343,63 @@ public class OtpOutputStream extends ByteArrayOutputStream {       *            the string to write.       */      public void write_atom(final String atom) { -	write1(OtpExternal.atomTag); -	write2BE(atom.length()); -	writeN(atom.getBytes()); +	String enc_atom; +	byte[] bytes; +	boolean isLatin1 = true; + +	if (atom.codePointCount(0, atom.length()) <= OtpExternal.maxAtomLength) { +	    enc_atom = atom; +	} +	else { +	    /* +	     * Throwing an exception would be better I think, +	     * but truncation seems to be the way it has +	     * been done in other parts of OTP... +	     */ +	    enc_atom = new String(OtpErlangString.stringToCodePoints(atom), +				  0, OtpExternal.maxAtomLength); +	} + +	for (int offset = 0; offset < enc_atom.length();) { +	    final int cp = enc_atom.codePointAt(offset); +	    if ((cp & ~0xFF) != 0) { +		isLatin1 = false; +		break; +	    } +	    offset += Character.charCount(cp); +	} +	try { +	    if (isLatin1) { +		bytes = enc_atom.getBytes("ISO-8859-1"); +		write1(OtpExternal.atomTag); +		write2BE(bytes.length); +	    } +	    else { +		bytes = enc_atom.getBytes("UTF-8"); +		final int length = bytes.length; +		if (length < 256) { +		    write1(OtpExternal.smallAtomUtf8Tag); +		    write1(length); +		} +		else { +		    write1(OtpExternal.atomUtf8Tag); +		    write2BE(length); +		} +	    } +	    writeN(bytes); +	} catch (final java.io.UnsupportedEncodingException e) { +	    /* +	     * Sigh, why didn't the API designer add an +	     * OtpErlangEncodeException to these encoding +	     * functions?!? Instead of changing the API we +	     * write an invalid atom and let it fail for +	     * whoever trying to decode this... Sigh, +	     * again... +	     */ +	    write1(OtpExternal.smallAtomUtf8Tag); +	    write1(2); +	    write2BE(0xffff); /* Invalid UTF-8 */ +	}      }      /** | 
