diff options
author | Sverker Eriksson <[email protected]> | 2013-01-23 18:09:35 +0100 |
---|---|---|
committer | Sverker Eriksson <[email protected]> | 2013-01-23 18:09:35 +0100 |
commit | b8e623410d1c22fe6d5fdeb8ccb0b2305533f033 (patch) | |
tree | 708d64e36e18b61ae1801c02ec3aeef42a697be3 /lib/erl_interface/include | |
parent | e99df74bee7c245ec76678e336fcd09d4b51a089 (diff) | |
parent | d6e3e256b850050b7a86323b2948009d5fcc30a9 (diff) | |
download | otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.tar.gz otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.tar.bz2 otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.zip |
Merge branch 'sverk/r16/utf8-atoms'
* sverk/r16/utf8-atoms:
erl_interface: Fix bug when transcoding atoms from and to UTF8
erl_interface: Changed erlang_char_encoding interface
erts: Testcase doing unicode atom printout with ~w
erl_interface: even more utf8 atom stuff
erts: Fix bug in analyze_utf8 causing faulty latin1 detection
Add UTF-8 node name support for epmd
workaround...
Fix merge conflict with hasse
UTF-8 atom documentation
test case
erl_interface: utf8 atoms continued
Add utf8 atom distribution test cases
atom fixes for NIFs and atom_to_binary
UTF-8 support for distribution
Implement UTF-8 atom support for jinterface
erl_interface: Enable decode of unicode atoms
stdlib: Fix printing of unicode atoms
erts: Change internal representation of atoms to utf8
erts: Refactor rename DFLAG(S)_INTERNAL_TAGS for conformity
Conflicts:
erts/emulator/beam/io.c
OTP-10753
Diffstat (limited to 'lib/erl_interface/include')
-rw-r--r-- | lib/erl_interface/include/ei.h | 40 | ||||
-rw-r--r-- | lib/erl_interface/include/erl_interface.h | 40 |
2 files changed, 61 insertions, 19 deletions
```diff
diff --git a/lib/erl_interface/include/ei.h b/lib/erl_interface/include/ei.h
index ae815b414a..2278a28adb 100644
--- a/lib/erl_interface/include/ei.h
+++ b/lib/erl_interface/include/ei.h
@@ -115,6 +115,9 @@
 #define ERL_FLOAT_EXT 'c'
 #define NEW_FLOAT_EXT 'F'
 #define ERL_ATOM_EXT 'd'
+#define ERL_SMALL_ATOM_EXT 's'
+#define ERL_ATOM_UTF8_EXT 'v'
+#define ERL_SMALL_ATOM_UTF8_EXT 'w'
 #define ERL_REFERENCE_EXT 'e'
 #define ERL_NEW_REFERENCE_EXT 'r'
 #define ERL_PORT_EXT 'f'
@@ -183,12 +186,21 @@ extern volatile int __erl_errno;
 #define EI_MAXHOSTNAMELEN 64
 #define EI_MAXALIVELEN 63
 #define EI_MAX_COOKIE_SIZE 512
-#define MAXATOMLEN 255
+#define MAXATOMLEN (255 + 1)
+#define MAXATOMLEN_UTF8 (255*4 + 1)
 #define MAXNODELEN EI_MAXALIVELEN+1+EI_MAXHOSTNAMELEN
 
+enum erlang_char_encoding {
+  ERLANG_ASCII = 1,
+  ERLANG_LATIN1 = 2,
+  ERLANG_UTF8 = 4,
+  ERLANG_ANY = ERLANG_ASCII|ERLANG_LATIN1|ERLANG_UTF8
+};
+
 /* a pid */
 typedef struct {
-  char node[MAXATOMLEN+1];
+  char node[MAXATOMLEN_UTF8];
+  enum erlang_char_encoding node_org_enc;
   unsigned int num;
   unsigned int serial;
   unsigned int creation;
@@ -196,14 +208,16 @@ typedef struct {
 
 /* a port */
 typedef struct {
-  char node[MAXATOMLEN+1];
+  char node[MAXATOMLEN_UTF8];
+  enum erlang_char_encoding node_org_enc;
   unsigned int id;
   unsigned int creation;
 } erlang_port;
 
 /* a ref */
 typedef struct {
-  char node[MAXATOMLEN+1];
+  char node[MAXATOMLEN_UTF8];
+  enum erlang_char_encoding node_org_enc;
   int len;
   unsigned int n[3];
   unsigned int creation;
@@ -223,15 +237,16 @@ typedef struct {
   long msgtype;
   erlang_pid from;
   erlang_pid to;
-  char toname[MAXATOMLEN+1];
-  char cookie[MAXATOMLEN+1];
+  char toname[MAXATOMLEN_UTF8];
+  char cookie[MAXATOMLEN_UTF8];
   erlang_trace token;
 } erlang_msg;
 
 /* a fun */
 typedef struct {
   long arity;
-  char module[MAXATOMLEN+1];
+  char module[MAXATOMLEN_UTF8];
+  enum erlang_char_encoding module_org_enc;
   char md5[16];
   long index;
   long old_index;
@@ -256,7 +271,7 @@ typedef struct {
   union {
     long i_val;
     double d_val;
-    char atom_name[MAXATOMLEN+1];
+    char atom_name[MAXATOMLEN_UTF8];
     erlang_pid pid;
     erlang_port port;
     erlang_ref ref;
@@ -425,9 +440,17 @@ int ei_encode_string_len(char *buf, int *index, const char *p, int len);
 int ei_x_encode_string(ei_x_buff* x, const char* s);
 int ei_x_encode_string_len(ei_x_buff* x, const char* s, int len);
 int ei_encode_atom(char *buf, int *index, const char *p);
+int ei_encode_atom_as(char *buf, int *index, const char *p,
+                      enum erlang_char_encoding from, enum erlang_char_encoding to);
 int ei_encode_atom_len(char *buf, int *index, const char *p, int len);
+int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len,
+                          enum erlang_char_encoding from, enum erlang_char_encoding to);
 int ei_x_encode_atom(ei_x_buff* x, const char* s);
+int ei_x_encode_atom_as(ei_x_buff* x, const char* s,
+                        enum erlang_char_encoding from, enum erlang_char_encoding to);
 int ei_x_encode_atom_len(ei_x_buff* x, const char* s, int len);
+int ei_x_encode_atom_len_as(ei_x_buff* x, const char* s, int len,
+                            enum erlang_char_encoding from, enum erlang_char_encoding to);
 int ei_encode_binary(char *buf, int *index, const void *p, long len);
 int ei_x_encode_binary(ei_x_buff* x, const void* s, int len);
 int ei_encode_pid(char *buf, int *index, const erlang_pid *p);
@@ -477,6 +500,7 @@ int ei_decode_boolean(const char *buf, int *index, int *p);
 int ei_decode_char(const char *buf, int *index, char *p);
 int ei_decode_string(const char *buf, int *index, char *p);
 int ei_decode_atom(const char *buf, int *index, char *p);
+int ei_decode_atom_as(const char *buf, int *index, char *p, int destlen, enum erlang_char_encoding want, enum erlang_char_encoding* was, enum erlang_char_encoding* result);
 int ei_decode_binary(const char *buf, int *index, void *p, long *len);
 int ei_decode_fun(const char* buf, int* index, erlang_fun* p);
 void free_fun(erlang_fun* f);
diff --git a/lib/erl_interface/include/erl_interface.h b/lib/erl_interface/include/erl_interface.h
index 1c4a94700d..98acc0d71d 100644
--- a/lib/erl_interface/include/erl_interface.h
+++ b/lib/erl_interface/include/erl_interface.h
@@ -95,19 +95,24 @@
 
 #define ERL_FLOAT_VALUE(x) ((x)->uval.fval.f)
 
-#define ERL_ATOM_PTR(x) ((x)->uval.aval.a)
-#define ERL_ATOM_SIZE(x) ((x)->uval.aval.len)
+#define ERL_ATOM_PTR(x) erl_atom_ptr_latin1((Erl_Atom_data*) &(x)->uval.aval.d)
+#define ERL_ATOM_PTR_UTF8(x) erl_atom_ptr_utf8((Erl_Atom_data*) &(x)->uval.aval.d)
+#define ERL_ATOM_SIZE(x) erl_atom_size_latin1((Erl_Atom_data*) &(x)->uval.aval.d)
+#define ERL_ATOM_SIZE_UTF8(x) erl_atom_size_utf8((Erl_Atom_data*) &(x)->uval.aval.d)
 
-#define ERL_PID_NODE(x) ((x)->uval.pidval.node)
+#define ERL_PID_NODE(x) erl_atom_ptr_latin1((Erl_Atom_data*) &(x)->uval.pidval.node)
+#define ERL_PID_NODE_UTF8(x) erl_atom_ptr_utf8((Erl_Atom_data*) &(x)->uval.pidval.node)
 #define ERL_PID_NUMBER(x) ((x)->uval.pidval.number)
 #define ERL_PID_SERIAL(x) ((x)->uval.pidval.serial)
 #define ERL_PID_CREATION(x) ((x)->uval.pidval.creation)
 
-#define ERL_PORT_NODE(x) ((x)->uval.portval.node)
+#define ERL_PORT_NODE(x) erl_atom_ptr_latin1((Erl_Atom_data*) &(x)->uval.portval.node)
+#define ERL_PORT_NODE_UTF8(x) erl_atom_ptr_utf8((Erl_Atom_data*) &(x)->uval.portval.node)
 #define ERL_PORT_NUMBER(x) ((x)->uval.portval.number)
 #define ERL_PORT_CREATION(x) ((x)->uval.portval.creation)
 
-#define ERL_REF_NODE(x) ((x)->uval.refval.node)
+#define ERL_REF_NODE(x) erl_atom_ptr_latin1((Erl_Atom_data*) &(x)->uval.refval.node)
+#define ERL_REF_NODE_UTF8(x) erl_atom_ptr_utf8((Erl_Atom_data*) &(x)->uval.refval.node)
 #define ERL_REF_NUMBER(x) ((x)->uval.refval.n[0])
 #define ERL_REF_NUMBERS(x) ((x)->uval.refval.n)
 #define ERL_REF_LEN(x) ((x)->uval.refval.len)
@@ -183,14 +188,26 @@ typedef struct {
 } Erl_Float;
 
 typedef struct {
+  char *utf8;
+  int lenU;
+  char *latin1;
+  int lenL;
+} Erl_Atom_data;
+
+char* erl_atom_ptr_latin1(Erl_Atom_data*);
+char* erl_atom_ptr_utf8(Erl_Atom_data*);
+int erl_atom_size_latin1(Erl_Atom_data*);
+int erl_atom_size_utf8(Erl_Atom_data*);
+char* erl_atom_init_latin1(Erl_Atom_data*, const char*);
+
+typedef struct {
   Erl_Header h;
-  int len;
-  char *a;
+  Erl_Atom_data d;
 } Erl_Atom;
 
 typedef struct {
   Erl_Header h;
-  char * node;
+  Erl_Atom_data node;
   unsigned int number;
   unsigned int serial;
   unsigned char creation;
@@ -198,14 +215,14 @@ typedef struct {
 
 typedef struct {
   Erl_Header h;
-  char * node;
+  Erl_Atom_data node;
   unsigned int number;
   unsigned char creation;
 } Erl_Port;
 
 typedef struct {
   Erl_Header h;
-  char * node;
+  Erl_Atom_data node;
   int len;
   unsigned int n[3];
   unsigned char creation;
@@ -289,7 +306,7 @@ typedef struct _eterm {
 } ETERM;
 
 
-#define MAXREGLEN 255 /* max length of registered (atom) name */
+#define MAXREGLEN (255*4) /* max length of registered (atom) name */
 
 typedef struct {
   int type; /* one of the message type constants in eiext.h */
@@ -409,6 +426,7 @@ unsigned char erl_ext_type(unsigned char*); /* Note: returned 'char' before R9C
 unsigned char *erl_peek_ext(unsigned char*,int);
 int erl_term_len(ETERM*);
 
+int cmp_latin1_vs_utf8(const char* sL, int lenL, const char* sU, int lenU);
 
 /* -------------------------------------------------------------------- */
 /* Wrappers around ei functions */
```