diff options
author | Sverker Eriksson <[email protected]> | 2013-01-23 18:09:35 +0100 |
---|---|---|
committer | Sverker Eriksson <[email protected]> | 2013-01-23 18:09:35 +0100 |
commit | b8e623410d1c22fe6d5fdeb8ccb0b2305533f033 (patch) | |
tree | 708d64e36e18b61ae1801c02ec3aeef42a697be3 /lib/erl_interface/src/misc | |
parent | e99df74bee7c245ec76678e336fcd09d4b51a089 (diff) | |
parent | d6e3e256b850050b7a86323b2948009d5fcc30a9 (diff) | |
download | otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.tar.gz otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.tar.bz2 otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.zip |
Merge branch 'sverk/r16/utf8-atoms'
* sverk/r16/utf8-atoms:
erl_interface: Fix bug when transcoding atoms from and to UTF8
erl_interface: Changed erlang_char_encoding interface
erts: Testcase doing unicode atom printout with ~w
erl_interface: even more utf8 atom stuff
erts: Fix bug in analyze_utf8 causing faulty latin1 detection
Add UTF-8 node name support for epmd
workaround...
Fix merge conflict with hasse
UTF-8 atom documentation
test case
erl_interface: utf8 atoms continued
Add utf8 atom distribution test cases
atom fixes for NIFs and atom_to_binary
UTF-8 support for distribution
Implement UTF-8 atom support for jinterface
erl_interface: Enable decode of unicode atoms
stdlib: Fix printing of unicode atoms
erts: Change internal representation of atoms to utf8
erts: Refactor rename DFLAG(S)_INTERNAL_TAGS for conformity
Conflicts:
erts/emulator/beam/io.c
OTP-10753
Diffstat (limited to 'lib/erl_interface/src/misc')
-rw-r--r-- | lib/erl_interface/src/misc/ei_decode_term.c | 40 | ||||
-rw-r--r-- | lib/erl_interface/src/misc/ei_format.c | 4 | ||||
-rw-r--r-- | lib/erl_interface/src/misc/ei_printterm.c | 7 | ||||
-rw-r--r-- | lib/erl_interface/src/misc/ei_x_encode.c | 21 | ||||
-rw-r--r-- | lib/erl_interface/src/misc/get_type.c | 79 | ||||
-rw-r--r-- | lib/erl_interface/src/misc/putget.h | 7 | ||||
-rw-r--r-- | lib/erl_interface/src/misc/show_msg.c | 14 |
7 files changed, 54 insertions, 118 deletions
diff --git a/lib/erl_interface/src/misc/ei_decode_term.c b/lib/erl_interface/src/misc/ei_decode_term.c index 0b82ef0e35..65afee89cc 100644 --- a/lib/erl_interface/src/misc/ei_decode_term.c +++ b/lib/erl_interface/src/misc/ei_decode_term.c @@ -32,7 +32,7 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term) { const char* s = buf + *index, * s0 = s; - int len, i, n, sign; + int i, n, sign; char c; if (term == NULL) return -1; @@ -48,20 +48,13 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term) case NEW_FLOAT_EXT: return ei_decode_double(buf, index, &term->value.d_val); case ERL_ATOM_EXT: - len = get16be(s); - if (len > MAXATOMLEN) return -1; - memcpy(term->value.atom_name, s, len); - term->value.atom_name[len] = '\0'; - s += len; - break; + case ERL_ATOM_UTF8_EXT: + case ERL_SMALL_ATOM_EXT: + case ERL_SMALL_ATOM_UTF8_EXT: + return ei_decode_atom(buf, index, term->value.atom_name); case ERL_REFERENCE_EXT: /* first the nodename */ - if (get8(s) != ERL_ATOM_EXT) return -1; - len = get16be(s); - if (len > MAXATOMLEN) return -1; - memcpy(term->value.ref.node, s, len); - term->value.ref.node[len] = '\0'; - s += len; + if (get_atom(&s, term->value.ref.node, &term->value.ref.node_org_enc) < 0) return -1; /* now the numbers: num (4), creation (1) */ term->value.ref.n[0] = get32be(s); term->value.ref.len = 1; @@ -71,12 +64,7 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term) /* first the integer count */ term->value.ref.len = get16be(s); /* then the nodename */ - if (get8(s) != ERL_ATOM_EXT) return -1; - len = get16be(s); - if (len > MAXATOMLEN) return -1; - memcpy(term->value.ref.node, s, len); - term->value.ref.node[len] = '\0'; - s += len; + if (get_atom(&s, term->value.ref.node, &term->value.ref.node_org_enc) < 0) return -1; /* creation */ term->value.ref.creation = get8(s) & 0x03; /* finally the id integers */ @@ -88,22 +76,12 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term) } break; case ERL_PORT_EXT: - if (get8(s) != ERL_ATOM_EXT) return -1; - len = get16be(s); - if (len > MAXATOMLEN) return -1; - memcpy(term->value.port.node, s, len); - term->value.port.node[len] = '\0'; + if (get_atom(&s, term->value.port.node, &term->value.port.node_org_enc) < 0) return -1; term->value.port.id = get32be(s) & 0x0fffffff; /* 28 bits */; term->value.port.creation = get8(s) & 0x03; break; case ERL_PID_EXT: - if (get8(s) != ERL_ATOM_EXT) return -1; - /* name first */ - len = get16be(s); - if (len > MAXATOMLEN) return -1; - memcpy(term->value.pid.node, s, len); - term->value.pid.node[len] = '\0'; - s += len; + if (get_atom(&s, term->value.pid.node, &term->value.port.node_org_enc) < 0) return -1; /* now the numbers: num (4), serial (4), creation (1) */ term->value.pid.num = get32be(s) & 0x7fff; /* 15 bits */ term->value.pid.serial = get32be(s) & 0x1fff; /* 13 bits */ diff --git a/lib/erl_interface/src/misc/ei_format.c b/lib/erl_interface/src/misc/ei_format.c index 281a192535..b5f11e618e 100644 --- a/lib/erl_interface/src/misc/ei_format.c +++ b/lib/erl_interface/src/misc/ei_format.c @@ -139,8 +139,8 @@ static int patom(const char** fmt, ei_x_buff* x) --(*fmt); len = *fmt - start; /* FIXME why truncate atom name and not fail?! */ - if (len > MAXATOMLEN) - len = MAXATOMLEN; + if (len >= MAXATOMLEN) + len = MAXATOMLEN-1; return ei_x_encode_atom_len(x, start, len); } diff --git a/lib/erl_interface/src/misc/ei_printterm.c b/lib/erl_interface/src/misc/ei_printterm.c index 5fc6b3542c..f3003a6172 100644 --- a/lib/erl_interface/src/misc/ei_printterm.c +++ b/lib/erl_interface/src/misc/ei_printterm.c @@ -115,7 +115,7 @@ static int print_term(FILE* fp, ei_x_buff* x, const char* buf, int* index) { int i, doquote, n, m, ty, r; - char a[MAXATOMLEN+1], *p; + char a[MAXATOMLEN], *p; int ch_written = 0; /* counter of written chars */ erlang_pid pid; erlang_port port; @@ -132,7 +132,10 @@ static int print_term(FILE* fp, ei_x_buff* x, doquote = 0; ei_get_type_internal(buf, index, &ty, &n); switch (ty) { - case ERL_ATOM_EXT: + case ERL_ATOM_EXT: + case ERL_ATOM_UTF8_EXT: + case ERL_SMALL_ATOM_EXT: + case ERL_SMALL_ATOM_UTF8_EXT: if (ei_decode_atom(buf, index, a) < 0) goto err; doquote = !islower((int)a[0]); diff --git a/lib/erl_interface/src/misc/ei_x_encode.c b/lib/erl_interface/src/misc/ei_x_encode.c index fa1e26ccbb..44dcff7664 100644 --- a/lib/erl_interface/src/misc/ei_x_encode.c +++ b/lib/erl_interface/src/misc/ei_x_encode.c @@ -197,18 +197,33 @@ int ei_x_encode_tuple_header(ei_x_buff* x, long n) int ei_x_encode_atom(ei_x_buff* x, const char* s) { - return ei_x_encode_atom_len(x, s, strlen(s)); + return ei_x_encode_atom_len_as(x, s, strlen(s), ERLANG_LATIN1, ERLANG_LATIN1); } int ei_x_encode_atom_len(ei_x_buff* x, const char* s, int len) { + return ei_x_encode_atom_len_as(x, s, len, ERLANG_LATIN1, ERLANG_LATIN1); +} + +int ei_x_encode_atom_as(ei_x_buff* x, const char* s, + enum erlang_char_encoding from_enc, + enum erlang_char_encoding to_enc) +{ + return ei_x_encode_atom_len_as(x, s, strlen(s), from_enc, to_enc); +} + +int ei_x_encode_atom_len_as(ei_x_buff* x, const char* s, int len, + enum erlang_char_encoding from_enc, + enum erlang_char_encoding to_enc) +{ int i = x->index; - ei_encode_atom_len(NULL, &i, s, len); + ei_encode_atom_len_as(NULL, &i, s, len, from_enc, to_enc); if (!x_fix_buff(x, i)) return -1; - return ei_encode_atom_len(x->buff, &x->index, s, len); + return ei_encode_atom_len_as(x->buff, &x->index, s, len, from_enc, to_enc); } + int ei_x_encode_pid(ei_x_buff* x, const erlang_pid* pid) { int i = x->index; diff --git a/lib/erl_interface/src/misc/get_type.c b/lib/erl_interface/src/misc/get_type.c index 2a680d0f94..54465196b0 100644 --- a/lib/erl_interface/src/misc/get_type.c +++ b/lib/erl_interface/src/misc/get_type.c @@ -33,78 +33,6 @@ int ei_get_type(const char *buf, const int *index, int *type, int *len) return ei_get_type_internal(buf, index, type, len); } -#if 0 -int ei_get_type(const char *buf, const int *index, int *type, int *len) -{ - const char *s = buf + *index; - int itype = get8(s); /* Internal type */ - - *len = 0; - - switch (*type) { - - case ERL_SMALL_INTEGER_EXT: - case ERL_INTEGER_EXT: - case ERL_SMALL_BIG_EXT: - case ERL_LARGE_BIG_EXT: - *type = EI_TYPE_INTEGER; - break; - - case ERL_FLOAT_EXT: - *type = EI_TYPE_FLOAT; - break; - - case ERL_SMALL_TUPLE_EXT: - *len = get8(s); - break; - - case ERL_ATOM_EXT: - case ERL_STRING_EXT: - *len = get16be(s); - break; - - case ERL_LARGE_TUPLE_EXT: - case ERL_LIST_EXT: - case ERL_BINARY_EXT: - *len = get32be(s); - break; - - case ERL_SMALL_BIG_EXT: - *len = (get8(s)+1)/2; /* big arity */ - break; - - case ERL_LARGE_BIG_EXT: - *len = (get32be(s)+1)/2; /* big arity */ - break; - - case ERL_BINARY_EXT: - *type = EI_TYPE_BINARY; - break; - - case ERL_PID_EXT: - *type = EI_TYPE_PID; - break; - - case ERL_PORT_EXT: - *type = EI_TYPE_PORT; - break; - - case ERL_REFERENCE_EXT: - case ERL_NEW_REFERENCE_EXT: - *type = EI_TYPE_REF; - break; - - default: - break; - } - - /* leave index unchanged */ - return 0; -} -#endif - - -/* Old definition of function above */ int ei_get_type_internal(const char *buf, const int *index, int *type, int *len) @@ -114,10 +42,15 @@ int ei_get_type_internal(const char *buf, const int *index, *type = get8(s); switch (*type) { + case ERL_SMALL_ATOM_EXT: + case ERL_SMALL_ATOM_UTF8_EXT: + *type = ERL_ATOM_EXT; case ERL_SMALL_TUPLE_EXT: *len = get8(s); break; - + + case ERL_ATOM_UTF8_EXT: + *type = ERL_ATOM_EXT; case ERL_ATOM_EXT: case ERL_STRING_EXT: *len = get16be(s); diff --git a/lib/erl_interface/src/misc/putget.h b/lib/erl_interface/src/misc/putget.h index 7a43de324b..77ae168f8c 100644 --- a/lib/erl_interface/src/misc/putget.h +++ b/lib/erl_interface/src/misc/putget.h @@ -105,6 +105,13 @@ ((EI_ULONGLONG)((unsigned char *)(s))[-2] << 8) | \ (EI_ULONGLONG)((unsigned char *)(s))[-1])) +int utf8_to_latin1(char* dst, const char* src, int slen, int destlen, enum erlang_char_encoding* res_encp); +int latin1_to_utf8(char* dst, const char* src, int slen, int destlen, enum erlang_char_encoding* res_encp); +int ei_internal_get_atom(const char** bufp, char* p, enum erlang_char_encoding*); +int ei_internal_put_atom(char** bufp, const char* p, int slen, enum erlang_char_encoding); +#define get_atom ei_internal_get_atom +#define put_atom ei_internal_put_atom + typedef union float_ext { double d; EI_ULONGLONG val; diff --git a/lib/erl_interface/src/misc/show_msg.c b/lib/erl_interface/src/misc/show_msg.c index 194296798b..33b09643ca 100644 --- a/lib/erl_interface/src/misc/show_msg.c +++ b/lib/erl_interface/src/misc/show_msg.c @@ -132,13 +132,13 @@ int ei_show_sendmsg(FILE *stream, const char *header, const char *msgbuf) switch (msg.msgtype) { case ERL_SEND: - if (ei_decode_atom(header,&index,msg.cookie) + if (ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL) || ei_decode_pid(header,&index,&msg.to)) return -1; mbuf = msgbuf; break; case ERL_SEND_TT: - if (ei_decode_atom(header,&index,msg.cookie) + if (ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL) || ei_decode_pid(header,&index,&msg.to) || ei_decode_trace(header,&index,&msg.token)) return -1; mbuf = msgbuf; @@ -146,15 +146,15 @@ int ei_show_sendmsg(FILE *stream, const char *header, const char *msgbuf) case ERL_REG_SEND: if (ei_decode_pid(header,&index,&msg.from) - || ei_decode_atom(header,&index,msg.cookie) - || ei_decode_atom(header,&index,msg.toname)) return -1; + || ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL) + || ei_decode_atom_as(header,&index,msg.toname,sizeof(msg.toname),ERLANG_UTF8,NULL,NULL)) return -1; mbuf = msgbuf; break; case ERL_REG_SEND_TT: if (ei_decode_pid(header,&index,&msg.from) - || ei_decode_atom(header,&index,msg.cookie) - || ei_decode_atom(header,&index,msg.toname) + || ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL) + || ei_decode_atom_as(header,&index,msg.toname,sizeof(msg.toname),ERLANG_UTF8,NULL,NULL) || ei_decode_trace(header,&index,&msg.token)) return -1; mbuf = msgbuf; break; @@ -457,7 +457,7 @@ static void show_term(const char *termbuf, int *index, FILE *stream) break; case ERL_FUN_EXT: { - char atom[MAXATOMLEN+1]; + char atom[MAXATOMLEN]; long idx; long uniq; const char* s = termbuf + *index, * s0 = s; |