From 9a35c01873fb56316136e1314ad6adffe97b9fa5 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Mon, 18 Feb 2013 15:09:00 +0100 Subject: erl_interface,ic: Remove node_org_enc from erlang_{pid,port,ref} in order to be backward compatible with user code that accesses the members of erlang_pid and friends. The documentation does not mention the content of these structs, but we have example code that does. So the safe way is to revert the node_org_enc field (added in R16A) and instead determine at runtime which atom encoding to use depending on if the node atom contains unicode (>255) characters or not. --- lib/erl_interface/doc/src/ei.xml | 4 +++- lib/erl_interface/include/ei.h | 3 --- lib/erl_interface/src/connect/ei_connect.c | 1 - lib/erl_interface/src/decode/decode_pid.c | 2 +- lib/erl_interface/src/decode/decode_port.c | 2 +- lib/erl_interface/src/decode/decode_ref.c | 4 ++-- lib/erl_interface/src/encode/encode_atom.c | 24 +++++++++++++++++++++--- lib/erl_interface/src/encode/encode_pid.c | 3 ++- lib/erl_interface/src/encode/encode_port.c | 2 +- lib/erl_interface/src/encode/encode_ref.c | 2 +- lib/erl_interface/src/legacy/erl_connect.c | 2 -- lib/erl_interface/src/misc/ei_decode_term.c | 8 ++++---- 12 files changed, 36 insertions(+), 21 deletions(-) (limited to 'lib/erl_interface') diff --git a/lib/erl_interface/doc/src/ei.xml b/lib/erl_interface/doc/src/ei.xml index dfe181bd1d..6c340378d4 100644 --- a/lib/erl_interface/doc/src/ei.xml +++ b/lib/erl_interface/doc/src/ei.xml @@ -264,7 +264,9 @@ typedef enum { The p parameter is the name of the atom with character encoding from_enc (ascii, latin1 or utf8). The name must either be zero-terminated or a function variant with a len - parameter must be used.

+ parameter must be used. If to_enc is set to the bitwise-or'd combination + (ERLANG_LATIN1|ERLANG_UTF8), utf8 encoding is only used if the atom string + can not be represented in latin1 encoding.

The encoding will fail if p is not a valid string in encoding from_enc, if the string is too long or if it can not be represented with character encoding to_enc.

These functions were introduced in R16 release of Erlang/OTP as part of a first step diff --git a/lib/erl_interface/include/ei.h b/lib/erl_interface/include/ei.h index 66dc64a69d..f51f377b9c 100644 --- a/lib/erl_interface/include/ei.h +++ b/lib/erl_interface/include/ei.h @@ -199,7 +199,6 @@ typedef enum { /* a pid */ typedef struct { char node[MAXATOMLEN_UTF8]; - erlang_char_encoding node_org_enc; unsigned int num; unsigned int serial; unsigned int creation; @@ -208,7 +207,6 @@ typedef struct { /* a port */ typedef struct { char node[MAXATOMLEN_UTF8]; - erlang_char_encoding node_org_enc; unsigned int id; unsigned int creation; } erlang_port; @@ -216,7 +214,6 @@ typedef struct { /* a ref */ typedef struct { char node[MAXATOMLEN_UTF8]; - erlang_char_encoding node_org_enc; int len; unsigned int n[3]; unsigned int creation; diff --git a/lib/erl_interface/src/connect/ei_connect.c b/lib/erl_interface/src/connect/ei_connect.c index c1361e169e..3ab86bb340 100644 --- a/lib/erl_interface/src/connect/ei_connect.c +++ b/lib/erl_interface/src/connect/ei_connect.c @@ -459,7 +459,6 @@ int ei_connect_xinit(ei_cnode* ec, const char *thishostname, /* memmove(&ec->this_ipaddr, thisipaddr, sizeof(ec->this_ipaddr)); */ strcpy(ec->self.node,thisnodename); - ec->self.node_org_enc = ERLANG_LATIN1; ec->self.num = 0; ec->self.serial = 0; ec->self.creation = creation; diff --git a/lib/erl_interface/src/decode/decode_pid.c b/lib/erl_interface/src/decode/decode_pid.c index d429fb2fd8..cd5ae2ab20 100644 --- a/lib/erl_interface/src/decode/decode_pid.c +++ b/lib/erl_interface/src/decode/decode_pid.c @@ -30,7 +30,7 @@ int ei_decode_pid(const char *buf, int *index, erlang_pid *p) if (get8(s) != ERL_PID_EXT) return -1; if (p) { - if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1; + if (get_atom(&s, p->node, NULL) < 0) return -1; p->num = get32be(s) & 0x7fff; /* 15 bits */ p->serial = get32be(s) & 0x1fff; /* 13 bits */ p->creation = get8(s) & 0x03; /* 2 bits */ diff --git 
a/lib/erl_interface/src/decode/decode_port.c b/lib/erl_interface/src/decode/decode_port.c index 7a691f0be6..8fbdc5f3d3 100644 --- a/lib/erl_interface/src/decode/decode_port.c +++ b/lib/erl_interface/src/decode/decode_port.c @@ -29,7 +29,7 @@ int ei_decode_port(const char *buf, int *index, erlang_port *p) if (get8(s) != ERL_PORT_EXT) return -1; if (p) { - if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1; + if (get_atom(&s, p->node, NULL) < 0) return -1; p->id = get32be(s) & 0x0fffffff /* 28 bits */; p->creation = get8(s) & 0x03; } diff --git a/lib/erl_interface/src/decode/decode_ref.c b/lib/erl_interface/src/decode/decode_ref.c index 01e3061cb4..78db118172 100644 --- a/lib/erl_interface/src/decode/decode_ref.c +++ b/lib/erl_interface/src/decode/decode_ref.c @@ -31,7 +31,7 @@ int ei_decode_ref(const char *buf, int *index, erlang_ref *p) switch (get8(s)) { case ERL_REFERENCE_EXT: if (p) { - if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1; + if (get_atom(&s, p->node, NULL) < 0) return -1; p->n[0] = get32be(s); p->len = 1; p->creation = get8(s) & 0x03; @@ -52,7 +52,7 @@ int ei_decode_ref(const char *buf, int *index, erlang_ref *p) if (p) { p->len = count; - if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1; + if (get_atom(&s, p->node, NULL) < 0) return -1; p->creation = get8(s) & 0x03; } else { diff --git a/lib/erl_interface/src/encode/encode_atom.c b/lib/erl_interface/src/encode/encode_atom.c index df4b0af5db..46d34c3bf0 100644 --- a/lib/erl_interface/src/encode/encode_atom.c +++ b/lib/erl_interface/src/encode/encode_atom.c @@ -25,7 +25,7 @@ static int verify_ascii_atom(const char* src, int slen); static int verify_utf8_atom(const char* src, int slen); - +static int is_latin1_as_utf8(const char *p, int len); int ei_encode_atom(char *buf, int *index, const char *p) { @@ -63,6 +63,14 @@ int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len, return -1; } + if (to_enc == (ERLANG_LATIN1 | ERLANG_UTF8)) { + if (from_enc == 
ERLANG_UTF8) { + to_enc = is_latin1_as_utf8(p, len) ? ERLANG_LATIN1 : ERLANG_UTF8; + } + else { + to_enc = from_enc; + } + } switch(to_enc) { case ERLANG_LATIN1: if (buf) { @@ -148,7 +156,7 @@ ei_internal_put_atom(char** bufp, const char* p, int slen, } -int verify_ascii_atom(const char* src, int slen) +static int verify_ascii_atom(const char* src, int slen) { while (slen > 0) { if ((src[0] & 0x80) != 0) return -1; @@ -158,7 +166,7 @@ int verify_ascii_atom(const char* src, int slen) return 0; } -int verify_utf8_atom(const char* src, int slen) +static int verify_utf8_atom(const char* src, int slen) { int num_chars = 0; @@ -188,3 +196,13 @@ int verify_utf8_atom(const char* src, int slen) return 0; } +/* Only latin1 code points in utf8 string? + */ +static int is_latin1_as_utf8(const char *p, int len) +{ + int i; + for (i=0; i<len; i++) { + if ((unsigned char)p[i] > 0xC3) return 0; + } + return 1; +} diff --git a/lib/erl_interface/src/encode/encode_pid.c b/lib/erl_interface/src/encode/encode_pid.c index 903c9cce00..86d0f393e5 100644 --- a/lib/erl_interface/src/encode/encode_pid.c +++ b/lib/erl_interface/src/encode/encode_pid.c @@ -26,7 +26,8 @@ int ei_encode_pid(char *buf, int *index, const erlang_pid *p) char *s = buf + *index; ++(*index); /* skip ERL_PID_EXT */ - if (ei_encode_atom_len_as(buf, index, p->node, strlen(p->node), ERLANG_UTF8, p->node_org_enc) < 0) + if (ei_encode_atom_len_as(buf, index, p->node, strlen(p->node), + ERLANG_UTF8, ERLANG_LATIN1|ERLANG_UTF8) < 0) return -1; if (buf) { diff --git a/lib/erl_interface/src/encode/encode_port.c b/lib/erl_interface/src/encode/encode_port.c index c729aeb4eb..a206de56c7 100644 --- a/lib/erl_interface/src/encode/encode_port.c +++ b/lib/erl_interface/src/encode/encode_port.c @@ -27,7 +27,7 @@ int ei_encode_port(char *buf, int *index, const erlang_port *p) ++(*index); /* skip ERL_PORT_EXT */ if (ei_encode_atom_len_as(buf, index, p->node, strlen(p->node), ERLANG_UTF8, - p->node_org_enc) < 0) { + ERLANG_LATIN1|ERLANG_UTF8) < 0) { return -1; } if (buf) { diff
--git a/lib/erl_interface/src/encode/encode_ref.c b/lib/erl_interface/src/encode/encode_ref.c index 3511366bef..9855231848 100644 --- a/lib/erl_interface/src/encode/encode_ref.c +++ b/lib/erl_interface/src/encode/encode_ref.c @@ -28,7 +28,7 @@ int ei_encode_ref(char *buf, int *index, const erlang_ref *p) (*index) += 1 + 2; /* skip to node atom */ if (ei_encode_atom_len_as(buf, index, p->node, strlen(p->node), ERLANG_UTF8, - p->node_org_enc) < 0) { + ERLANG_LATIN1|ERLANG_UTF8) < 0) { return -1; } diff --git a/lib/erl_interface/src/legacy/erl_connect.c b/lib/erl_interface/src/legacy/erl_connect.c index eca16497dc..ae0265a388 100644 --- a/lib/erl_interface/src/legacy/erl_connect.c +++ b/lib/erl_interface/src/legacy/erl_connect.c @@ -250,11 +250,9 @@ int erl_send(int fd, ETERM *to ,ETERM *msg) if (to->uval.pidval.node.latin1) { strcpy(topid.node, to->uval.pidval.node.latin1); - topid.node_org_enc = ERLANG_LATIN1; } else { strcpy(topid.node, to->uval.pidval.node.utf8); - topid.node_org_enc = ERLANG_UTF8; } topid.num = ERL_PID_NUMBER(to); topid.serial = ERL_PID_SERIAL(to); diff --git a/lib/erl_interface/src/misc/ei_decode_term.c b/lib/erl_interface/src/misc/ei_decode_term.c index 1423ec7ed7..ce5ae5b19d 100644 --- a/lib/erl_interface/src/misc/ei_decode_term.c +++ b/lib/erl_interface/src/misc/ei_decode_term.c @@ -54,7 +54,7 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term) return ei_decode_atom(buf, index, term->value.atom_name); case ERL_REFERENCE_EXT: /* first the nodename */ - if (get_atom(&s, term->value.ref.node, &term->value.ref.node_org_enc) < 0) return -1; + if (get_atom(&s, term->value.ref.node, NULL) < 0) return -1; /* now the numbers: num (4), creation (1) */ term->value.ref.n[0] = get32be(s); term->value.ref.len = 1; @@ -64,7 +64,7 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term) /* first the integer count */ term->value.ref.len = get16be(s); /* then the nodename */ - if (get_atom(&s, term->value.ref.node, 
&term->value.ref.node_org_enc) < 0) return -1; + if (get_atom(&s, term->value.ref.node, NULL) < 0) return -1; /* creation */ term->value.ref.creation = get8(s) & 0x03; /* finally the id integers */ @@ -76,12 +76,12 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term) } break; case ERL_PORT_EXT: - if (get_atom(&s, term->value.port.node, &term->value.port.node_org_enc) < 0) return -1; + if (get_atom(&s, term->value.port.node, NULL) < 0) return -1; term->value.port.id = get32be(s) & 0x0fffffff; /* 28 bits */; term->value.port.creation = get8(s) & 0x03; break; case ERL_PID_EXT: - if (get_atom(&s, term->value.pid.node, &term->value.port.node_org_enc) < 0) return -1; + if (get_atom(&s, term->value.pid.node, NULL) < 0) return -1; /* now the numbers: num (4), serial (4), creation (1) */ term->value.pid.num = get32be(s) & 0x7fff; /* 15 bits */ term->value.pid.serial = get32be(s) & 0x1fff; /* 13 bits */ -- cgit v1.2.3 From 969224f54bec6d932aa61b9acc8e76b49604ebcc Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Wed, 20 Feb 2013 19:14:21 +0100 Subject: erl_interface: Fix some warnings in test code by adding missing #include's --- lib/erl_interface/test/all_SUITE_data/runner.c | 1 + lib/erl_interface/test/ei_format_SUITE_data/ei_format_test.c | 1 + lib/erl_interface/test/ei_print_SUITE_data/ei_print_test.c | 3 +++ lib/erl_interface/test/erl_eterm_SUITE_data/print_term.c | 1 + 4 files changed, 6 insertions(+) (limited to 'lib/erl_interface') diff --git a/lib/erl_interface/test/all_SUITE_data/runner.c b/lib/erl_interface/test/all_SUITE_data/runner.c index 24df0f5f40..d4ef362043 100644 --- a/lib/erl_interface/test/all_SUITE_data/runner.c +++ b/lib/erl_interface/test/all_SUITE_data/runner.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include diff --git a/lib/erl_interface/test/ei_format_SUITE_data/ei_format_test.c b/lib/erl_interface/test/ei_format_SUITE_data/ei_format_test.c index 4f6c15ba9c..6a68e3ba8f 100644 --- 
a/lib/erl_interface/test/ei_format_SUITE_data/ei_format_test.c +++ b/lib/erl_interface/test/ei_format_SUITE_data/ei_format_test.c @@ -22,6 +22,7 @@ #endif #include "ei_runner.h" +#include /* * Purpose: Tests the ei_format() function. diff --git a/lib/erl_interface/test/ei_print_SUITE_data/ei_print_test.c b/lib/erl_interface/test/ei_print_SUITE_data/ei_print_test.c index cc9b8048ca..0475edb227 100644 --- a/lib/erl_interface/test/ei_print_SUITE_data/ei_print_test.c +++ b/lib/erl_interface/test/ei_print_SUITE_data/ei_print_test.c @@ -19,6 +19,9 @@ #include "ei_runner.h" +#include +#include + /* * Purpose: Tests the ei_print() function. * Author: Jakob diff --git a/lib/erl_interface/test/erl_eterm_SUITE_data/print_term.c b/lib/erl_interface/test/erl_eterm_SUITE_data/print_term.c index 56e2d43d2f..1d8068c537 100644 --- a/lib/erl_interface/test/erl_eterm_SUITE_data/print_term.c +++ b/lib/erl_interface/test/erl_eterm_SUITE_data/print_term.c @@ -23,6 +23,7 @@ */ #include +#include #include #include #include -- cgit v1.2.3