From 3aa60cc472bc330dbe9360eb27a1f340b7e23dc6 Mon Sep 17 00:00:00 2001 From: Rickard Green Date: Tue, 22 Jan 2013 18:35:35 +0100 Subject: Add UTF-8 node name support for epmd --- erts/epmd/src/epmd_cli.c | 13 ++- erts/epmd/src/epmd_int.h | 20 +++- erts/epmd/src/epmd_srv.c | 215 ++++++++++++++++++++++++++++++++++++------ erts/epmd/test/epmd_SUITE.erl | 47 ++++++++- 4 files changed, 254 insertions(+), 41 deletions(-) (limited to 'erts/epmd') diff --git a/erts/epmd/src/epmd_cli.c b/erts/epmd/src/epmd_cli.c index 74408e3ebe..1d4de64b63 100644 --- a/erts/epmd/src/epmd_cli.c +++ b/erts/epmd/src/epmd_cli.c @@ -22,6 +22,7 @@ #endif #include "epmd.h" /* Renamed from 'epmd_r4.h' */ #include "epmd_int.h" +#include "erl_printf.h" /* erts_snprintf */ /* forward declarations */ @@ -114,16 +115,18 @@ void epmd_call(EpmdVars *g,int what) epmd_cleanup_exit(g,1); } j = ntohl(i); - if (!g->silent) - printf("epmd: up and running on port %d with data:\n", j); + if (!g->silent) { + rval = erts_snprintf(buf, OUTBUF_SIZE, + "epmd: up and running on port %d with data:\n", j); + write(1, buf, rval); + } while(1) { - if ((rval = read(fd,buf,1)) <= 0) { + if ((rval = read(fd,buf,OUTBUF_SIZE)) <= 0) { close(fd); epmd_cleanup_exit(g,0); } - buf[rval] = '\0'; if (!g->silent) - printf("%s",buf); + write(1, buf, rval); /* Potentially UTF-8 encoded */ } } diff --git a/erts/epmd/src/epmd_int.h b/erts/epmd/src/epmd_int.h index 14d05c3f19..b25412c905 100644 --- a/erts/epmd/src/epmd_int.h +++ b/erts/epmd/src/epmd_int.h @@ -226,13 +226,25 @@ #define MAX_UNREG_COUNT 1000 #define DEBUG_MAX_UNREG_COUNT 5 -/* Maximum length of a node name == atom name */ -#define MAXSYMLEN 255 +/* + * Maximum length of a node name == atom name + * 255 characters; UTF-8 encoded -> max 255*4 + */ +#define MAXSYMLEN (255*4) #define MAX_LISTEN_SOCKETS 16 -#define INBUF_SIZE 1024 -#define OUTBUF_SIZE 1024 +/* + * Largest request: ALIVE2_REQ + * 2 + 13 + 2*MAXSYMLEN + * Largest response: PORT2_RESP + * 2 + 14 + 2*MAXSYMLEN + * + 
* That is, 3*MAXSYMLEN should be large enough + */ + +#define INBUF_SIZE (3*MAXSYMLEN) +#define OUTBUF_SIZE (3*MAXSYMLEN) #define get_int16(s) ((((unsigned char*) (s))[0] << 8) | \ (((unsigned char*) (s))[1])) diff --git a/erts/epmd/src/epmd_srv.c b/erts/epmd/src/epmd_srv.c index 36565b7438..2a74c4955e 100644 --- a/erts/epmd/src/epmd_srv.c +++ b/erts/epmd/src/epmd_srv.c @@ -73,7 +73,7 @@ static int conn_open(EpmdVars*,int); static int conn_close_fd(EpmdVars*,int); static void node_init(EpmdVars*); -static Node *node_reg2(EpmdVars*,char*, int, int, unsigned char, unsigned char, int, int, int, char*); +static Node *node_reg2(EpmdVars*, int, char*, int, int, unsigned char, unsigned char, int, int, int, char*); static int node_unreg(EpmdVars*,char*); static int node_unreg_sock(EpmdVars*,int); @@ -81,6 +81,113 @@ static int reply(EpmdVars*,int,char *,int); static void dbg_print_buf(EpmdVars*,char *,int); static void print_names(EpmdVars*); +static int is_same_str(char *x, char *y) +{ + int i = 0; + /* + * Using strcmp() == 0 is probably ok, but just to be sure, + * since we got UTF-8 strings, we do it ourselves. + * + * We assume null-terminated correctly encoded UTF-8. + */ + while (x[i] == y[i]) { + if (x[i] == '\0') + return 1; + i++; + } + return 0; +} + +static int copy_str(char *x, char *y) +{ + int i = 0; + /* + * Using strcpy() is probably ok, but just to be sure, + * since we got UTF-8 strings, we do it ourselves. + * + * We assume null-terminated correctly encoded UTF-8. + */ + while (1) { + x[i] = y[i]; + if (y[i] == '\0') + return i; + i++; + } +} + +static int length_str(char *x) +{ + int i = 0; + /* + * Using strlen is probably ok, but just to be sure, + * since we got UTF-8 strings, we do it ourselves. + * + * We assume null-terminated correctly encoded UTF-8. 
+ */ + while (x[i]) + i++; + return i; +} + +static int verify_utf8(const char *src, int sz, int null_term) +{ + unsigned char *source = (unsigned char *) src; + int size = sz; + int num_chars = 0; + while (size) { + if (null_term && (*source) == 0) + return num_chars; + if (((*source) & ((unsigned char) 0x80)) == 0) { + source++; + --size; + } else if (((*source) & ((unsigned char) 0xE0)) == 0xC0) { + if (size < 2) + return -1; + if (((source[1] & ((unsigned char) 0xC0)) != 0x80) || + ((*source) < 0xC2) /* overlong */) { + return -1; + } + source += 2; + size -= 2; + } else if (((*source) & ((unsigned char) 0xF0)) == 0xE0) { + if (size < 3) + return -1; + if (((source[1] & ((unsigned char) 0xC0)) != 0x80) || + ((source[2] & ((unsigned char) 0xC0)) != 0x80) || + (((*source) == 0xE0) && (source[1] < 0xA0)) /* overlong */ ) { + return -1; + } + if ((((*source) & ((unsigned char) 0xF)) == 0xD) && + ((source[1] & 0x20) != 0)) { + return -1; + } + source += 3; + size -= 3; + } else if (((*source) & ((unsigned char) 0xF8)) == 0xF0) { + if (size < 4) + return -1; + if (((source[1] & ((unsigned char) 0xC0)) != 0x80) || + ((source[2] & ((unsigned char) 0xC0)) != 0x80) || + ((source[3] & ((unsigned char) 0xC0)) != 0x80) || + (((*source) == 0xF0) && (source[1] < 0x90)) /* overlong */) { + return -1; + } + if ((((*source) & ((unsigned char)0x7)) > 0x4U) || + ((((*source) & ((unsigned char)0x7)) == 0x4U) && + ((source[1] & ((unsigned char)0x3F)) > 0xFU))) { + return -1; + } + source += 4; + size -= 4; + } else { + return -1; + } + ++num_chars; + } + return num_chars; +} + + static EPMD_INLINE void select_fd_set(EpmdVars* g, int fd) { FD_SET(fd, &g->orig_read_mask); @@ -525,10 +632,11 @@ static void do_request(g, fd, s, buf, bsize) } name = &buf[11]; name[namelen]='\000'; + extra = &buf[11+namelen+2]; extra[extralen]='\000'; wbuf[0] = EPMD_ALIVE2_RESP; - if ((node = node_reg2(g, name, fd, eport, nodetype, protocol, + if ((node = node_reg2(g, namelen, name, fd, eport, nodetype, 
protocol, highvsn, lowvsn, extralen, extra)) == NULL) { wbuf[1] = 1; /* error */ put_int16(99, wbuf+2); @@ -573,22 +681,28 @@ static void do_request(g, fd, s, buf, bsize) { char *name = &buf[1]; /* Points to node name */ + int nsz; Node *node; - + + nsz = verify_utf8(name, bsize - 1, 0); /* name starts at buf[1]: it spans bsize-1 bytes, not bsize */ + if (nsz < 1 || 255 < nsz) { + dbg_printf(g,0,"invalid node name in PORT2_REQ"); + return; + } + wbuf[0] = EPMD_PORT2_RESP; for (node = g->nodes.reg; node; node = node->next) { int offset; - if (strcmp(node->symname, name) == 0) { + if (is_same_str(node->symname, name)) { wbuf[1] = 0; /* ok */ put_int16(node->port,wbuf+2); wbuf[4] = node->nodetype; wbuf[5] = node->protocol; put_int16(node->highvsn,wbuf+6); put_int16(node->lowvsn,wbuf+8); - put_int16(strlen(node->symname),wbuf+10); + put_int16(length_str(node->symname),wbuf+10); offset = 12; - strcpy(wbuf + offset,node->symname); - offset += strlen(node->symname); + offset += copy_str(wbuf + offset,node->symname); put_int16(node->extralen,wbuf + offset); offset += 2; memcpy(wbuf + offset,node->extra,node->extralen); @@ -629,15 +743,22 @@ static void do_request(g, fd, s, buf, bsize) for (node = g->nodes.reg; node; node = node->next) { - int len; + int len = 0; + int r; /* CAREFUL!!! These are parsed by "erl_epmd.erl" so a slight change in syntax will break < OTP R3A */ - erts_snprintf(wbuf, sizeof(wbuf), "name %s at port %d\n",node->symname, node->port); - len = strlen(wbuf); + len += copy_str(&wbuf[len], "name "); + len += copy_str(&wbuf[len], node->symname); + r = erts_snprintf(&wbuf[len], sizeof(wbuf)-len, + " at port %d\n", node->port); + if (r < 0) + goto failed_names_resp; + len += r; if (reply(g, fd, wbuf, len) != len) { + failed_names_resp: dbg_tty_printf(g,1,"failed to send NAMES_RESP"); return; } @@ -665,16 +786,22 @@ static void do_request(g, fd, s, buf, bsize) for (node = g->nodes.reg; node; node = node->next) { - int len; + int len = 0, r; /* CAREFUL!!! 
These are parsed by "erl_epmd.erl" so a slight change in syntax will break < OTP R3A */ - erts_snprintf(wbuf, sizeof(wbuf), "active name <%s> at port %d, fd = %d\n", - node->symname, node->port, node->fd); - len = strlen(wbuf) + 1; - if (reply(g, fd,wbuf,len) != len) + len += copy_str(&wbuf[len], "active name <"); + len += copy_str(&wbuf[len], node->symname); + r = erts_snprintf(&wbuf[len], sizeof(wbuf)-len, + "> at port %d, fd = %d\n", + node->port, node->fd); + if (r < 0) + goto failed_dump_resp; + len += r + 1; + if (reply(g, fd,wbuf,len) != len) { + failed_dump_resp: dbg_tty_printf(g,1,"failed to send DUMP_RESP"); return; } @@ -682,16 +809,22 @@ static void do_request(g, fd, s, buf, bsize) for (node = g->nodes.unreg; node; node = node->next) { - int len; + int len = 0, r; /* CAREFUL!!! These are parsed by "erl_epmd.erl" so a slight change in syntax will break < OTP R3A */ - erts_snprintf(wbuf, sizeof(wbuf), "old/unused name <%s>, port = %d, fd = %d \n", - node->symname,node->port, node->fd); - len = strlen(wbuf) + 1; - if (reply(g, fd,wbuf,len) != len) + len += copy_str(&wbuf[len], "old/unused name <"); + len += copy_str(&wbuf[len], node->symname); + r = erts_snprintf(&wbuf[len], sizeof(wbuf)-len, + ">, port = %d, fd = %d \n", + node->port, node->fd); + if (r < 0) + goto failed_dump_resp2; + len += r + 1; + if (reply(g, fd,wbuf,len) != len) { + failed_dump_resp2: dbg_tty_printf(g,1,"failed to send DUMP_RESP"); return; } @@ -933,7 +1066,7 @@ static int node_unreg(EpmdVars *g,char *name) Node *node = g->nodes.reg; /* Point to first node */ for (; node; prev = &node->next, node = node->next) - if (strcmp(node->symname, name) == 0) + if (is_same_str(node->symname, name)) { dbg_tty_printf(g,1,"unregistering '%s:%d', port %d", node->symname, node->creation, node->port); @@ -1013,6 +1146,7 @@ static int node_unreg_sock(EpmdVars *g,int fd) */ static Node *node_reg2(EpmdVars *g, + int namelen, char* name, int fd, int port, @@ -1025,6 +1159,7 @@ static Node 
*node_reg2(EpmdVars *g, { Node *prev; /* Point to previous node or NULL */ Node *node; /* Point to first node */ + int sz; /* Can be NULL; means old style */ if (extra == NULL) @@ -1032,21 +1167,47 @@ static Node *node_reg2(EpmdVars *g, /* Fail if node name is too long */ - if (strlen(name) > MAXSYMLEN) + + if (namelen > MAXSYMLEN) { - dbg_printf(g,0,"node name is too long (%d) %s", strlen(name), name); + too_long_name: + dbg_printf(g,0,"node name is too long (%d) %s", namelen, name); return NULL; } + + sz = verify_utf8(name, namelen, 0); + if (sz > 255) + goto too_long_name; + + if (sz < 0) { + dbg_printf(g,0,"invalid node name encoding"); + return NULL; + } + if (extralen > MAXSYMLEN) { - dbg_printf(g,0,"extra data is too long (%d) %s", strlen(name), name); +#if 0 + too_long_extra: +#endif + dbg_printf(g,0,"extra data is too long (%d) %s", extralen, extra); return NULL; } +#if 0 /* Should we require valid utf8 here? */ + sz = verify_utf8(extra, extralen, 0); + if (sz > 255) + goto too_long_extra; + + if (sz < 0) { + dbg_printf(g,0,"invalid extra data encoding"); + return NULL; + } +#endif + /* Fail if it is already registered */ for (node = g->nodes.reg; node; node = node->next) - if (strcmp(node->symname, name) == 0) + if (is_same_str(node->symname, name)) { dbg_printf(g,0,"node name already occupied %s", name); return NULL; @@ -1058,7 +1219,7 @@ static Node *node_reg2(EpmdVars *g, prev = NULL; for (node = g->nodes.unreg; node; prev = node, node = node->next) - if (strcmp(node->symname, name) == 0) + if (is_same_str(node->symname, name)) { dbg_tty_printf(g,1,"reusing slot with same name '%s'", node->symname); @@ -1126,7 +1287,7 @@ static Node *node_reg2(EpmdVars *g, node->lowvsn = lowvsn; node->extralen = extralen; memcpy(node->extra,extra,extralen); - strcpy(node->symname,name); + copy_str(node->symname,name); select_fd_set(g, fd); if (highvsn == 0) { diff --git a/erts/epmd/test/epmd_SUITE.erl b/erts/epmd/test/epmd_SUITE.erl index fd9969ae2b..fc0abef400 100644 
--- a/erts/epmd/test/epmd_SUITE.erl +++ b/erts/epmd/test/epmd_SUITE.erl @@ -45,6 +45,8 @@ register_names_1/1, register_names_2/1, register_duplicate_name/1, + unicode_name/1, + long_unicode_name/1, get_port_nr/1, slow_get_port_nr/1, unregister_others_name_1/1, @@ -107,7 +109,8 @@ suite() -> [{ct_hooks,[ts_install_cth]}]. all() -> [register_name, register_names_1, register_names_2, - register_duplicate_name, get_port_nr, slow_get_port_nr, + register_duplicate_name, unicode_name, long_unicode_name, + get_port_nr, slow_get_port_nr, unregister_others_name_1, unregister_others_name_2, register_overflow, name_with_null_inside, name_null_terminated, stupid_names_req, no_data, @@ -197,6 +200,37 @@ register_duplicate_name(Config) when is_list(Config) -> ?line ok = close(Sock), % Unregister ok. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +unicode_name(doc) -> + ["Check that we can register and lookup a unicode name"]; +unicode_name(suite) -> + []; +unicode_name(Config) when is_list(Config) -> + ok = epmdrun(), + NodeName = [16#1f608], + {ok,Sock} = register_node_v2(4711, 72, 0, 5, 5, NodeName, []), + {ok,NodeInfo} = port_please_v2(NodeName), + NodeName = NodeInfo#node_info.node_name, + ok = close(Sock), + ok. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +long_unicode_name(doc) -> + ["Check that we can register and lookup a long unicode name"]; +long_unicode_name(suite) -> + []; +long_unicode_name(Config) when is_list(Config) -> + ok = epmdrun(), + BaseChar = 16#1f600, + NodeName = lists:seq(BaseChar, BaseChar+200), % will be 800 bytes long + {ok,Sock} = register_node_v2(4711, 72, 0, 5, 5, NodeName, []), + {ok,NodeInfo} = port_please_v2(NodeName), + NodeName = NodeInfo#node_info.node_name, + ok = close(Sock), + ok. + % Internal function to register a node name, no close, i.e. unregister register_node(Name) -> @@ -205,9 +239,10 @@ register_node(Name,Port) -> register_node_v2(Port,$M,0,5,5,Name,""). 
register_node_v2(Port, NodeType, Prot, HVsn, LVsn, Name, Extra) -> + Utf8Name = unicode:characters_to_binary(Name), Req = [?EPMD_ALIVE2_REQ, put16(Port), NodeType, Prot, put16(HVsn), put16(LVsn), - size16(Name), Name, + put16(size(Utf8Name)), binary_to_list(Utf8Name), size16(Extra), Extra], case send_req(Req) of {ok,Sock} -> @@ -226,7 +261,8 @@ register_node_v2(Port, NodeType, Prot, HVsn, LVsn, Name, Extra) -> % Internal function to fetch information about a node port_please_v2(Name) -> - case send_req([?EPMD_PORT_PLEASE2_REQ, Name]) of + case send_req([?EPMD_PORT_PLEASE2_REQ, + binary_to_list(unicode:characters_to_binary(Name))]) of {ok,Sock} -> case recv_until_sock_closes(Sock) of {ok, Resp} -> @@ -247,7 +283,7 @@ parse_port2_resp(Resp) -> ELen:16,Extra:ELen/binary>> when Res =:= 0 -> {ok, #node_info{port=Port,node_type=NodeType,prot=Prot, hvsn=HVsn,lvsn=LVsn, - node_name=binary_to_list(NodeName), + node_name=unicode:characters_to_list(NodeName), extra=binary_to_list(Extra)}}; _Other -> test_server:format("invalid port2 resp: ~p~n", @@ -737,7 +773,7 @@ buffer_overrun_2(doc) -> ["Test security vulnerability in fake extra lengths in alive2_req"]; buffer_overrun_2(Config) when is_list(Config) -> ?line ok = epmdrun(), - ?line [false | Rest] = [hostile2(N) || N <- lists:seq(255,10000)], + ?line [false | Rest] = [hostile2(N) || N <- lists:seq(255*4,10000)], ?line true = alltrue(Rest), ok. hostile(N) -> @@ -880,6 +916,7 @@ no_live_killing(Config) when is_list(Config) -> ?line close(Sock3), ok. + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Terminate all tests with killing epmd. -- cgit v1.2.3