From 3aa60cc472bc330dbe9360eb27a1f340b7e23dc6 Mon Sep 17 00:00:00 2001
From: Rickard Green <rickard@erlang.org>
Date: Tue, 22 Jan 2013 18:35:35 +0100
Subject: Add UTF-8 node name support for epmd

---
 erts/epmd/src/epmd_cli.c      |  13 ++-
 erts/epmd/src/epmd_int.h      |  20 +++-
 erts/epmd/src/epmd_srv.c      | 215 ++++++++++++++++++++++++++++++++++++------
 erts/epmd/test/epmd_SUITE.erl |  47 ++++++++-
 4 files changed, 254 insertions(+), 41 deletions(-)

diff --git a/erts/epmd/src/epmd_cli.c b/erts/epmd/src/epmd_cli.c
index 74408e3ebe..1d4de64b63 100644
--- a/erts/epmd/src/epmd_cli.c
+++ b/erts/epmd/src/epmd_cli.c
@@ -22,6 +22,7 @@
 #endif
 #include "epmd.h"     /* Renamed from 'epmd_r4.h' */
 #include "epmd_int.h"
+#include "erl_printf.h" /* erts_snprintf */
 
 /* forward declarations */
 
@@ -114,16 +115,18 @@ void epmd_call(EpmdVars *g,int what)
 	epmd_cleanup_exit(g,1);
     }
     j = ntohl(i);
-    if (!g->silent)
-	printf("epmd: up and running on port %d with data:\n", j);
+    if (!g->silent) {
+	rval = erts_snprintf(buf, OUTBUF_SIZE,
+			     "epmd: up and running on port %d with data:\n", j);
+	write(1, buf, rval);
+    }
     while(1) {
-	if ((rval = read(fd,buf,1)) <= 0)  {
+	if ((rval = read(fd,buf,OUTBUF_SIZE)) <= 0)  {
 	    close(fd);
 	    epmd_cleanup_exit(g,0);
 	}
-	buf[rval] = '\0';
 	if (!g->silent)
-	    printf("%s",buf);
+	    write(1, buf, rval); /* Potentially UTF-8 encoded */
     }
 }
 
diff --git a/erts/epmd/src/epmd_int.h b/erts/epmd/src/epmd_int.h
index 14d05c3f19..b25412c905 100644
--- a/erts/epmd/src/epmd_int.h
+++ b/erts/epmd/src/epmd_int.h
@@ -226,13 +226,25 @@
 #define MAX_UNREG_COUNT 1000
 #define DEBUG_MAX_UNREG_COUNT 5
 
-/* Maximum length of a node name == atom name */
-#define MAXSYMLEN 255
+/*
+ * Maximum length in bytes of a node name == atom name:
+ *   at most 255 characters, each at most 4 bytes in UTF-8 -> 255*4
+ */
+#define MAXSYMLEN (255*4)
 
 #define MAX_LISTEN_SOCKETS 16
 
-#define INBUF_SIZE 1024
-#define OUTBUF_SIZE 1024
+/*
+ * Largest request: ALIVE2_REQ
+ *  2 + 13 + 2*MAXSYMLEN
+ * Largest response: PORT2_RESP
+ *  2 + 14 + 2*MAXSYMLEN
+ *
+ * That is, 3*MAXSYMLEN should be large enough
+ */
+
+#define INBUF_SIZE (3*MAXSYMLEN)
+#define OUTBUF_SIZE (3*MAXSYMLEN)
 
 #define get_int16(s) ((((unsigned char*)  (s))[0] << 8) | \
                       (((unsigned char*)  (s))[1]))
diff --git a/erts/epmd/src/epmd_srv.c b/erts/epmd/src/epmd_srv.c
index 36565b7438..2a74c4955e 100644
--- a/erts/epmd/src/epmd_srv.c
+++ b/erts/epmd/src/epmd_srv.c
@@ -73,7 +73,7 @@ static int conn_open(EpmdVars*,int);
 static int conn_close_fd(EpmdVars*,int);
 
 static void node_init(EpmdVars*);
-static Node *node_reg2(EpmdVars*,char*, int, int, unsigned char, unsigned char, int, int, int, char*);
+static Node *node_reg2(EpmdVars*, int, char*, int, int, unsigned char, unsigned char, int, int, int, char*);
 static int node_unreg(EpmdVars*,char*);
 static int node_unreg_sock(EpmdVars*,int);
 
@@ -81,6 +81,113 @@ static int reply(EpmdVars*,int,char *,int);
 static void dbg_print_buf(EpmdVars*,char *,int);
 static void print_names(EpmdVars*);
 
+static int is_same_str(char *x, char *y)
+{
+    int i = 0;
+    /*
+     * Byte-wise equality test for two null-terminated strings:
+     * returns 1 if x and y are identical, 0 otherwise.
+     * Hand-rolled instead of strcmp() == 0 out of caution with UTF-8;
+     * assumes both are null-terminated, correctly encoded UTF-8.
+     */
+    while (x[i] == y[i]) {
+	if (x[i] == '\0')	/* both strings ended at the same index */
+	    return 1;
+	i++;
+    }
+    return 0;	/* first differing byte found */
+}
+
+static int copy_str(char *x, char *y)
+{
+    int i = 0;
+    /*
+     * Copies the null-terminated string y, terminator included, into x
+     * and returns the number of bytes copied excluding the terminator.
+     * No bound is checked on x. Hand-rolled instead of strcpy() out of
+     * caution with UTF-8; assumes null-terminated, valid UTF-8 input.
+     */
+    while (1) {
+	x[i] = y[i];
+	if (y[i] == '\0')	/* terminator already stored in x[i] */
+	    return i;
+	i++;
+    }
+}
+
+static int length_str(char *x)
+{
+    int i = 0;
+    /*
+     * Returns the length in bytes (not characters) of the
+     * null-terminated string x, terminator excluded.
+     * Hand-rolled instead of strlen() out of caution with UTF-8;
+     * assumes null-terminated, correctly encoded UTF-8.
+     */
+    while (x[i])
+	i++;
+    return i;
+}
+
+static int verify_utf8(const char *src, int sz, int null_term)
+{   /* Validate sz bytes at src as UTF-8; return the character count, or -1 if invalid. */
+    unsigned char *source = (unsigned char *) src;
+    int size = sz;	/* bytes left to examine */
+    int num_chars = 0;	/* complete characters accepted so far */
+    while (size) {	/* one character per iteration */
+	if (null_term && (*source) == 0)	/* optional early stop at the first NUL byte */
+	    return num_chars;
+	if (((*source) & ((unsigned char) 0x80)) == 0) {	/* 0xxxxxxx: one byte */
+	    source++;	/* a NUL byte lands here when null_term is 0 */
+	    --size;
+	} else if (((*source) & ((unsigned char) 0xE0)) == 0xC0) {	/* 110xxxxx: two bytes */
+	    if (size < 2)	/* truncated sequence */
+		return -1;
+	    if (((source[1] & ((unsigned char) 0xC0)) != 0x80) ||
+		((*source) < 0xC2) /* overlong */) {
+		return -1;
+	    }
+	    source += 2;
+	    size -= 2;
+	} else if (((*source) & ((unsigned char) 0xF0)) == 0xE0) {	/* 1110xxxx: three bytes */
+	    if (size < 3)	/* truncated sequence */
+		return -1;
+	    if (((source[1] & ((unsigned char) 0xC0)) != 0x80) ||
+		((source[2] & ((unsigned char) 0xC0)) != 0x80) ||
+		(((*source) == 0xE0) && (source[1] < 0xA0)) /* overlong */ ) {
+		return -1;
+	    }
+	    if ((((*source) & ((unsigned char) 0xF)) == 0xD) && 
+		((source[1] & 0x20) != 0)) {
+		return -1;	/* U+D800..U+DFFF: surrogate code points are rejected */
+	    }
+	    source += 3;
+	    size -= 3;
+	} else if (((*source) & ((unsigned char) 0xF8)) == 0xF0) {	/* 11110xxx: four bytes */
+	    if (size < 4)	/* truncated sequence */
+		return -1;
+	    if (((source[1] & ((unsigned char) 0xC0)) != 0x80) ||
+		((source[2] & ((unsigned char) 0xC0)) != 0x80) ||
+		((source[3] & ((unsigned char) 0xC0)) != 0x80) ||
+		(((*source) == 0xF0) && (source[1] < 0x90)) /* overlong */) {
+		return -1;
+	    }
+	    if ((((*source) & ((unsigned char)0x7)) > 0x4U) ||
+		((((*source) & ((unsigned char)0x7)) == 0x4U) && 
+		 ((source[1] & ((unsigned char)0x3F)) > 0xFU))) {
+		return -1;	/* code point above U+10FFFF */
+	    }
+	    source += 4;
+	    size -= 4; 
+	} else {
+	    return -1;	/* stray continuation byte or lead byte >= 0xF8 */
+	}
+	++num_chars;
+    }
+    return num_chars;
+}
+
+
 static EPMD_INLINE void select_fd_set(EpmdVars* g, int fd)
 {
     FD_SET(fd, &g->orig_read_mask);
@@ -525,10 +632,11 @@ static void do_request(g, fd, s, buf, bsize)
 	    }
 	name = &buf[11];
 	name[namelen]='\000';
+
 	extra = &buf[11+namelen+2];
 	extra[extralen]='\000';
 	wbuf[0] = EPMD_ALIVE2_RESP;
-	if ((node = node_reg2(g, name, fd, eport, nodetype, protocol,
+	if ((node = node_reg2(g, namelen, name, fd, eport, nodetype, protocol,
 			      highvsn, lowvsn, extralen, extra)) == NULL) {
 	    wbuf[1] = 1; /* error */
 	    put_int16(99, wbuf+2);
@@ -573,22 +681,28 @@ static void do_request(g, fd, s, buf, bsize)
 
       {
 	char *name = &buf[1]; /* Points to node name */
+	int nsz;
 	Node *node;
-	
+
+	nsz = verify_utf8(name, bsize, 0);
+	if (nsz < 1 || 255 < nsz) {
+	    dbg_printf(g,0,"invalid node name in PORT2_REQ");
+	    return;
+	}
+
 	wbuf[0] = EPMD_PORT2_RESP;
 	for (node = g->nodes.reg; node; node = node->next) {
 	    int offset;
-	    if (strcmp(node->symname, name) == 0) {
+	    if (is_same_str(node->symname, name)) {
 		wbuf[1] = 0; /* ok */
 		put_int16(node->port,wbuf+2);
 		wbuf[4] = node->nodetype;
 		wbuf[5] = node->protocol;
 		put_int16(node->highvsn,wbuf+6);
 		put_int16(node->lowvsn,wbuf+8);
-		put_int16(strlen(node->symname),wbuf+10);
+		put_int16(length_str(node->symname),wbuf+10);
 		offset = 12;
-		strcpy(wbuf + offset,node->symname);
-		offset += strlen(node->symname);
+		offset += copy_str(wbuf + offset,node->symname);
 		put_int16(node->extralen,wbuf + offset);
 		offset += 2;
 		memcpy(wbuf + offset,node->extra,node->extralen);
@@ -629,15 +743,22 @@ static void do_request(g, fd, s, buf, bsize)
 
 	for (node = g->nodes.reg; node; node = node->next)
 	  {
-	    int len;
+	    int len = 0;
+	    int r;
 
 	    /* CAREFUL!!! These are parsed by "erl_epmd.erl" so a slight
 	       change in syntax will break < OTP R3A */
 
-	    erts_snprintf(wbuf, sizeof(wbuf), "name %s at port %d\n",node->symname, node->port);
-	    len = strlen(wbuf);
+	    len += copy_str(&wbuf[len], "name ");
+	    len += copy_str(&wbuf[len], node->symname);
+	    r = erts_snprintf(&wbuf[len], sizeof(wbuf)-len,
+			      " at port %d\n", node->port);
+	    if (r < 0)
+		goto failed_names_resp;
+	    len += r;
 	    if (reply(g, fd, wbuf, len) != len)
 	      {
+	      failed_names_resp:
 		dbg_tty_printf(g,1,"failed to send NAMES_RESP");
 		return;
 	      }
@@ -665,16 +786,22 @@ static void do_request(g, fd, s, buf, bsize)
 
 	for (node = g->nodes.reg; node; node = node->next)
 	  {
-	    int len;
+	      int len = 0, r;
 
 	    /* CAREFUL!!! These are parsed by "erl_epmd.erl" so a slight
 	       change in syntax will break < OTP R3A */
 
-	    erts_snprintf(wbuf, sizeof(wbuf), "active name     <%s> at port %d, fd = %d\n",
-		    node->symname, node->port, node->fd);
-	    len = strlen(wbuf) + 1;
-	    if (reply(g, fd,wbuf,len) != len)
+	      len += copy_str(&wbuf[len], "active name     <");
+	      len += copy_str(&wbuf[len], node->symname);
+	      r = erts_snprintf(&wbuf[len], sizeof(wbuf)-len,
+				"> at port %d, fd = %d\n",
+				node->port, node->fd);
+	      if (r < 0)
+		  goto failed_dump_resp;
+	      len += r + 1;
+	      if (reply(g, fd,wbuf,len) != len)
 	      {
+	      failed_dump_resp:
 		dbg_tty_printf(g,1,"failed to send DUMP_RESP");
 		return;
 	      }
@@ -682,16 +809,22 @@ static void do_request(g, fd, s, buf, bsize)
 
 	for (node = g->nodes.unreg; node; node = node->next)
 	  {
-	    int len;
+	      int len = 0, r;
 
 	    /* CAREFUL!!! These are parsed by "erl_epmd.erl" so a slight
 	       change in syntax will break < OTP R3A */
 
-	    erts_snprintf(wbuf, sizeof(wbuf), "old/unused name <%s>, port = %d, fd = %d \n",
-		    node->symname,node->port, node->fd);
-	    len = strlen(wbuf) + 1;
-	    if (reply(g, fd,wbuf,len) != len)
+	      len += copy_str(&wbuf[len], "old/unused name <");
+	      len += copy_str(&wbuf[len], node->symname);
+	      r = erts_snprintf(&wbuf[len], sizeof(wbuf)-len,
+				">, port = %d, fd = %d \n",
+				node->port, node->fd);
+	      if (r < 0)
+		  goto failed_dump_resp2;
+	      len += r + 1;
+	      if (reply(g, fd,wbuf,len) != len)
 	      {
+	      failed_dump_resp2:
 		dbg_tty_printf(g,1,"failed to send DUMP_RESP");
 		return;
 	      }
@@ -933,7 +1066,7 @@ static int node_unreg(EpmdVars *g,char *name)
   Node *node  = g->nodes.reg;	/* Point to first node */
 
   for (; node; prev = &node->next, node = node->next)
-    if (strcmp(node->symname, name) == 0)
+    if (is_same_str(node->symname, name))
       {
 	dbg_tty_printf(g,1,"unregistering '%s:%d', port %d",
 		       node->symname, node->creation, node->port);
@@ -1013,6 +1146,7 @@ static int node_unreg_sock(EpmdVars *g,int fd)
  */
 
 static Node *node_reg2(EpmdVars *g,
+		       int namelen,
 		       char* name,
 		       int fd,
 		       int port,
@@ -1025,6 +1159,7 @@ static Node *node_reg2(EpmdVars *g,
 {
   Node *prev;			/* Point to previous node or NULL */
   Node *node;			/* Point to first node */
+  int sz;
 
   /* Can be NULL; means old style */
   if (extra == NULL)
@@ -1032,21 +1167,47 @@ static Node *node_reg2(EpmdVars *g,
 
   /* Fail if node name is too long */
 
-  if (strlen(name) > MAXSYMLEN)
+
+  if (namelen > MAXSYMLEN)
     {
-      dbg_printf(g,0,"node name is too long (%d) %s", strlen(name), name);
+    too_long_name:
+      dbg_printf(g,0,"node name is too long (%d) %s", namelen, name);
       return NULL;
     }
+
+  sz = verify_utf8(name, namelen, 0);
+  if (sz > 255)
+      goto too_long_name;
+
+  if (sz < 0) {
+      dbg_printf(g,0,"invalid node name encoding");
+      return NULL;
+  }
+
   if (extralen > MAXSYMLEN)
     {
-      dbg_printf(g,0,"extra data is too long (%d) %s", strlen(name), name);
+#if 0
+    too_long_extra:
+#endif
+      dbg_printf(g,0,"extra data is too long (%d) %s", extralen, extra);
       return NULL;
     }
 
+#if 0 /* Should we require valid utf8 here? */
+  sz = verify_utf8(extra, extralen, 0);
+  if (sz > 255)
+      goto too_long_extra;
+
+  if (sz < 0) {
+      dbg_printf(g,0,"invalid extra data encoding");
+      return NULL;
+  }
+#endif
+
   /* Fail if it is already registered */
 
   for (node = g->nodes.reg; node; node = node->next)
-    if (strcmp(node->symname, name) == 0)
+    if (is_same_str(node->symname, name))
       {
 	dbg_printf(g,0,"node name already occupied %s", name);
 	return NULL;
@@ -1058,7 +1219,7 @@ static Node *node_reg2(EpmdVars *g,
   prev = NULL;
 
   for (node = g->nodes.unreg; node; prev = node, node = node->next)
-    if (strcmp(node->symname, name) == 0)
+    if (is_same_str(node->symname, name))
       {
 	dbg_tty_printf(g,1,"reusing slot with same name '%s'", node->symname);
 
@@ -1126,7 +1287,7 @@ static Node *node_reg2(EpmdVars *g,
   node->lowvsn   = lowvsn;
   node->extralen = extralen;
   memcpy(node->extra,extra,extralen);
-  strcpy(node->symname,name);
+  copy_str(node->symname,name);
   select_fd_set(g, fd);
 
   if (highvsn == 0) {
diff --git a/erts/epmd/test/epmd_SUITE.erl b/erts/epmd/test/epmd_SUITE.erl
index fd9969ae2b..fc0abef400 100644
--- a/erts/epmd/test/epmd_SUITE.erl
+++ b/erts/epmd/test/epmd_SUITE.erl
@@ -45,6 +45,8 @@
     register_names_1/1,
     register_names_2/1,
     register_duplicate_name/1,
+    unicode_name/1,
+    long_unicode_name/1,
     get_port_nr/1,
     slow_get_port_nr/1,
     unregister_others_name_1/1,
@@ -107,7 +109,8 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 
 all() -> 
     [register_name, register_names_1, register_names_2,
-     register_duplicate_name, get_port_nr, slow_get_port_nr,
+     register_duplicate_name, unicode_name, long_unicode_name,
+     get_port_nr, slow_get_port_nr,
      unregister_others_name_1, unregister_others_name_2,
      register_overflow, name_with_null_inside,
      name_null_terminated, stupid_names_req, no_data,
@@ -197,6 +200,37 @@ register_duplicate_name(Config) when is_list(Config) ->
     ?line ok = close(Sock),			% Unregister
     ok.
 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+unicode_name(doc) ->
+    ["Check that we can register and lookup a unicode name"];
+unicode_name(suite) ->
+    [];
+unicode_name(Config) when is_list(Config) ->
+    ok = epmdrun(),
+    NodeName = [16#1f608], % one code point above 16#ffff, i.e. 4 bytes in UTF-8
+    {ok,Sock} = register_node_v2(4711, 72, 0, 5, 5, NodeName, []),
+    {ok,NodeInfo} = port_please_v2(NodeName),
+    NodeName = NodeInfo#node_info.node_name, % match fails unless the name round-trips
+    ok = close(Sock), % Unregister
+    ok.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+long_unicode_name(doc) ->
+    ["Check that we can register and lookup a long unicode name"];
+long_unicode_name(suite) ->
+    [];
+long_unicode_name(Config) when is_list(Config) ->
+    ok = epmdrun(),
+    BaseChar = 16#1f600,
+    NodeName = lists:seq(BaseChar, BaseChar+200), % 201 code points, 4 bytes each in UTF-8 = 804 bytes
+    {ok,Sock} = register_node_v2(4711, 72, 0, 5, 5, NodeName, []),
+    {ok,NodeInfo} = port_please_v2(NodeName),
+    NodeName = NodeInfo#node_info.node_name, % match fails unless the name round-trips
+    ok = close(Sock), % Unregister
+    ok.
+
 % Internal function to register a node name, no close, i.e. unregister
 
 register_node(Name) ->
@@ -205,9 +239,10 @@ register_node(Name,Port) ->
     register_node_v2(Port,$M,0,5,5,Name,"").
 
 register_node_v2(Port, NodeType, Prot, HVsn, LVsn, Name, Extra) ->
+    Utf8Name = unicode:characters_to_binary(Name),
     Req = [?EPMD_ALIVE2_REQ, put16(Port), NodeType, Prot,
 	   put16(HVsn), put16(LVsn),
-	   size16(Name), Name,
+	   put16(size(Utf8Name)), binary_to_list(Utf8Name),
 	   size16(Extra), Extra],
     case send_req(Req) of
 	{ok,Sock} ->
@@ -226,7 +261,8 @@ register_node_v2(Port, NodeType, Prot, HVsn, LVsn, Name, Extra) ->
 % Internal function to fetch information about a node
 
 port_please_v2(Name) ->
-    case send_req([?EPMD_PORT_PLEASE2_REQ, Name]) of
+    case send_req([?EPMD_PORT_PLEASE2_REQ,
+		   binary_to_list(unicode:characters_to_binary(Name))]) of
 	{ok,Sock} ->
 	    case recv_until_sock_closes(Sock) of
 		{ok, Resp} ->
@@ -247,7 +283,7 @@ parse_port2_resp(Resp) ->
 	  ELen:16,Extra:ELen/binary>> when Res =:= 0 ->
 	    {ok, #node_info{port=Port,node_type=NodeType,prot=Prot,
 			    hvsn=HVsn,lvsn=LVsn,
-			    node_name=binary_to_list(NodeName),
+			    node_name=unicode:characters_to_list(NodeName),
 			    extra=binary_to_list(Extra)}};
 	_Other ->
 	    test_server:format("invalid port2 resp: ~p~n",
@@ -737,7 +773,7 @@ buffer_overrun_2(doc) ->
     ["Test security vulnerability in fake extra lengths in alive2_req"];
 buffer_overrun_2(Config) when is_list(Config) ->
     ?line ok = epmdrun(),
-    ?line [false | Rest] = [hostile2(N) || N <- lists:seq(255,10000)],
+    ?line [false | Rest] = [hostile2(N) || N <- lists:seq(255*4,10000)],
     ?line true = alltrue(Rest),
     ok.
 hostile(N) ->
@@ -880,6 +916,7 @@ no_live_killing(Config) when is_list(Config) ->
     ?line close(Sock3),
     ok.
 
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 % Terminate all tests with killing epmd.
 
-- 
cgit v1.2.3