From b553664f54034e8c04ae6f9cc44f16b7f516518b Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Fri, 11 Jan 2013 17:27:29 +0100 Subject: erl_interface: utf8 atoms continued --- lib/erl_interface/src/legacy/erl_eterm.c | 158 ++++++++++++++++++++++++------- 1 file changed, 124 insertions(+), 34 deletions(-) (limited to 'lib/erl_interface/src/legacy/erl_eterm.c') diff --git a/lib/erl_interface/src/legacy/erl_eterm.c b/lib/erl_interface/src/legacy/erl_eterm.c index 8d559f0f55..aa0fd5ddcf 100644 --- a/lib/erl_interface/src/legacy/erl_eterm.c +++ b/lib/erl_interface/src/legacy/erl_eterm.c @@ -36,6 +36,7 @@ #include "erl_error.h" #include "erl_internal.h" #include "ei_internal.h" +#include "putget.h" #define ERL_IS_BYTE(x) (ERL_IS_INTEGER(x) && (ERL_INT_VALUE(x) & ~0xFF) == 0) @@ -142,9 +143,7 @@ ETERM *erl_mk_atom (const char *s) ep = erl_alloc_eterm(ERL_ATOM); ERL_COUNT(ep) = 1; - ERL_ATOM_SIZE(ep) = strlen(s); - if ((ERL_ATOM_PTR(ep) = strsave(s)) == NULL) - { + if (erl_atom_init_latin1(&ep->uval.aval.d, s) == NULL) { erl_free_term(ep); erl_errno = ENOMEM; return NULL; @@ -152,6 +151,65 @@ ETERM *erl_mk_atom (const char *s) return ep; } +char* erl_atom_ptr_latin1(Erl_Atom_data* a) +{ + if (a->latin1 == NULL) { + enum erlang_char_encoding enc; + a->lenL = utf8_to_latin1(NULL, a->utf8, a->lenU, a->lenU, &enc); + if (a->lenL < 0) { + a->lenL = 0; + return NULL; + } + if (enc == ERLANG_ASCII) { + a->latin1 = a->utf8; + } + else { + a->latin1 = malloc(a->lenL+1); + utf8_to_latin1(a->latin1, a->utf8, a->lenU, a->lenL, NULL); + a->latin1[a->lenL] = '\0'; + } + } + return a->latin1; +} + +char* erl_atom_ptr_utf8(Erl_Atom_data* a) +{ + if (a->utf8 == NULL) { + int dlen = a->lenL * 2; /* over estimation */ + a->utf8 = malloc(dlen + 1); + a->lenU = latin1_to_utf8(a->utf8, a->latin1, a->lenL, dlen, NULL); + a->utf8[a->lenU] = '\0'; + } + return a->utf8; + +} +int erl_atom_size_latin1(Erl_Atom_data* a) +{ + if (a->latin1 == NULL) { + erl_atom_ptr_latin1(a); + } + return a->lenL; +} +int erl_atom_size_utf8(Erl_Atom_data* a) +{ + if (a->utf8 == NULL) { + erl_atom_ptr_utf8(a); + } + return a->lenU; +} +char* erl_atom_init_latin1(Erl_Atom_data* a, const char* s) +{ + a->lenL = strlen(s); + if ((a->latin1 = strsave(s)) == NULL) + { + return NULL; + } + a->utf8 = NULL; + a->lenU = 0; + return a->latin1; +} + + /* * Given a string as input, creates a list. */ @@ -208,12 +266,19 @@ ETERM *erl_mk_pid(const char *node, ep = erl_alloc_eterm(ERL_PID); ERL_COUNT(ep) = 1; - if ((ERL_PID_NODE(ep) = strsave(node)) == NULL) + if (erl_atom_init_latin1(&ep->uval.pidval.node, node) == NULL) { erl_free_term(ep); erl_errno = ENOMEM; return NULL; } + erl_mk_pid_helper(ep, number, serial, creation); + return ep; +} + +void erl_mk_pid_helper(ETERM *ep, unsigned int number, + unsigned int serial, unsigned char creation) +{ ERL_PID_NUMBER(ep) = number & 0x7fff; /* 15 bits */ if (ei_internal_use_r9_pids_ports()) { ERL_PID_SERIAL(ep) = serial & 0x07; /* 3 bits */ @@ -222,7 +287,6 @@ ETERM *erl_mk_pid(const char *node, ERL_PID_SERIAL(ep) = serial & 0x1fff; /* 13 bits */ } ERL_PID_CREATION(ep) = creation & 0x03; /* 2 bits */ - return ep; } /* @@ -239,12 +303,18 @@ ETERM *erl_mk_port(const char *node, ep = erl_alloc_eterm(ERL_PORT); ERL_COUNT(ep) = 1; - if ((ERL_PORT_NODE(ep) = strsave(node)) == NULL) + if (erl_atom_init_latin1(&ep->uval.portval.node, node) == NULL) { erl_free_term(ep); erl_errno = ENOMEM; return NULL; } + erl_mk_port_helper(ep, number, creation); + return ep; +} + +void erl_mk_port_helper(ETERM* ep, unsigned number, unsigned char creation) +{ if (ei_internal_use_r9_pids_ports()) { ERL_PORT_NUMBER(ep) = number & 0x3ffff; /* 18 bits */ } @@ -252,29 +322,29 @@ ETERM *erl_mk_port(const char *node, ERL_PORT_NUMBER(ep) = number & 0x0fffffff; /* 18 bits */ } ERL_PORT_CREATION(ep) = creation & 0x03; /* 2 bits */ - return ep; } /* * Create any kind of reference. */ -ETERM *__erl_mk_reference (const char *node, +ETERM *__erl_mk_reference (ETERM* t, + const char *node, size_t len, unsigned int n[], unsigned char creation) { - ETERM * t; - - if (node == NULL) return NULL; - - t = erl_alloc_eterm(ERL_REF); - ERL_COUNT(t) = 1; - - if ((ERL_REF_NODE(t) = strsave(node)) == NULL) - { - erl_free_term(t); - erl_errno = ENOMEM; - return NULL; + if (t == NULL) { + if (node == NULL) return NULL; + + t = erl_alloc_eterm(ERL_REF); + ERL_COUNT(t) = 1; + + if (erl_atom_init_latin1(&t->uval.refval.node, node) == NULL) + { + erl_free_term(t); + erl_errno = ENOMEM; + return NULL; + } } ERL_REF_LEN(t) = len; ERL_REF_NUMBERS(t)[0] = n[0] & 0x3ffff; /* 18 bits */ @@ -294,7 +364,7 @@ ETERM *erl_mk_ref (const char *node, { unsigned int n[3] = {0, 0, 0}; n[0] = number; - return __erl_mk_reference(node, 1, n, creation); + return __erl_mk_reference(NULL, node, 1, n, creation); } /* @@ -307,7 +377,7 @@ erl_mk_long_ref (const char *node, { unsigned int n[3] = {0, 0, 0}; n[0] = n3; n[1] = n2; n[2] = n1; - return __erl_mk_reference(node, 3, n, creation); + return __erl_mk_reference(NULL, node, 3, n, creation); } /* @@ -758,6 +828,28 @@ int erl_iolist_length (const ETERM* term) return -1; } +static int erl_atom_copy(Erl_Atom_data* dst, const Erl_Atom_data* src) +{ + if (src->latin1 == src->utf8) { + dst->latin1 = dst->utf8 = strsave(src->latin1); + dst->lenL = dst->lenU = strlen(src->latin1); + } + else if (src->latin1) { + dst->latin1 = strsave(src->latin1); + dst->lenL = strlen(src->latin1); + dst->utf8 = NULL; + dst->lenU = 0; + } + else { + dst->utf8 = strsave(src->utf8); + dst->lenU = strlen(src->utf8); + dst->latin1 = NULL; + dst->lenL = 0; + } + return (dst->latin1 != NULL || dst->utf8 == NULL); +} + + /* * Return a brand NEW COPY of an ETERM. */ @@ -796,9 +888,7 @@ ETERM *erl_copy_term(const ETERM *ep) ERL_FLOAT_VALUE(cp) = ERL_FLOAT_VALUE(ep); break; case ERL_ATOM: - ERL_ATOM_SIZE(cp) = ERL_ATOM_SIZE(ep); - ERL_ATOM_PTR(cp) = strsave(ERL_ATOM_PTR(ep)); - if (ERL_ATOM_PTR(cp) == NULL) + if (!erl_atom_copy(&cp->uval.aval.d, &ep->uval.aval.d)) { erl_free_term(cp); erl_errno = ENOMEM; @@ -810,17 +900,17 @@ ETERM *erl_copy_term(const ETERM *ep) name and plug in. Somewhat ugly (also done with port and ref below). */ memcpy(&cp->uval.pidval, &ep->uval.pidval, sizeof(Erl_Pid)); - ERL_PID_NODE(cp) = strsave(ERL_PID_NODE(ep)); + erl_atom_copy(&cp->uval.pidval.node, &ep->uval.pidval.node); ERL_COUNT(cp) = 1; break; case ERL_PORT: memcpy(&cp->uval.portval, &ep->uval.portval, sizeof(Erl_Port)); - ERL_PORT_NODE(cp) = strsave(ERL_PORT_NODE(ep)); + erl_atom_copy(&cp->uval.portval.node, &ep->uval.portval.node); ERL_COUNT(cp) = 1; break; case ERL_REF: memcpy(&cp->uval.refval, &ep->uval.refval, sizeof(Erl_Ref)); - ERL_REF_NODE(cp) = strsave(ERL_REF_NODE(ep)); + erl_atom_copy(&cp->uval.refval.node, &ep->uval.refval.node); ERL_COUNT(cp) = 1; break; case ERL_LIST: @@ -883,29 +973,29 @@ int erl_print_term(FILE *fp, const ETERM *ep) j = i = doquote = 0; switch(ERL_TYPE(ep)) { - case ERL_ATOM: + case ERL_ATOM: { + char* adata = ERL_ATOM_PTR(ep); /* FIXME: what if some weird locale is in use? */ - if (!islower((int)ERL_ATOM_PTR(ep)[0])) + if (!islower(adata[0])) doquote = 1; for (i = 0; !doquote && i < ERL_ATOM_SIZE(ep); i++) { - doquote = !(isalnum((int)ERL_ATOM_PTR(ep)[i]) - || (ERL_ATOM_PTR(ep)[i] == '_')); + doquote = !(isalnum(adata[i]) || (adata[i] == '_')); } if (doquote) { putc('\'', fp); ch_written++; } - fputs(ERL_ATOM_PTR(ep), fp); + fputs(adata, fp); ch_written += ERL_ATOM_SIZE(ep); if (doquote) { putc('\'', fp); ch_written++; } break; - + } case ERL_VARIABLE: if (!isupper((int)ERL_VAR_NAME(ep)[0])) { doquote = 1; -- cgit v1.2.3