aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/beam/erl_unicode.c
diff options
context:
space:
mode:
author Sverker Eriksson <sverker@erlang.org> 2013-10-16 18:15:47 +0200
committer Sverker Eriksson <sverker@erlang.org> 2013-10-16 18:16:26 +0200
commit 717cf073d2c4ccbb508a272486ec83369ed1f043 (patch)
tree 86d6321d6ab6e906297b602e151630277b78997c /erts/emulator/beam/erl_unicode.c
parent b95da0ad6236be268d63fd960934c787971e1fd0 (diff)
parent b6b0b73ecec7facefb3b9c5a7ef663599cfee4aa (diff)
download otp-717cf073d2c4ccbb508a272486ec83369ed1f043.tar.gz
otp-717cf073d2c4ccbb508a272486ec83369ed1f043.tar.bz2
otp-717cf073d2c4ccbb508a272486ec83369ed1f043.zip
Merge branch 'sverk/load-nif-unicode'
OTP-11408

* sverk/load-nif-unicode:
  erts: Fix bug in atom to filename conversions
  Fix open_ddll for win
  erts, crypto: Support NIF library with unicode filename on windows
  erts: Factor out erts_convert_filename_to_wchar()
  erts: Fix compiler warning
  erts: Fix loading of NIF library with unicode in path
  erts: Remove unused constant DRIVER_TAB_SIZE
Diffstat (limited to 'erts/emulator/beam/erl_unicode.c')
-rw-r--r--erts/emulator/beam/erl_unicode.c142
1 files changed, 93 insertions, 49 deletions
diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c
index a363051062..7e3c6681d9 100644
--- a/erts/emulator/beam/erl_unicode.c
+++ b/erts/emulator/beam/erl_unicode.c
@@ -2026,11 +2026,11 @@ char *erts_convert_filename_to_encoding(Eterm name, char *statbuf, size_t statbu
} else if (is_binary(name)) {
byte *temp_alloc = NULL;
byte *bytes;
- byte *err_pos;
- Uint size,num_chars;
+ Uint size;
size = binary_size(name);
bytes = erts_get_aligned_binary_bytes(name, &temp_alloc);
+
if (encoding != ERL_FILENAME_WIN_WCHAR) {
/*Add 0 termination only*/
if (used)
@@ -2042,36 +2042,11 @@ char *erts_convert_filename_to_encoding(Eterm name, char *statbuf, size_t statbu
}
memcpy(name_buf,bytes,size);
name_buf[size]=0;
- } else if (erts_analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != ERTS_UTF8_OK ||
- erts_get_user_requested_filename_encoding() == ERL_FILENAME_LATIN1) {
- byte *p;
- /* What to do now? Maybe latin1, so just take byte for byte instead */
- if (used)
- *used = (Sint) (size+1)*2;
- if ((size+1)*2 > statbuf_size) {
- name_buf = (char *) erts_alloc(alloc_type, (size+1)*2);
- } else {
- name_buf = statbuf;
- }
- p = (byte *) name_buf;
- while (size--) {
- *p++ = *bytes++;
- *p++ = 0;
- }
- *p++ = 0;
- *p++ = 0;
- } else { /* WIN_WCHAR and valid UTF8 */
- if (used)
- *used = (Sint) (num_chars+1)*2;
- if ((num_chars+1)*2 > statbuf_size) {
- name_buf = (char *) erts_alloc(alloc_type, (num_chars+1)*2);
- } else {
- name_buf = statbuf;
- }
- erts_copy_utf8_to_utf16_little((byte *) name_buf, bytes, num_chars);
- name_buf[num_chars*2] = 0;
- name_buf[num_chars*2+1] = 0;
- }
+ } else {
+ name_buf = erts_convert_filename_to_wchar(bytes, size,
+ statbuf, statbuf_size,
+ alloc_type, used, 0);
+ }
erts_free_aligned_binary_bytes(temp_alloc);
} else {
return NULL;
@@ -2079,6 +2054,50 @@ char *erts_convert_filename_to_encoding(Eterm name, char *statbuf, size_t statbu
return name_buf;
}
+char* erts_convert_filename_to_wchar(byte* bytes, Uint size, /* Convert a filename of `size` bytes at `bytes` into 2-byte little-endian units ending in two zero bytes. */
+ char *statbuf, size_t statbuf_size, /* Caller-supplied buffer; returned as the result when the computed need fits in statbuf_size. */
+ ErtsAlcType_t alloc_type, Sint* used, /* alloc_type is passed to erts_alloc() when statbuf is too small; used (may be NULL) receives the byte count written. */
+ Uint extra_wchars) /* Extra 2-byte units to reserve in the buffer beyond the name and its terminator; they are not written here. */
+{ /* Returns statbuf or a buffer obtained from erts_alloc(); presumably the caller frees the latter -- TODO confirm against callers. */
+ byte *err_pos; /* Filled in by erts_analyze_utf8(); not read afterwards in this function. */
+ Uint num_chars; /* Character count reported by erts_analyze_utf8(). */
+ char* name_buf = NULL; /* Result buffer: either statbuf or a fresh allocation. */
+ Sint need; /* Required buffer size in bytes. */
+ char *p; /* Write cursor; ends up one past the terminator. */
+
+ if (erts_analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != ERTS_UTF8_OK ||
+ erts_get_user_requested_filename_encoding() == ERL_FILENAME_LATIN1) { /* Input did not analyze as OK UTF-8, or latin1 filenames were requested. */
+
+ /* What to do now? Maybe latin1, so just take byte for byte instead */
+ need = (Sint) (size + extra_wchars + 1) * 2; /* Two bytes per input byte, per extra unit, and for the terminator. */
+ if (need > statbuf_size) { /* NOTE(review): compares Sint with size_t; presumably a signed/unsigned comparison -- confirm need cannot be negative. */
+ name_buf = (char *) erts_alloc(alloc_type, need);
+ } else {
+ name_buf = statbuf;
+ }
+ p = name_buf;
+ while (size--) { /* Widen each input byte to a 2-byte unit: low byte first, then a zero high byte. */
+ *p++ = *bytes++;
+ *p++ = 0;
+ }
+ } else { /* WIN_WCHAR and valid UTF8 */
+ need = (Sint) (num_chars + extra_wchars + 1) * 2; /* Two bytes per counted character, per extra unit, and for the terminator. */
+ if (need > statbuf_size) {
+ name_buf = (char *) erts_alloc(alloc_type, need);
+ } else {
+ name_buf = statbuf;
+ }
+ erts_copy_utf8_to_utf16_little((byte *) name_buf, bytes, num_chars); /* NOTE(review): sizing assumes exactly one 2-byte unit per character -- confirm behaviour for code points above U+FFFF. */
+ p = name_buf + num_chars*2; /* Position the cursor just past the converted characters. */
+ }
+ *p++ = 0; /* Two zero bytes form the terminator for both branches. */
+ *p++ = 0;
+ if (used)
+ *used = p - name_buf; /* Bytes written including the terminator; the extra_wchars reservation is not counted. */
+ return name_buf;
+}
+
+
static int filename_len_16bit(byte *str)
{
byte *p = str;
@@ -2158,16 +2177,31 @@ Sint erts_native_filename_need(Eterm ioterm, int encoding)
ap = atom_tab(atom_val(ioterm));
switch (encoding) {
case ERL_FILENAME_LATIN1:
- need = ap->len;
+ need = ap->latin1_chars; /* May be -1 */
break;
case ERL_FILENAME_UTF8_MAC:
case ERL_FILENAME_UTF8:
- for (i = 0; i < ap->len; i++) {
- need += (ap->name[i] >= 0x80) ? 2 : 1;
- }
+ need = ap->len;
break;
case ERL_FILENAME_WIN_WCHAR:
- need = 2*(ap->len);
+ if (ap->latin1_chars >= 0) {
+ need = 2* ap->latin1_chars;
+ }
+ else {
+ for (i = 0; i < ap->len; ) {
+ if (ap->name[i] < 0x80) {
+ i++;
+ } else if (ap->name[i] < 0xE0) {
+ i += 2;
+ } else if (ap->name[i] < 0xF0) {
+ i += 3;
+ } else {
+ need = -1;
+ break;
+ }
+ need += 2;
+ }
+ }
break;
default:
need = -1;
@@ -2297,26 +2331,36 @@ void erts_native_filename_put(Eterm ioterm, int encoding, byte *p)
switch (encoding) {
case ERL_FILENAME_LATIN1:
for (i = 0; i < ap->len; i++) {
- *p++ = ap->name[i];
- }
- break;
- case ERL_FILENAME_UTF8_MAC:
- case ERL_FILENAME_UTF8:
- for (i = 0; i < ap->len; i++) {
- if(ap->name[i] < 0x80) {
+ if (ap->name[i] < 0x80) {
*p++ = ap->name[i];
} else {
- *p++ = (((ap->name[i]) >> 6) | ((byte) 0xC0));
- *p++ = (((ap->name[i]) & 0x3F) | ((byte) 0x80));
+ ASSERT(ap->name[i] < 0xC4);
+ *p++ = ((ap->name[i] & 3) << 6) | (ap->name[i+1] & 0x3F);
+ i++;
}
}
break;
+ case ERL_FILENAME_UTF8_MAC:
+ case ERL_FILENAME_UTF8:
+ sys_memcpy(p, ap->name, ap->len);
+ break;
case ERL_FILENAME_WIN_WCHAR:
for (i = 0; i < ap->len; i++) {
/* Little endian */
- *p++ = ap->name[i];
- *p++ = 0;
- }
+ if (ap->name[i] < 0x80) {
+ *p++ = ap->name[i];
+ *p++ = 0;
+ } else if (ap->name[i] < 0xE0) {
+ *p++ = ((ap->name[i] & 3) << 6) | (ap->name[i+1] & 0x3F);
+ *p++ = ((ap->name[i] & 0x1C) >> 2);
+ i++;
+ } else {
+ ASSERT(ap->name[i] < 0xF0);
+ *p++ = ((ap->name[i+1] & 3) << 6) | (ap->name[i+2] & 0x3C);
+ *p++ = ((ap->name[i] & 0xF) << 4) | ((ap->name[i+1] & 0x3C) >> 2);
+ i += 2;
+ }
+ }
break;
default:
ASSERT(0);