aboutsummaryrefslogtreecommitdiffstats
path: root/erts
diff options
context:
space:
mode:
authorPatrik Nyblom <[email protected]>2010-10-13 17:08:32 +0200
committerPatrik Nyblom <[email protected]>2010-11-29 13:59:12 +0100
commit4cf08709189ea8b7e2ae20f85c390abd04ae48ae (patch)
treeb9a1ee11e1672e03d8a80a0b8829bdaab75f66bf /erts
parent25e22145d129a80dcfc02c64dfe0b0d890a5e26d (diff)
downloadotp-4cf08709189ea8b7e2ae20f85c390abd04ae48ae.tar.gz
otp-4cf08709189ea8b7e2ae20f85c390abd04ae48ae.tar.bz2
otp-4cf08709189ea8b7e2ae20f85c390abd04ae48ae.zip
Teach filename to accept raw data and add filename enc option to emu
Diffstat (limited to 'erts')
-rw-r--r--erts/emulator/beam/atom.names1
-rw-r--r--erts/emulator/beam/bif.tab4
-rw-r--r--erts/emulator/beam/erl_init.c22
-rw-r--r--erts/emulator/beam/erl_unicode.c108
-rw-r--r--erts/emulator/beam/sys.h3
-rw-r--r--erts/emulator/sys/common/erl_sys_common_misc.c52
6 files changed, 153 insertions, 37 deletions
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names
index 93b8e3ec28..327620772f 100644
--- a/erts/emulator/beam/atom.names
+++ b/erts/emulator/beam/atom.names
@@ -549,7 +549,6 @@ atom waiting
atom wall_clock
atom warning
atom warning_msg
-atom win_wchar
atom wordsize
atom write_concurrency
atom xor
diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab
index 55166417e5..cf251d9016 100644
--- a/erts/emulator/beam/bif.tab
+++ b/erts/emulator/beam/bif.tab
@@ -797,8 +797,8 @@ bif erlang:nif_error/2
#
# Helpers for unicode filenames
#
-bif file:name2native/1
-bif file:native2name/1
+bif file:internal_name2native/1
+bif file:internal_native2name/1
bif file:native_name_encoding/0
#
# Obsolete
diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c
index a7892e143b..464ee750f7 100644
--- a/erts/emulator/beam/erl_init.c
+++ b/erts/emulator/beam/erl_init.c
@@ -908,7 +908,27 @@ erl_start(int argc, char **argv)
VERBOSE(DEBUG_SYSTEM,
("using display items %d\n",display_items));
break;
-
+    case 'f':
+        /*
+         * -fn<x>: select how file names are encoded.
+         *   u = UTF-8, l = latin1, a = unknown. On non-Windows builds
+         *   erts_init_sys_common_misc() then picks the encoding from the
+         *   locale when the request is ERL_FILENAME_UNKNOWN.
+         * Any other -f flag is rejected.
+         */
+        if (!strncmp(argv[i],"-fn",3)) {
+            arg = get_arg(argv[i]+3, argv[i+1], &i);
+            switch (*arg) {
+            case 'u':
+                erts_set_user_requested_filename_encoding(ERL_FILENAME_UTF8);
+                break;
+            case 'l':
+                erts_set_user_requested_filename_encoding(ERL_FILENAME_LATIN1);
+                break;
+            case 'a':
+                erts_set_user_requested_filename_encoding(ERL_FILENAME_UNKNOWN);
+                /* Without this break a valid "a" fell into the error path below. */
+                break;
+            default:
+                erts_fprintf(stderr, "bad filename encoding %s, can be (l,u or a)\n", arg);
+                erts_usage();
+            }
+        } else {
+            erts_fprintf(stderr, "%s unknown flag %s\n", argv[0], argv[i]);
+            erts_usage();
+        }
+        /* Leave the 'f' case on every path so nothing runs on into case 'l'. */
+        break;
case 'l':
display_loads++;
break;
diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c
index 671c3c0cdf..3e7a935cef 100644
--- a/erts/emulator/beam/erl_unicode.c
+++ b/erts/emulator/beam/erl_unicode.c
@@ -1876,6 +1876,10 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */
Uint x = unsigned_val(obj);
switch (encoding) {
case ERL_FILENAME_LATIN1:
+ if (x > 255) {
+ DESTROY_ESTACK(stack);
+ return ((Sint) -1);
+ }
need += 1;
break;
case ERL_FILENAME_UTF8:
@@ -2101,12 +2105,76 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */
-BIF_RETTYPE file_name2native_1(BIF_ALIST_1)
+BIF_RETTYPE file_internal_name2native_1(BIF_ALIST_1)
{
int encoding = erts_get_native_filename_encoding();
Sint need;
Eterm bin_term;
byte* bin_p;
+ if (is_binary(BIF_ARG_1)) {
+ byte *temp_alloc = NULL;
+ byte *bytes;
+ byte *err_pos;
+ Uint size,num_chars;
+ Uint unipoint;
+ /* Uninterpreted encoding except if windows widechar, in case we convert from
+ utf8 to win_wchar */
+ if (encoding != ERL_FILENAME_WIN_WCHAR) {
+ BIF_RET(BIF_ARG_1);
+ }
+ /* In a wchar world, the emulator flags only affect how
+ binaries are interpreted when sent from the user. */
+ /* Determine real length and create a new binary */
+ size = binary_size(BIF_ARG_1);
+ bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc);
+ if (analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != UTF8_OK ||
+ erts_get_user_requested_filename_encoding() == ERL_FILENAME_LATIN1) {
+ /* What to do now? Maybe latin1, so just take byte for byte instead */
+ bin_term = new_binary(BIF_P, 0, size*2);
+ bin_p = binary_bytes(bin_term);
+ while (size--) {
+ *bin_p++ = *bytes++;
+ *bin_p++ = 0;
+ }
+ erts_free_aligned_binary_bytes(temp_alloc);
+ BIF_RET(bin_term);
+ }
+ /* OK, UTF8 ok, number of characters is in num_chars */
+ bin_term = new_binary(BIF_P, 0, num_chars*2);
+ bin_p = binary_bytes(bin_term);
+ while (num_chars--) {
+ if (((*bytes) & ((byte) 0x80)) == 0) {
+ unipoint = (Uint) *bytes;
+ ++bytes;
+ } else if (((*bytes) & ((byte) 0xE0)) == 0xC0) {
+ unipoint =
+ (((Uint) ((*bytes) & ((byte) 0x1F))) << 6) |
+ ((Uint) (bytes[1] & ((byte) 0x3F)));
+ bytes += 2;
+ } else if (((*bytes) & ((byte) 0xF0)) == 0xE0) {
+ unipoint =
+ (((Uint) ((*bytes) & ((byte) 0xF))) << 12) |
+ (((Uint) (bytes[1] & ((byte) 0x3F))) << 6) |
+ ((Uint) (bytes[2] & ((byte) 0x3F)));
+ bytes +=3;
+ } else if (((*bytes) & ((byte) 0xF8)) == 0xF0) {
+ unipoint =
+ (((Uint) ((*bytes) & ((byte) 0x7))) << 18) |
+ (((Uint) (bytes[1] & ((byte) 0x3F))) << 12) |
+ (((Uint) (bytes[2] & ((byte) 0x3F))) << 6) |
+ ((Uint) (bytes[3] & ((byte) 0x3F)));
+ bytes += 4;
+ } else {
+ erl_exit(1,"Internal unicode error in file:name2native/1");
+ }
+ *bin_p++ = (byte) (unipoint & 0xFF);
+ *bin_p++ = (byte) ((unipoint >> 8) & 0xFF);
+ }
+ erts_free_aligned_binary_bytes(temp_alloc);
+ BIF_RET(bin_term);
+ } /* binary */
+
+
if ((need = simple_char_need(BIF_ARG_1,encoding)) < 0) {
BIF_ERROR(BIF_P,BADARG);
}
@@ -2116,7 +2184,7 @@ BIF_RETTYPE file_name2native_1(BIF_ALIST_1)
BIF_RET(bin_term);
}
-BIF_RETTYPE file_native2name_1(BIF_ALIST_1)
+BIF_RETTYPE file_internal_native2name_1(BIF_ALIST_1)
{
Eterm real_bin;
Uint offset;
@@ -2144,12 +2212,15 @@ BIF_RETTYPE file_native2name_1(BIF_ALIST_1)
}
switch (erts_get_native_filename_encoding()) {
case ERL_FILENAME_LATIN1:
- goto simple;
+ hp = HAlloc(BIF_P, 2 * size);
+ bytes = binary_bytes(real_bin)+offset;
+
+ BIF_RET(erts_bin_bytes_to_list(NIL, hp, bytes, size, bitoffs));
case ERL_FILENAME_UTF8:
bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc);
if (analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != UTF8_OK) {
erts_free_aligned_binary_bytes(temp_alloc);
- goto simple;
+ goto noconvert;
}
num_built = 0;
num_eaten = 0;
@@ -2157,12 +2228,16 @@ BIF_RETTYPE file_native2name_1(BIF_ALIST_1)
erts_free_aligned_binary_bytes(temp_alloc);
BIF_RET(ret);
case ERL_FILENAME_WIN_WCHAR:
- if ((size % 2) != 0) {
- goto simple;
- }
bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc);
- hp = HAlloc(BIF_P, size);
- ret = NIL;
+ if ((size % 2) != 0) { /* Panic fixup to avoid crashing the emulator */
+ size--;
+ hp = HAlloc(BIF_P, size+2);
+ ret = CONS(hp,make_small((Uint) bytes[size]),NIL);
+ hp += 2;
+ } else {
+ hp = HAlloc(BIF_P, size);
+ ret = NIL;
+ }
bytes += size-1;
while (size > 0) {
Uint x = ((Uint) *bytes--) << 8;
@@ -2173,13 +2248,10 @@ BIF_RETTYPE file_native2name_1(BIF_ALIST_1)
erts_free_aligned_binary_bytes(temp_alloc);
BIF_RET(ret);
default:
- goto simple;
+ goto noconvert;
}
- simple:
- hp = HAlloc(BIF_P, 2 * size);
- bytes = binary_bytes(real_bin)+offset;
-
- BIF_RET(erts_bin_bytes_to_list(NIL, hp, bytes, size, bitoffs));
+ noconvert:
+ BIF_RET(BIF_ARG_1);
}
BIF_RETTYPE file_native_name_encoding_0(BIF_ALIST_0)
@@ -2190,7 +2262,11 @@ BIF_RETTYPE file_native_name_encoding_0(BIF_ALIST_0)
case ERL_FILENAME_UTF8:
BIF_RET(am_utf8);
case ERL_FILENAME_WIN_WCHAR:
- BIF_RET(am_win_wchar);
+ if (erts_get_user_requested_filename_encoding() == ERL_FILENAME_LATIN1) {
+ BIF_RET(am_latin1);
+ } else {
+ BIF_RET(am_utf8);
+ }
default:
BIF_RET(am_undefined);
}
diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h
index d14e0ac105..57f2b2f16c 100644
--- a/erts/emulator/beam/sys.h
+++ b/erts/emulator/beam/sys.h
@@ -1263,6 +1263,9 @@ char* win32_errorstr(int);
#define ERL_FILENAME_WIN_WCHAR 3
int erts_get_native_filename_encoding(void);
+/* The set function is only to be used by erl_init! */
+void erts_set_user_requested_filename_encoding(int encoding);
+int erts_get_user_requested_filename_encoding(void);
void erts_init_sys_common_misc(void);
diff --git a/erts/emulator/sys/common/erl_sys_common_misc.c b/erts/emulator/sys/common/erl_sys_common_misc.c
index dbb59676c8..581c14b6c6 100644
--- a/erts/emulator/sys/common/erl_sys_common_misc.c
+++ b/erts/emulator/sys/common/erl_sys_common_misc.c
@@ -42,33 +42,51 @@
/* Written once and only once */
static int filename_encoding = ERL_FILENAME_UNKNOWN;
+#if defined(__WIN32__)
+static int user_filename_encoding = ERL_FILENAME_UTF8; /* Default unicode on windows */
+#else
+static int user_filename_encoding = ERL_FILENAME_LATIN1;
+#endif
+void erts_set_user_requested_filename_encoding(int encoding) /* Store the filename encoding requested by the user; per the note in sys.h only erl_init should call this. */
+{
+ user_filename_encoding = encoding; /* plain overwrite of the file-static, no validation */
+}
+
+int erts_get_user_requested_filename_encoding(void) /* Return the user-requested filename encoding: the value last stored by the setter, else the static default (UTF-8 on __WIN32__, latin1 otherwise). */
+{
+ return user_filename_encoding;
+}
void erts_init_sys_common_misc(void) /* Sets the file-static filename_encoding from the platform, the user request and (as a fallback) the locale. */
{
#if defined(__WIN32__)
filename_encoding = ERL_FILENAME_WIN_WCHAR; /* Windows builds: always wide-char, whatever the user requested */
#else
- char *l;
- filename_encoding = ERL_FILENAME_LATIN1;
+ if (user_filename_encoding != ERL_FILENAME_UNKNOWN) { /* an explicit request (anything but "unknown") is used as-is */
+ filename_encoding = user_filename_encoding;
+ } else { /* "unknown": start from latin1 and switch to UTF-8 only if the locale indicates it */
+ char *l;
+ filename_encoding = ERL_FILENAME_LATIN1;
# ifdef PRIMITIVE_UTF8_CHECK
- setlocale(LC_CTYPE, ""); /* Set international environment,
- ignore result */
- if (((l = getenv("LC_ALL")) && *l) ||
- ((l = getenv("LC_CTYPE")) && *l) ||
- ((l = getenv("LANG")) && *l)) {
- if (strstr(l, "UTF-8")) {
- filename_encoding = ERL_FILENAME_UTF8;
- }
- }
-
+ setlocale(LC_CTYPE, ""); /* Set international environment,
+ ignore result */
+ if (((l = getenv("LC_ALL")) && *l) ||
+ ((l = getenv("LC_CTYPE")) && *l) ||
+ ((l = getenv("LANG")) && *l)) { /* first non-empty of LC_ALL, LC_CTYPE, LANG decides */
+ if (strstr(l, "UTF-8")) { /* substring match on the exact spelling "UTF-8" */
+ filename_encoding = ERL_FILENAME_UTF8;
+ }
+ }
+
# else
- l = setlocale(LC_CTYPE, ""); /* Set international environment */
- if (l != NULL) {
- if (strcmp(nl_langinfo(CODESET), "UTF-8") == 0) {
- filename_encoding = ERL_FILENAME_UTF8;
+ l = setlocale(LC_CTYPE, ""); /* Set international environment */
+ if (l != NULL) { /* only query the codeset when setlocale succeeded */
+ if (strcmp(nl_langinfo(CODESET), "UTF-8") == 0) {
+ filename_encoding = ERL_FILENAME_UTF8;
+ }
}
- }
# endif
+ }
#endif
}