From fed20c731b91f1debb11a809cc5999d8b04dd293 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 22 Nov 2010 16:24:59 +0100 Subject: Teach spawn_executable about Unicode Also corrected compressed files on Windows --- erts/emulator/beam/erl_bif_port.c | 91 +++++++++++++++++---------------------- erts/emulator/beam/erl_unicode.c | 73 +++++++++++++++++++++++++++++++ erts/emulator/beam/global.h | 2 + 3 files changed, 114 insertions(+), 52 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/erl_bif_port.c b/erts/emulator/beam/erl_bif_port.c index 378c5e73fd..fbc92b9730 100644 --- a/erts/emulator/beam/erl_bif_port.c +++ b/erts/emulator/beam/erl_bif_port.c @@ -610,6 +610,7 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump) int binary_io; int soft_eof; Sint linebuf; + Eterm edir = NIL; byte dir[MAXPATHLEN]; /* These are the defaults */ @@ -686,19 +687,10 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump) } else if (option == am_arg0) { char *a0; - int n; - if (is_nil(*tp)) { - n = 0; - } else if( (n = is_string(*tp)) == 0) { + + if ((a0 = erts_convert_filename_to_native(*tp, ERTS_ALC_T_TMP, 1)) == NULL) { goto badarg; } - a0 = (char *) erts_alloc(ERTS_ALC_T_TMP, - (n + 1) * sizeof(byte)); - if (intlist_to_buf(*tp, a0, n) != n) { - erl_exit(1, "%s:%d: Internal error\n", - __FILE__, __LINE__); - } - a0[n] = '\0'; if (opts.argv == NULL) { opts.argv = erts_alloc(ERTS_ALC_T_TMP, 2 * sizeof(char **)); @@ -711,22 +703,7 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump) opts.argv[0] = a0; } } else if (option == am_cd) { - Eterm iolist; - DeclareTmpHeap(heap,4,p); - int r; - - UseTmpHeap(4,p); - heap[0] = *tp; - heap[1] = make_list(heap+2); - heap[2] = make_small(0); - heap[3] = NIL; - iolist = make_list(heap); - r = io_list_to_buf(iolist, (char*) dir, MAXPATHLEN); - UnUseTmpHeap(4,p); - if (r < 0) { - goto badarg; - } - opts.wd = (char *) dir; + edir = *tp; } else { goto badarg; } @@ -838,19 +815,7 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump) goto badarg; } name = tp[1]; - if (is_atom(name)) { - name_buf = (char *) erts_alloc(ERTS_ALC_T_TMP, - atom_tab(atom_val(name))->len+1); - sys_memcpy((void *) name_buf, - (void *) atom_tab(atom_val(name))->name, - atom_tab(atom_val(name))->len); - name_buf[atom_tab(atom_val(name))->len] = '\0'; - } else if ((i = is_string(name))) { - name_buf = (char *) erts_alloc(ERTS_ALC_T_TMP, i + 1); - if (intlist_to_buf(name, name_buf, i) != i) - erl_exit(1, "%s:%d: Internal error\n", __FILE__, __LINE__); - name_buf[i] = '\0'; - } else { + if ((name_buf = erts_convert_filename_to_native(name,ERTS_ALC_T_TMP,0)) == NULL) { goto badarg; } opts.spawn_type = ERTS_SPAWN_EXECUTABLE; @@ -892,7 +857,33 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump) /* Argument vector only if explicit spawn_executable */ goto badarg; } - + + if (edir != NIL) { + /* A working directory is expressed differently if spawn_executable, i.e. Unicode is handles + for spawn_executable... */ + if (opts.spawn_type != ERTS_SPAWN_EXECUTABLE) { + Eterm iolist; + DeclareTmpHeap(heap,4,p); + int r; + + UseTmpHeap(4,p); + heap[0] = edir; + heap[1] = make_list(heap+2); + heap[2] = make_small(0); + heap[3] = NIL; + iolist = make_list(heap); + r = io_list_to_buf(iolist, (char*) dir, MAXPATHLEN); + UnUseTmpHeap(4,p); + if (r < 0) { + goto badarg; + } + opts.wd = (char *) dir; + } else { + if ((opts.wd = erts_convert_filename_to_native(edir,ERTS_ALC_T_TMP,0)) == NULL) { + goto badarg; + } + } + } if (driver != &spawn_driver && opts.exit_status) { goto badarg; @@ -941,6 +932,9 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump) if (opts.argv) { free_args(opts.argv); } + if (opts.wd && opts.wd != ((char *)dir)) { + erts_free(ERTS_ALC_T_TMP, (void *) opts.wd); + } return port_num; badarg: @@ -950,6 +944,7 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump) #undef OPEN_PORT_ERROR } +/* Arguments can be given i unicode and as raw binaries, convert filename is used to convert */ static char **convert_args(Eterm l) { char **pp; @@ -966,22 +961,14 @@ static char **convert_args(Eterm l) pp[i++] = erts_default_arg0; while (is_list(l)) { str = CAR(list_val(l)); - - if (is_nil(str)) { - n = 0; - } else if( (n = is_string(str)) == 0) { - /* Not a string... */ + if ((b = erts_convert_filename_to_native(str,ERTS_ALC_T_TMP,1)) == NULL) { int j; for (j = 1; j < i; ++j) erts_free(ERTS_ALC_T_TMP, pp[j]); erts_free(ERTS_ALC_T_TMP, pp); return NULL; - } - b = (char *) erts_alloc(ERTS_ALC_T_TMP, (n + 1) * sizeof(byte)); - pp[i++] = (char *) b; - if (intlist_to_buf(str, b, n) != n) - erl_exit(1, "%s:%d: Internal error\n", __FILE__, __LINE__); - b[n] = '\0'; + } + pp[i++] = b; l = CDR(list_val(l)); } pp[i] = NULL; diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 1e729cfc2e..3434ce3979 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -2023,6 +2023,79 @@ BIF_RETTYPE binary_to_existing_atom_2(BIF_ALIST_2) * Simpler non-interruptable routines for UTF-8 and * Windowish UTF-16 (restricted) **********************************************************/ +/* + * This function is the heart of the Unicode support for + * open_port - spawn_executable. It converts both the name + * of the executable and the arguments according to the same rules + * as for filename conversion. That means as if your arguments are + * to be raw, you supply binaries, else unicode characters are allowed up to + * the encoding maximum (256 of the unicode max). + * Depending on the filename encoding standard, the vector is then + * converted to whatever is used, which might mean win_utf16 if on windows. + * Do not peek into the argument vector or filenam with ordinary + * string routines, that will certainly fail on some OS. + */ + +char *erts_convert_filename_to_native(Eterm name, ErtsAlcType_t alloc_type, int allow_empty) +{ + int encoding = erts_get_native_filename_encoding(); + char* name_buf = NULL; + + if (is_atom(name) || is_list(name) || (allow_empty && is_nil(name))) { + Sint need; + if ((need = erts_native_filename_need(name,encoding)) < 0) { + return NULL; + } + if (encoding == ERL_FILENAME_WIN_WCHAR) { + need += 2; + } else { + ++need; + } + name_buf = (char *) erts_alloc(alloc_type, need); + erts_native_filename_put(name,encoding,(byte *)name_buf); + name_buf[need-1] = 0; + if (encoding == ERL_FILENAME_WIN_WCHAR) { + name_buf[need-2] = 0; + } + } else if (is_binary(name)) { + byte *temp_alloc = NULL; + byte *bytes; + byte *err_pos; + Uint size,num_chars; + + size = binary_size(name); + bytes = erts_get_aligned_binary_bytes(name, &temp_alloc); + if (encoding != ERL_FILENAME_WIN_WCHAR) { + /*Add 0 termination only*/ + name_buf = (char *) erts_alloc(alloc_type, size+1); + memcpy(name_buf,bytes,size); + name_buf[size]=0; + } else if (erts_analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != ERTS_UTF8_OK || + erts_get_user_requested_filename_encoding() == ERL_FILENAME_LATIN1) { + byte *p; + /* What to do now? Maybe latin1, so just take byte for byte instead */ + name_buf = (char *) erts_alloc(alloc_type, (size+1)*2); + p = (byte *) name_buf; + while (size--) { + *p++ = *bytes++; + *p++ = 0; + } + *p++ = 0; + *p++ = 0; + } else { /* WIN_WCHAR and valid UTF8 */ + name_buf = (char *) erts_alloc(alloc_type, (num_chars+1)*2); + erts_copy_utf8_to_utf16_little((byte *) name_buf, bytes, num_chars); + name_buf[num_chars*2] = 0; + name_buf[num_chars*2+1] = 0; + } + erts_free_aligned_binary_bytes(temp_alloc); + } else { + return NULL; + } + return name_buf; +} + + Sint erts_native_filename_need(Eterm ioterm, int encoding) { Eterm *objp; diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 6e86de3ebf..524db2a2eb 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -1602,6 +1602,8 @@ Sint erts_native_filename_need(Eterm ioterm, int encoding); void erts_copy_utf8_to_utf16_little(byte *target, byte *bytes, int num_chars); int erts_analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left); +char *erts_convert_filename_to_native(Eterm name, ErtsAlcType_t alloc_type, int allow_empty); + #define ERTS_UTF8_OK 0 #define ERTS_UTF8_INCOMPLETE 1 #define ERTS_UTF8_ERROR 2 -- cgit v1.2.3