From 25e22145d129a80dcfc02c64dfe0b0d890a5e26d Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Tue, 12 Oct 2010 17:22:42 +0200 Subject: Add bifs to translate between erlang filenames and native encoding --- erts/emulator/Makefile.in | 3 +- erts/emulator/beam/atom.names | 1 + erts/emulator/beam/bif.tab | 6 + erts/emulator/beam/binary.c | 10 +- erts/emulator/beam/erl_binary.h | 2 + erts/emulator/beam/erl_init.c | 1 + erts/emulator/beam/erl_unicode.c | 384 ++++++++++++++++++++++++- erts/emulator/beam/sys.h | 12 + erts/emulator/sys/common/erl_sys_common_misc.c | 78 +++++ 9 files changed, 490 insertions(+), 7 deletions(-) create mode 100644 erts/emulator/sys/common/erl_sys_common_misc.c diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in index 4ed0ccabc6..6c33e2ca16 100644 --- a/erts/emulator/Makefile.in +++ b/erts/emulator/Makefile.in @@ -796,7 +796,8 @@ endif OS_OBJS += $(OBJDIR)/erl_mseg.o \ $(OBJDIR)/erl_$(ERLANG_OSTYPE)_sys_ddll.o \ - $(OBJDIR)/erl_mtrace_sys_wrap.o + $(OBJDIR)/erl_mtrace_sys_wrap.o \ + $(OBJDIR)/erl_sys_common_misc.o HIPE_x86_OS_OBJS=$(HIPE_x86_$(OPSYS)_OBJS) HIPE_x86_OBJS=$(OBJDIR)/hipe_x86.o $(OBJDIR)/hipe_x86_glue.o $(OBJDIR)/hipe_x86_bifs.o $(OBJDIR)/hipe_x86_signal.o $(OBJDIR)/hipe_x86_stack.o $(HIPE_x86_OS_OBJS) diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 327620772f..93b8e3ec28 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -549,6 +549,7 @@ atom waiting atom wall_clock atom warning atom warning_msg +atom win_wchar atom wordsize atom write_concurrency atom xor diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 0674aae77f..55166417e5 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -794,6 +794,12 @@ bif binary:decode_unsigned/2 bif erlang:nif_error/1 bif erlang:nif_error/2 +# +# Helpers for unicode filenames +# +bif file:name2native/1 +bif file:native2name/1 +bif file:native_name_encoding/0 # # Obsolete # diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index 8ee8fbcb29..4be869f269 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -217,8 +217,8 @@ erts_get_aligned_binary_bytes_extra(Eterm bin, byte** base_ptr, ErtsAlcType_t al return bytes; } -static Eterm -bin_bytes_to_list(Eterm previous, Eterm* hp, byte* bytes, Uint size, Uint bitoffs) +Eterm +erts_bin_bytes_to_list(Eterm previous, Eterm* hp, byte* bytes, Uint size, Uint bitoffs) { if (bitoffs == 0) { while (size) { @@ -263,7 +263,7 @@ BIF_RETTYPE binary_to_list_1(BIF_ALIST_1) Eterm* hp = HAlloc(BIF_P, 2 * size); byte* bytes = binary_bytes(real_bin)+offset; - BIF_RET(bin_bytes_to_list(NIL, hp, bytes, size, bitoffs)); + BIF_RET(erts_bin_bytes_to_list(NIL, hp, bytes, size, bitoffs)); } error: @@ -295,7 +295,7 @@ BIF_RETTYPE binary_to_list_3(BIF_ALIST_3) } i = stop-start+1; hp = HAlloc(BIF_P, 2*i); - BIF_RET(bin_bytes_to_list(NIL, hp, bytes+start-1, i, bitoffs)); + BIF_RET(erts_bin_bytes_to_list(NIL, hp, bytes+start-1, i, bitoffs)); error: BIF_ERROR(BIF_P, BADARG); @@ -339,7 +339,7 @@ BIF_RETTYPE bitstring_to_list_1(BIF_ALIST_1) previous = CONS(hp, make_binary(last), previous); hp += 2; } - BIF_RET(bin_bytes_to_list(previous, hp, bytes, size, bitoffs)); + BIF_RET(erts_bin_bytes_to_list(previous, hp, bytes, size, bitoffs)); } diff --git a/erts/emulator/beam/erl_binary.h b/erts/emulator/beam/erl_binary.h index a569fe2e85..bdf0fe23fc 100644 --- a/erts/emulator/beam/erl_binary.h +++ b/erts/emulator/beam/erl_binary.h @@ -152,6 +152,8 @@ do { \ void erts_init_binary(void); byte* erts_get_aligned_binary_bytes_extra(Eterm, byte**, ErtsAlcType_t, unsigned extra); +/* Used by unicode module */ +Eterm erts_bin_bytes_to_list(Eterm previous, Eterm* hp, byte* bytes, Uint size, Uint bitoffs); /* * Common implementation for erlang:list_to_binary/1 and binary:list_to_bin/1 diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index a9f4f041ac..a7892e143b 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -251,6 +251,7 @@ erl_init(int ncpu) erts_init_monitors(); erts_init_gc(); init_time(); + erts_init_sys_common_misc(); erts_init_process(ncpu); erts_init_scheduling(use_multi_run_queue, no_schedulers, diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index d01a3661f9..671c3c0cdf 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -463,7 +463,7 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */ } objp = list_val(ioterm); obj = CAR(objp); - if (!is_byte(obj)) + if (!is_small(obj)) break; } } else if (is_nil(obj)) { @@ -1813,3 +1813,385 @@ BIF_RETTYPE binary_to_existing_atom_2(BIF_ALIST_2) { return binary_to_atom(BIF_P, BIF_ARG_1, BIF_ARG_2, 1); } + +/********************************************************** + * Simpler non-interruptable routines for UTF-8 and + * Windowish UTF-16 (restricted) + **********************************************************/ +static Sint simple_char_need(Eterm ioterm, int encoding) +{ + Eterm *objp; + Eterm obj; + DECLARE_ESTACK(stack); + Sint need = 0; + + if (is_atom(ioterm)) { + Atom* ap; + int i; + ap = atom_tab(atom_val(ioterm)); + switch (encoding) { + case ERL_FILENAME_LATIN1: + need = ap->len; + break; + case ERL_FILENAME_UTF8: + for (i = 0; i < ap->len; i++) { + need += (ap->name[i] >= 0x80) ? 2 : 1; + } + break; + case ERL_FILENAME_WIN_WCHAR: + need = 2*(ap->len); + break; + default: + need = -1; + } + DESTROY_ESTACK(stack); + return need; + } + + if (is_nil(ioterm)) { + DESTROY_ESTACK(stack); + return need; + } + if (!is_list(ioterm)) { + DESTROY_ESTACK(stack); + return (Sint) -1; + } + /* OK a list, needs to be processed in order, handling each flat list-level + as they occur, just like io_list_to_binary would */ + ESTACK_PUSH(stack,ioterm); + while (!ESTACK_ISEMPTY(stack)) { + ioterm = ESTACK_POP(stack); + if (is_nil(ioterm)) { + /* ignore empty lists */ + continue; + } + if(is_list(ioterm)) { +L_Again: /* Restart with sublist, old listend was pushed on stack */ + objp = list_val(ioterm); + obj = CAR(objp); + for(;;) { /* loop over one flat list of bytes and binaries + until sublist or list end is encountered */ + if (is_small(obj)) { /* Always small */ + for(;;) { + Uint x = unsigned_val(obj); + switch (encoding) { + case ERL_FILENAME_LATIN1: + need += 1; + break; + case ERL_FILENAME_UTF8: + if (x < 0x80) { + need +=1; + } else if (x < 0x800) { + need += 2; + } else if (x < 0x10000) { + if ((x >= 0xD800 && x <= 0xDFFF) || + (x == 0xFFFE) || + (x == 0xFFFF)) { /* Invalid unicode range */ + DESTROY_ESTACK(stack); + return ((Sint) -1); + } + need += 3; + } else if (x < 0x110000) { + need += 4; + } else { + DESTROY_ESTACK(stack); + return ((Sint) -1); + } + break; + case ERL_FILENAME_WIN_WCHAR: + if (x <= 0xffff) { + need += 2; + break; + } /* else fall throug to error */ + default: + DESTROY_ESTACK(stack); + return ((Sint) -1); + } + + /* everything else will give badarg later + in the process, so we dont check */ + ioterm = CDR(objp); + if (!is_list(ioterm)) { + break; + } + objp = list_val(ioterm); + obj = CAR(objp); + if (!is_small(obj)) + break; + } + } else if (is_nil(obj)) { + ioterm = CDR(objp); + if (!is_list(ioterm)) { + break; + } + objp = list_val(ioterm); + obj = CAR(objp); + } else if (is_list(obj)) { + /* push rest of list for later processing, start + again with sublist */ + ESTACK_PUSH(stack,CDR(objp)); + ioterm = obj; + goto L_Again; + } else { + DESTROY_ESTACK(stack); + return ((Sint) -1); + } + if (is_nil(ioterm) || !is_list(ioterm)) { + break; + } + } /* for(;;) */ + } /* is_list(ioterm) */ + + if (!is_list(ioterm) && !is_nil(ioterm)) { + /* inproper list end */ + DESTROY_ESTACK(stack); + return ((Sint) -1); + } + } /* while not estack empty */ + DESTROY_ESTACK(stack); + return need; +} + +static void simple_put_chars(Eterm ioterm, int encoding, byte *p) +{ + Eterm *objp; + Eterm obj; + DECLARE_ESTACK(stack); + + if (is_atom(ioterm)) { + Atom* ap; + int i; + ap = atom_tab(atom_val(ioterm)); + switch (encoding) { + case ERL_FILENAME_LATIN1: + for (i = 0; i < ap->len; i++) { + *p++ = ap->name[i]; + } + break; + case ERL_FILENAME_UTF8: + for (i = 0; i < ap->len; i++) { + if(ap->name[i] < 0x80) { + *p++ = ap->name[i]; + } else { + *p++ = (((ap->name[i]) >> 6) | ((byte) 0xC0)); + *p++ = (((ap->name[i]) & 0x3F) | ((byte) 0x80)); + } + } + break; + case ERL_FILENAME_WIN_WCHAR: + for (i = 0; i < ap->len; i++) { + /* Little endian */ + *p++ = ap->name[i]; + *p++ = 0; + } + break; + default: + ASSERT(0); + } + DESTROY_ESTACK(stack); + return; + } + + if (is_nil(ioterm)) { + DESTROY_ESTACK(stack); + return; + } + ASSERT(is_list(ioterm)); + /* OK a list, needs to be processed in order, handling each flat list-level + as they occur, just like io_list_to_binary would */ + ESTACK_PUSH(stack,ioterm); + while (!ESTACK_ISEMPTY(stack)) { + ioterm = ESTACK_POP(stack); + if (is_nil(ioterm)) { + /* ignore empty lists */ + continue; + } + if(is_list(ioterm)) { +L_Again: /* Restart with sublist, old listend was pushed on stack */ + objp = list_val(ioterm); + obj = CAR(objp); + for(;;) { /* loop over one flat list of bytes and binaries + until sublist or list end is encountered */ + if (is_small(obj)) { /* Always small */ + for(;;) { + Uint x = unsigned_val(obj); + switch (encoding) { + case ERL_FILENAME_LATIN1: + ASSERT( x < 256); + *p++ = (byte) x; + break; + case ERL_FILENAME_UTF8: + if (x < 0x80) { + *p++ = (byte) x; + } + else if (x < 0x800) { + *p++ = (((byte) (x >> 6)) | + ((byte) 0xC0)); + *p++ = (((byte) (x & 0x3F)) | + ((byte) 0x80)); + } else if (x < 0x10000) { + ASSERT(!((x >= 0xD800 && x <= 0xDFFF) || + (x == 0xFFFE) || + (x == 0xFFFF))); + *p++ = (((byte) (x >> 12)) | + ((byte) 0xE0)); + *p++ = ((((byte) (x >> 6)) & 0x3F) | + ((byte) 0x80)); + *p++ = (((byte) (x & 0x3F)) | + ((byte) 0x80)); + } else { + ASSERT(x < 0x110000); + *p++ = (((byte) (x >> 18)) | + ((byte) 0xF0)); + *p++ = ((((byte) (x >> 12)) & 0x3F) | + ((byte) 0x80)); + *p++ = ((((byte) (x >> 6)) & 0x3F) | + ((byte) 0x80)); + *p++ = (((byte) (x & 0x3F)) | + ((byte) 0x80)); + } + break; + case ERL_FILENAME_WIN_WCHAR: + ASSERT(x <= 0xFFFF); + *p++ = (byte) (x & 0xFFU); + *p++ = (byte) ((x >> 8) & 0xFFU); + break; + default: + ASSERT(0); + } + + /* everything else will give badarg later + in the process, so we dont check */ + ioterm = CDR(objp); + if (!is_list(ioterm)) { + break; + } + objp = list_val(ioterm); + obj = CAR(objp); + if (!is_small(obj)) + break; + } + } else if (is_nil(obj)) { + ioterm = CDR(objp); + if (!is_list(ioterm)) { + break; + } + objp = list_val(ioterm); + obj = CAR(objp); + } else if (is_list(obj)) { + /* push rest of list for later processing, start + again with sublist */ + ESTACK_PUSH(stack,CDR(objp)); + ioterm = obj; + goto L_Again; + } else { + ASSERT(0); + } + if (is_nil(ioterm) || !is_list(ioterm)) { + break; + } + } /* for(;;) */ + } /* is_list(ioterm) */ + + ASSERT(is_list(ioterm) || is_nil(ioterm)); + } /* while not estack empty */ + DESTROY_ESTACK(stack); + return; +} + + + +BIF_RETTYPE file_name2native_1(BIF_ALIST_1) +{ + int encoding = erts_get_native_filename_encoding(); + Sint need; + Eterm bin_term; + byte* bin_p; + if ((need = simple_char_need(BIF_ARG_1,encoding)) < 0) { + BIF_ERROR(BIF_P,BADARG); + } + bin_term = new_binary(BIF_P, 0, need); + bin_p = binary_bytes(bin_term); + simple_put_chars(BIF_ARG_1,encoding,bin_p); + BIF_RET(bin_term); +} + +BIF_RETTYPE file_native2name_1(BIF_ALIST_1) +{ + Eterm real_bin; + Uint offset; + Uint size,num_chars; + Uint bitsize; + Uint bitoffs; + Eterm *hp; + byte *temp_alloc = NULL; + byte *bytes; + byte *err_pos; + Uint num_built; /* characters */ + Uint num_eaten; /* bytes */ + Eterm ret; + + if (is_not_binary(BIF_ARG_1)) { + BIF_ERROR(BIF_P,BADARG); + } + size = binary_size(BIF_ARG_1); + ERTS_GET_REAL_BIN(BIF_ARG_1, real_bin, offset, bitoffs, bitsize); + if (bitsize != 0) { + BIF_ERROR(BIF_P,BADARG); + } + if (size == 0) { + BIF_RET(NIL); + } + switch (erts_get_native_filename_encoding()) { + case ERL_FILENAME_LATIN1: + goto simple; + case ERL_FILENAME_UTF8: + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + if (analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != UTF8_OK) { + erts_free_aligned_binary_bytes(temp_alloc); + goto simple; + } + num_built = 0; + num_eaten = 0; + ret = do_utf8_to_list(BIF_P, num_chars, bytes, size, num_chars, &num_built, &num_eaten, NIL); + erts_free_aligned_binary_bytes(temp_alloc); + BIF_RET(ret); + case ERL_FILENAME_WIN_WCHAR: + if ((size % 2) != 0) { + goto simple; + } + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + hp = HAlloc(BIF_P, size); + ret = NIL; + bytes += size-1; + while (size > 0) { + Uint x = ((Uint) *bytes--) << 8; + x |= ((Uint) *bytes--); + ret = CONS(hp,make_small(x),ret); + size -= 2; + } + erts_free_aligned_binary_bytes(temp_alloc); + BIF_RET(ret); + default: + goto simple; + } + simple: + hp = HAlloc(BIF_P, 2 * size); + bytes = binary_bytes(real_bin)+offset; + + BIF_RET(erts_bin_bytes_to_list(NIL, hp, bytes, size, bitoffs)); +} + +BIF_RETTYPE file_native_name_encoding_0(BIF_ALIST_0) +{ + switch (erts_get_native_filename_encoding()) { + case ERL_FILENAME_LATIN1: + BIF_RET(am_latin1); + case ERL_FILENAME_UTF8: + BIF_RET(am_utf8); + case ERL_FILENAME_WIN_WCHAR: + BIF_RET(am_win_wchar); + default: + BIF_RET(am_undefined); + } +} diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h index 0d15272aa8..d14e0ac105 100644 --- a/erts/emulator/beam/sys.h +++ b/erts/emulator/beam/sys.h @@ -1253,6 +1253,18 @@ char* win32_errorstr(int); #endif +/************************************************************************ + * Find out the native filename encoding of the process (look at locale of + * Unix processes and just do UTF16 on windows + ************************************************************************/ +#define ERL_FILENAME_UNKNOWN 0 +#define ERL_FILENAME_LATIN1 1 +#define ERL_FILENAME_UTF8 2 +#define ERL_FILENAME_WIN_WCHAR 3 + +int erts_get_native_filename_encoding(void); + +void erts_init_sys_common_misc(void); #endif diff --git a/erts/emulator/sys/common/erl_sys_common_misc.c b/erts/emulator/sys/common/erl_sys_common_misc.c new file mode 100644 index 0000000000..dbb59676c8 --- /dev/null +++ b/erts/emulator/sys/common/erl_sys_common_misc.c @@ -0,0 +1,78 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2006-2010. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +/* + * Description: Check I/O + * + * Author: Rickard Green + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "sys.h" +#include "global.h" + +#if !defined(__WIN32__) +#include +#if !defined(HAVE_SETLOCALE) || !defined(HAVE_NL_LANGINFO) || !defined(HAVE_LANGINFO_H) +#define PRIMITIVE_UTF8_CHECK 1 +#else +#include +#endif +#endif + +/* Written once and only once */ + +static int filename_encoding = ERL_FILENAME_UNKNOWN; + +void erts_init_sys_common_misc(void) +{ +#if defined(__WIN32__) + filename_encoding = ERL_FILENAME_WIN_WCHAR; +#else + char *l; + filename_encoding = ERL_FILENAME_LATIN1; +# ifdef PRIMITIVE_UTF8_CHECK + setlocale(LC_CTYPE, ""); /* Set international environment, + ignore result */ + if (((l = getenv("LC_ALL")) && *l) || + ((l = getenv("LC_CTYPE")) && *l) || + ((l = getenv("LANG")) && *l)) { + if (strstr(l, "UTF-8")) { + filename_encoding = ERL_FILENAME_UTF8; + } + } + +# else + l = setlocale(LC_CTYPE, ""); /* Set international environment */ + if (l != NULL) { + if (strcmp(nl_langinfo(CODESET), "UTF-8") == 0) { + filename_encoding = ERL_FILENAME_UTF8; + } + } +# endif +#endif +} + +int erts_get_native_filename_encoding(void) +{ + return filename_encoding; +} -- cgit v1.2.3 From 4cf08709189ea8b7e2ae20f85c390abd04ae48ae Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 13 Oct 2010 17:08:32 +0200 Subject: Teach filename to accept raw data and add filename enc option to emu --- erts/emulator/beam/atom.names | 1 - erts/emulator/beam/bif.tab | 4 +- erts/emulator/beam/erl_init.c | 22 +- erts/emulator/beam/erl_unicode.c | 108 ++++++-- erts/emulator/beam/sys.h | 3 + erts/emulator/sys/common/erl_sys_common_misc.c | 52 ++-- lib/kernel/src/file.erl | 4 +- lib/stdlib/src/filename.erl | 335 ++++++++++++++----------- 8 files changed, 345 insertions(+), 184 deletions(-) diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 93b8e3ec28..327620772f 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -549,7 +549,6 @@ atom waiting atom wall_clock atom warning atom warning_msg -atom win_wchar atom wordsize atom write_concurrency atom xor diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 55166417e5..cf251d9016 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -797,8 +797,8 @@ bif erlang:nif_error/2 # # Helpers for unicode filenames # -bif file:name2native/1 -bif file:native2name/1 +bif file:internal_name2native/1 +bif file:internal_native2name/1 bif file:native_name_encoding/0 # # Obsolete diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index a7892e143b..464ee750f7 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -908,7 +908,27 @@ erl_start(int argc, char **argv) VERBOSE(DEBUG_SYSTEM, ("using display items %d\n",display_items)); break; - + case 'f': + if (!strncmp(argv[i],"-fn",3)) { + arg = get_arg(argv[i]+3, argv[i+1], &i); + switch (*arg) { + case 'u': + erts_set_user_requested_filename_encoding(ERL_FILENAME_UTF8); + break; + case 'l': + erts_set_user_requested_filename_encoding(ERL_FILENAME_LATIN1); + break; + case 'a': + erts_set_user_requested_filename_encoding(ERL_FILENAME_UNKNOWN); + default: + erts_fprintf(stderr, "bad filename encoding %s, can be (l,u or a)\n", arg); + erts_usage(); + } + break; + } else { + erts_fprintf(stderr, "%s unknown flag %s\n", argv[0], argv[i]); + erts_usage(); + } case 'l': display_loads++; break; diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 671c3c0cdf..3e7a935cef 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -1876,6 +1876,10 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */ Uint x = unsigned_val(obj); switch (encoding) { case ERL_FILENAME_LATIN1: + if (x > 255) { + DESTROY_ESTACK(stack); + return ((Sint) -1); + } need += 1; break; case ERL_FILENAME_UTF8: @@ -2101,12 +2105,76 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */ -BIF_RETTYPE file_name2native_1(BIF_ALIST_1) +BIF_RETTYPE file_internal_name2native_1(BIF_ALIST_1) { int encoding = erts_get_native_filename_encoding(); Sint need; Eterm bin_term; byte* bin_p; + if (is_binary(BIF_ARG_1)) { + byte *temp_alloc = NULL; + byte *bytes; + byte *err_pos; + Uint size,num_chars; + Uint unipoint; + /* Uninterpreted encoding except if windows widechar, in case we convert from + utf8 to win_wchar */ + if (encoding != ERL_FILENAME_WIN_WCHAR) { + BIF_RET(BIF_ARG_1); + } + /* In a wchar world, the emulator flags only affect how + binaries are interpreted when sent from the user. */ + /* Determine real length and create a new binary */ + size = binary_size(BIF_ARG_1); + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + if (analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != UTF8_OK || + erts_get_user_requested_filename_encoding() == ERL_FILENAME_LATIN1) { + /* What to do now? Maybe latin1, so just take byte for byte instead */ + bin_term = new_binary(BIF_P, 0, size*2); + bin_p = binary_bytes(bin_term); + while (size--) { + *bin_p++ = *bytes++; + *bin_p++ = 0; + } + erts_free_aligned_binary_bytes(temp_alloc); + BIF_RET(bin_term); + } + /* OK, UTF8 ok, number of characters is in num_chars */ + bin_term = new_binary(BIF_P, 0, num_chars*2); + bin_p = binary_bytes(bin_term); + while (num_chars--) { + if (((*bytes) & ((byte) 0x80)) == 0) { + unipoint = (Uint) *bytes; + ++bytes; + } else if (((*bytes) & ((byte) 0xE0)) == 0xC0) { + unipoint = + (((Uint) ((*bytes) & ((byte) 0x1F))) << 6) | + ((Uint) (bytes[1] & ((byte) 0x3F))); + bytes += 2; + } else if (((*bytes) & ((byte) 0xF0)) == 0xE0) { + unipoint = + (((Uint) ((*bytes) & ((byte) 0xF))) << 12) | + (((Uint) (bytes[1] & ((byte) 0x3F))) << 6) | + ((Uint) (bytes[2] & ((byte) 0x3F))); + bytes +=3; + } else if (((*bytes) & ((byte) 0xF8)) == 0xF0) { + unipoint = + (((Uint) ((*bytes) & ((byte) 0x7))) << 18) | + (((Uint) (bytes[1] & ((byte) 0x3F))) << 12) | + (((Uint) (bytes[2] & ((byte) 0x3F))) << 6) | + ((Uint) (bytes[3] & ((byte) 0x3F))); + bytes += 4; + } else { + erl_exit(1,"Internal unicode error in file:name2native/1"); + } + *bin_p++ = (byte) (unipoint & 0xFF); + *bin_p++ = (byte) ((unipoint >> 8) & 0xFF); + } + erts_free_aligned_binary_bytes(temp_alloc); + BIF_RET(bin_term); + } /* binary */ + + if ((need = simple_char_need(BIF_ARG_1,encoding)) < 0) { BIF_ERROR(BIF_P,BADARG); } @@ -2116,7 +2184,7 @@ BIF_RETTYPE file_name2native_1(BIF_ALIST_1) BIF_RET(bin_term); } -BIF_RETTYPE file_native2name_1(BIF_ALIST_1) +BIF_RETTYPE file_internal_native2name_1(BIF_ALIST_1) { Eterm real_bin; Uint offset; @@ -2144,12 +2212,15 @@ BIF_RETTYPE file_native2name_1(BIF_ALIST_1) } switch (erts_get_native_filename_encoding()) { case ERL_FILENAME_LATIN1: - goto simple; + hp = HAlloc(BIF_P, 2 * size); + bytes = binary_bytes(real_bin)+offset; + + BIF_RET(erts_bin_bytes_to_list(NIL, hp, bytes, size, bitoffs)); case ERL_FILENAME_UTF8: bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); if (analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != UTF8_OK) { erts_free_aligned_binary_bytes(temp_alloc); - goto simple; + goto noconvert; } num_built = 0; num_eaten = 0; @@ -2157,12 +2228,16 @@ BIF_RETTYPE file_native2name_1(BIF_ALIST_1) erts_free_aligned_binary_bytes(temp_alloc); BIF_RET(ret); case ERL_FILENAME_WIN_WCHAR: - if ((size % 2) != 0) { - goto simple; - } bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); - hp = HAlloc(BIF_P, size); - ret = NIL; + if ((size % 2) != 0) { /* Panic fixup to avoid crashing the emulator */ + size--; + hp = HAlloc(BIF_P, size+2); + ret = CONS(hp,make_small((Uint) bytes[size]),NIL); + hp += 2; + } else { + hp = HAlloc(BIF_P, size); + ret = NIL; + } bytes += size-1; while (size > 0) { Uint x = ((Uint) *bytes--) << 8; @@ -2173,13 +2248,10 @@ BIF_RETTYPE file_native2name_1(BIF_ALIST_1) erts_free_aligned_binary_bytes(temp_alloc); BIF_RET(ret); default: - goto simple; + goto noconvert; } - simple: - hp = HAlloc(BIF_P, 2 * size); - bytes = binary_bytes(real_bin)+offset; - - BIF_RET(erts_bin_bytes_to_list(NIL, hp, bytes, size, bitoffs)); + noconvert: + BIF_RET(BIF_ARG_1); } BIF_RETTYPE file_native_name_encoding_0(BIF_ALIST_0) @@ -2190,7 +2262,11 @@ BIF_RETTYPE file_native_name_encoding_0(BIF_ALIST_0) case ERL_FILENAME_UTF8: BIF_RET(am_utf8); case ERL_FILENAME_WIN_WCHAR: - BIF_RET(am_win_wchar); + if (erts_get_user_requested_filename_encoding() == ERL_FILENAME_LATIN1) { + BIF_RET(am_latin1); + } else { + BIF_RET(am_utf8); + } default: BIF_RET(am_undefined); } diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h index d14e0ac105..57f2b2f16c 100644 --- a/erts/emulator/beam/sys.h +++ b/erts/emulator/beam/sys.h @@ -1263,6 +1263,9 @@ char* win32_errorstr(int); #define ERL_FILENAME_WIN_WCHAR 3 int erts_get_native_filename_encoding(void); +/* The set function is only to be used by erl_init! */ +void erts_set_user_requested_filename_encoding(int encoding); +int erts_get_user_requested_filename_encoding(void); void erts_init_sys_common_misc(void); diff --git a/erts/emulator/sys/common/erl_sys_common_misc.c b/erts/emulator/sys/common/erl_sys_common_misc.c index dbb59676c8..581c14b6c6 100644 --- a/erts/emulator/sys/common/erl_sys_common_misc.c +++ b/erts/emulator/sys/common/erl_sys_common_misc.c @@ -42,33 +42,51 @@ /* Written once and only once */ static int filename_encoding = ERL_FILENAME_UNKNOWN; +#if defined(__WIN32__) +static int user_filename_encoding = ERL_FILENAME_UTF8; /* Default unicode on windows */ +#else +static int user_filename_encoding = ERL_FILENAME_LATIN1; +#endif +void erts_set_user_requested_filename_encoding(int encoding) +{ + user_filename_encoding = encoding; +} + +int erts_get_user_requested_filename_encoding(void) +{ + return user_filename_encoding; +} void erts_init_sys_common_misc(void) { #if defined(__WIN32__) filename_encoding = ERL_FILENAME_WIN_WCHAR; #else - char *l; - filename_encoding = ERL_FILENAME_LATIN1; + if (user_filename_encoding != ERL_FILENAME_UNKNOWN) { + filename_encoding = user_filename_encoding; + } else { + char *l; + filename_encoding = ERL_FILENAME_LATIN1; # ifdef PRIMITIVE_UTF8_CHECK - setlocale(LC_CTYPE, ""); /* Set international environment, - ignore result */ - if (((l = getenv("LC_ALL")) && *l) || - ((l = getenv("LC_CTYPE")) && *l) || - ((l = getenv("LANG")) && *l)) { - if (strstr(l, "UTF-8")) { - filename_encoding = ERL_FILENAME_UTF8; - } - } - + setlocale(LC_CTYPE, ""); /* Set international environment, + ignore result */ + if (((l = getenv("LC_ALL")) && *l) || + ((l = getenv("LC_CTYPE")) && *l) || + ((l = getenv("LANG")) && *l)) { + if (strstr(l, "UTF-8")) { + filename_encoding = ERL_FILENAME_UTF8; + } + } + # else - l = setlocale(LC_CTYPE, ""); /* Set international environment */ - if (l != NULL) { - if (strcmp(nl_langinfo(CODESET), "UTF-8") == 0) { - filename_encoding = ERL_FILENAME_UTF8; + l = setlocale(LC_CTYPE, ""); /* Set international environment */ + if (l != NULL) { + if (strcmp(nl_langinfo(CODESET), "UTF-8") == 0) { + filename_encoding = ERL_FILENAME_UTF8; + } } - } # endif + } #endif } diff --git a/lib/kernel/src/file.erl b/lib/kernel/src/file.erl index 97d914b043..89432869e9 100644 --- a/lib/kernel/src/file.erl +++ b/lib/kernel/src/file.erl @@ -75,7 +75,7 @@ -define(RAM_FILE, ram_file). % Module %% data types --type filename() :: string(). +-type filename() :: string() | binary(). -type file_info() :: #file_info{}. -type fd() :: #file_descriptor{}. -type io_device() :: pid() | fd(). @@ -87,7 +87,7 @@ | 'delayed_write' | {'read_ahead', pos_integer()} | 'read_ahead' | 'compressed' | {'encoding', unicode:encoding()}. --type name() :: string() | atom() | [name()]. +-type name() :: string() | atom() | [name()] | binary(). -type posix() :: 'eacces' | 'eagain' | 'ebadf' | 'ebusy' | 'edquot' | 'eexist' | 'efault' | 'efbig' | 'eintr' | 'einval' | 'eio' | 'eisdir' | 'eloop' | 'emfile' | 'emlink' diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index 01c06e4596..40df54fe54 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -41,6 +41,9 @@ -include_lib("kernel/include/file.hrl"). +-define(IS_DRIVELETTER(Letter),(((Letter >= $A) andalso (Letter =< $Z)) orelse + ((Letter >= $a) andalso (Letter =< $z)))). + %% Converts a relative filename to an absolute filename %% or the filename itself if it already is an absolute filename %% Note that no attempt is made to create the most beatiful @@ -57,12 +60,18 @@ %% (for Unix) : absname("/") -> "/" %% (for WIN32): absname("/") -> "D:/" --spec absname(file:name()) -> string(). + +-spec absname(file:name()) -> file:filename(). absname(Name) -> {ok, Cwd} = file:get_cwd(), absname(Name, Cwd). --spec absname(file:name(), string()) -> string(). +-spec absname(file:name(), file:filename()) -> file:filename(). +absname(Name, AbsBase) when is_binary(Name), is_list(AbsBase) -> + absname(Name,filename_string_to_binary(AbsBase)); +absname(Name, AbsBase) when is_list(Name), is_binary(AbsBase) -> + absname(filename_string_to_binary(Name),AbsBase); + absname(Name, AbsBase) -> case pathtype(Name) of relative -> @@ -77,6 +86,20 @@ absname(Name, AbsBase) -> %% Handles volumerelative names (on Windows only). +absname_vr([<<"/">>|Rest1], [Volume|_], _AbsBase) -> + %% Absolute path on current drive. + join([Volume|Rest1]); +absname_vr([<>|Rest1], [<>|_], AbsBase) -> + %% Relative to current directory on current drive. + absname(join(Rest1), AbsBase); +absname_vr([<>|Name], _, _AbsBase) -> + %% Relative to current directory on another drive. + Dcwd = + case file:get_cwd([X, $:]) of + {ok, Dir} -> filename_string_to_binary(Dir); + {error, _} -> <> + end, + absname(join(Name), Dcwd); absname_vr(["/"|Rest1], [Volume|_], _AbsBase) -> %% Absolute path on current drive. join([Volume|Rest1]); @@ -92,41 +115,13 @@ absname_vr([[X, $:]|Name], _, _AbsBase) -> end, absname(join(Name), Dcwd). -%% Joins a relative filename to an absolute base. For VxWorks the -%% resulting name is fixed to minimize the length by collapsing -%% ".." directories. -%% For other systems this is just a join/2, but assumes that +%% Joins a relative filename to an absolute base. +%% This is just a join/2, but assumes that %% AbsBase must be absolute and Name must be relative. --spec absname_join(string(), file:name()) -> string(). +-spec absname_join(file:filename(), file:name()) -> file:filename(). absname_join(AbsBase, Name) -> - case major_os_type() of - vxworks -> - absname_pretty(AbsBase, split(Name), lists:reverse(split(AbsBase))); - _Else -> - join(AbsBase, flatten(Name)) - end. - -%% Handles absolute filenames for VxWorks - these are 'pretty-printed', -%% since a C function call chdir("/erlang/lib/../bin") really sets -%% cwd to '/erlang/lib/../bin' which also works, but the long term -%% effect is potentially not so good ... -%% -%% absname_pretty("../bin", "/erlang/lib") -> "/erlang/bin" -%% absname_pretty("../../../..", "/erlang") -> "/erlang" - -absname_pretty(Abspath, Relpath, []) -> - %% AbsBase _must_ begin with a vxworks device name - {device, _Rest, Dev} = vxworks_first(Abspath), - absname_pretty(Abspath, Relpath, [lists:reverse(Dev)]); -absname_pretty(_Abspath, [], AbsBase) -> - join(lists:reverse(AbsBase)); -absname_pretty(Abspath, [[$.]|Rest], AbsBase) -> - absname_pretty(Abspath, Rest, AbsBase); -absname_pretty(Abspath, [[$.,$.]|Rest], [_|AbsRest]) -> - absname_pretty(Abspath, Rest, AbsRest); -absname_pretty(Abspath, [First|Rest], AbsBase) -> - absname_pretty(Abspath, Rest, [First|AbsBase]). + join(AbsBase, flatten(Name)). %% Returns the part of the filename after the last directory separator, %% or the filename itself if it has no separators. @@ -136,12 +131,36 @@ absname_pretty(Abspath, [First|Rest], AbsBase) -> %% basename("/usr/foo/") -> "foo" (trailing slashes ignored) %% basename("/") -> [] --spec basename(file:name()) -> string(). +-spec basename(file:name()) -> file:filename(). +basename(Name) when is_binary(Name) -> + case os:type() of + {win32,_} -> + win_basenameb(Name); + _ -> + basenameb(Name,[<<"/">>]) + end; + basename(Name0) -> Name = flatten(Name0), {DirSep2, DrvSep} = separators(), basename1(skip_prefix(Name, DrvSep), [], DirSep2). +win_basenameb(<>) when ?IS_DRIVELETTER(Letter) -> + basenameb(Rest,[<<"/">>,<<"\\">>]); +win_basenameb(O) -> + basenameb(O,[<<"/">>,<<"\\">>]). +basenameb(Bin,Sep) -> + Parts = [ X || X <- binary:split(Bin,Sep,[global]), + X =:= <<>> ], + if + Parts =:= [] -> + []; + true -> + lists:last(Parts) + end. + + + basename1([$/|[]], Tail, DirSep2) -> basename1([], Tail, DirSep2); basename1([$/|Rest], _Tail, DirSep2) -> @@ -155,26 +174,11 @@ basename1([Char|Rest], Tail, DirSep2) when is_integer(Char) -> basename1([], Tail, _DirSep2) -> lists:reverse(Tail). -skip_prefix(Name, false) -> % No prefix for unix, but for VxWorks. - case major_os_type() of - vxworks -> - case vxworks_first(Name) of - {device, Rest, _Device} -> - Rest; - {not_device, _Rest, _First} -> - Name - end; - _Else -> - Name - end; -skip_prefix(Name, DrvSep) -> - skip_prefix1(Name, DrvSep). - -skip_prefix1([L, DrvSep|Name], DrvSep) when is_integer(L) -> +skip_prefix(Name, false) -> + Name; +skip_prefix([L, DrvSep|Name], DrvSep) when ?IS_DRIVELETTER(L) -> Name; -skip_prefix1([L], _) when is_integer(L) -> - [L]; -skip_prefix1(Name, _) -> +skip_prefix(Name, _) -> Name. %% Returns the last component of the filename, with the given @@ -190,7 +194,27 @@ skip_prefix1(Name, _) -> %% rootname(basename("xxx.jam")) -> "xxx" %% rootname(basename("xxx.erl")) -> "xxx" --spec basename(file:name(), file:name()) -> string(). +-spec basename(file:name(), file:name()) -> file:filename(). +basename(Name, Ext) when is_binary(Name), is_list(Ext) -> + basename(Name,filename_string_to_binary(Ext)); +basename(Name, Ext) when is_list(Name), is_binary(Ext) -> + basename(filename_string_to_binary(Name),Ext); +basename(Name, Ext) when is_binary(Name), is_binary(Ext) -> + BName = basename(Name), + LN = byte_size(BName), + LE = byte_size(Ext), + case LN - LE of + Neg when Neg < 0 -> + BName; + Pos -> + case BName of + <> -> + Part; + Other -> + Other + end + end; + basename(Name0, Ext0) -> Name = flatten(Name0), Ext = flatten(Ext0), @@ -216,21 +240,10 @@ basename([], _Ext, Tail, _DrvSep2) -> %% Example: dirname("/usr/src/kalle.erl") -> "/usr/src", %% dirname("kalle.erl") -> "." --spec dirname(file:name()) -> string(). +-spec dirname(file:name()) -> file:filename(). dirname(Name0) -> Name = flatten(Name0), - case os:type() of - vxworks -> - {Devicep, Restname, FirstComp} = vxworks_first(Name), - case Devicep of - device -> - dirname(Restname, FirstComp, [], separators()); - _ -> - dirname(Name, [], [], separators()) - end; - _ -> - dirname(Name, [], [], separators()) - end. + dirname(Name, [], [], separators()). dirname([[_|_]=List|Rest], Dir, File, Seps) -> dirname(List++Rest, Dir, File, Seps); @@ -268,7 +281,7 @@ dirname([], Dir, _, _) -> %% %% On Windows: fn:dirname("\\usr\\src/kalle.erl") -> "/usr/src" --spec extension(file:name()) -> string(). +-spec extension(file:name()) -> file:filename(). extension(Name0) -> Name = flatten(Name0), extension(Name, [], major_os_type()). @@ -281,8 +294,6 @@ extension([$/|Rest], _Result, OsType) -> extension(Rest, [], OsType); extension([$\\|Rest], _Result, win32) -> extension(Rest, [], win32); -extension([$\\|Rest], _Result, vxworks) -> - extension(Rest, [], vxworks); extension([Char|Rest], Result, OsType) when is_integer(Char) -> extension(Rest, [Char|Result], OsType); extension([], Result, _OsType) -> @@ -290,23 +301,36 @@ extension([], Result, _OsType) -> %% Joins a list of filenames with directory separators. --spec join([string()]) -> string(). +-spec join([file:filename()]) -> file:filename(). join([Name1, Name2|Rest]) -> join([join(Name1, Name2)|Rest]); join([Name]) when is_list(Name) -> join1(Name, [], [], major_os_type()); +join([Name]) when is_binary(Name) -> + join1b(Name, <<>>, [], major_os_type()); join([Name]) when is_atom(Name) -> join([atom_to_list(Name)]). %% Joins two filenames with directory separators. --spec join(string(), string()) -> string(). +-spec join(file:filename(), file:filename()) -> file:filename(). join(Name1, Name2) when is_list(Name1), is_list(Name2) -> OsType = major_os_type(), case pathtype(Name2) of relative -> join1(Name1, Name2, [], OsType); _Other -> join1(Name2, [], [], OsType) end; +join(Name1, Name2) when is_binary(Name1), is_list(Name2) -> + join(Name1,filename_string_to_binary(Name2)); +join(Name1, Name2) when is_list(Name1), is_binary(Name2) -> + join(filename_string_to_binary(Name1),Name2); +join(Name1, Name2) when is_binary(Name1), is_binary(Name2) -> + OsType = major_os_type(), + case pathtype(Name2) of + relative -> join1b(Name1, Name2, [], OsType); + _Other -> join1b(Name2, <<>>, [], OsType) + end; + join(Name1, Name2) when is_atom(Name1) -> join(atom_to_list(Name1), Name2); join(Name1, Name2) when is_atom(Name2) -> @@ -321,8 +345,6 @@ when is_integer(UcLetter), UcLetter >= $A, UcLetter =< $Z -> join1(Rest, RelativeName, [$:, UcLetter+$a-$A], win32); join1([$\\|Rest], RelativeName, Result, win32) -> join1([$/|Rest], RelativeName, Result, win32); -join1([$\\|Rest], RelativeName, Result, vxworks) -> - join1([$/|Rest], RelativeName, Result, vxworks); join1([$/|Rest], RelativeName, [$., $/|Result], OsType) -> join1(Rest, RelativeName, [$/|Result], OsType); join1([$/|Rest], RelativeName, [$/|Result], OsType) -> @@ -344,6 +366,26 @@ join1([Char|Rest], RelativeName, Result, OsType) when is_integer(Char) -> join1([Atom|Rest], RelativeName, Result, OsType) when is_atom(Atom) -> join1(atom_to_list(Atom)++Rest, RelativeName, Result, OsType). +join1b(<>, RelativeName, [], win32) +when is_integer(UcLetter), UcLetter >= $A, UcLetter =< $Z -> + join1b(Rest, RelativeName, [$:, UcLetter+$a-$A], win32); +join1b(<<$\\,Rest/binary>>, RelativeName, Result, win32) -> + join1b(<<$/,Rest>>, RelativeName, Result, win32); +join1b(<<$/,Rest/binary>>, RelativeName, [$., $/|Result], OsType) -> + join1b(Rest, RelativeName, [$/|Result], OsType); +join1b(<<$/,Rest/binary>>, RelativeName, [$/|Result], OsType) -> + join1b(Rest, RelativeName, [$/|Result], OsType); +join1b(<<>>, <<>>, Result, OsType) -> + list_to_binary(maybe_remove_dirsep(Result, OsType)); +join1b(<<>>, RelativeName, [$:|Rest], win32) -> + join1b(RelativeName, <<>>, [$:|Rest], win32); +join1b(<<>>, RelativeName, [$/|Result], OsType) -> + join1b(RelativeName, <<>>, [$/|Result], OsType); +join1b(<<>>, RelativeName, Result, OsType) -> + join1b(RelativeName, <<>>, [$/|Result], OsType); +join1b(<>, RelativeName, Result, OsType) when is_integer(Char) -> + join1b(Rest, RelativeName, [Char|Result], OsType). + maybe_remove_dirsep([$/, $:, Letter], win32) -> [Letter, $:, $/]; maybe_remove_dirsep([$/], _) -> @@ -357,7 +399,7 @@ maybe_remove_dirsep(Name, _) -> %% a given base directory, which is is assumed to be normalised %% by a previous call to join/{1,2}. --spec append(string(), file:name()) -> string(). +-spec append(file:filename(), file:name()) -> file:filename(). append(Dir, Name) -> Dir ++ [$/|Name]. @@ -376,19 +418,14 @@ append(Dir, Name) -> -spec pathtype(file:name()) -> 'absolute' | 'relative' | 'volumerelative'. pathtype(Atom) when is_atom(Atom) -> pathtype(atom_to_list(Atom)); -pathtype(Name) when is_list(Name) -> +pathtype(Name) when is_list(Name) or is_binary(Name) -> case os:type() of {unix, _} -> unix_pathtype(Name); - {win32, _} -> win32_pathtype(Name); - vxworks -> case vxworks_first(Name) of - {device, _Rest, _Dev} -> - absolute; - _ -> - relative - end; - {ose,_} -> unix_pathtype(Name) + {win32, _} -> win32_pathtype(Name) end. +unix_pathtype(<<$/,_/binary>>) -> + absolute; unix_pathtype([$/|_]) -> absolute; unix_pathtype([List|Rest]) when is_list(List) -> @@ -404,6 +441,15 @@ win32_pathtype([Atom|Rest]) when is_atom(Atom) -> win32_pathtype(atom_to_list(Atom)++Rest); win32_pathtype([Char, List|Rest]) when is_list(List) -> win32_pathtype([Char|List++Rest]); +win32_pathtype(<<$/, $/, _/binary>>) -> absolute; +win32_pathtype(<<$\\, $/, _/binary>>) -> absolute; +win32_pathtype(<<$/, $\\, _/binary>>) -> absolute; +win32_pathtype(<<$\\, $\\, _/binary>>) -> absolute; +win32_pathtype(<<$/, _/binary>>) -> volumerelative; +win32_pathtype(<<$\\, _/binary>>) -> volumerelative; +win32_pathtype(<<_Letter, $:, $/, _/binary>>) -> absolute; +win32_pathtype(<<_Letter, $:, $\\, _/binary>>) -> absolute; +win32_pathtype(<<_Letter, $:, _/binary>>) -> volumerelative; win32_pathtype([$/, $/|_]) -> absolute; win32_pathtype([$\\, $/|_]) -> absolute; win32_pathtype([$/, $\\|_]) -> absolute; @@ -422,7 +468,7 @@ win32_pathtype(_) -> relative. %% Examples: rootname("/jam.src/kalle") -> "/jam.src/kalle" %% rootname("/jam.src/foo.erl") -> "/jam.src/foo" --spec rootname(file:name()) -> string(). +-spec rootname(file:name()) -> file:filename(). rootname(Name0) -> Name = flatten(Name0), rootname(Name, [], [], major_os_type()). @@ -431,8 +477,6 @@ rootname([$/|Rest], Root, Ext, OsType) -> rootname(Rest, [$/]++Ext++Root, [], OsType); rootname([$\\|Rest], Root, Ext, win32) -> rootname(Rest, [$/]++Ext++Root, [], win32); -rootname([$\\|Rest], Root, Ext, vxworks) -> - rootname(Rest, [$/]++Ext++Root, [], vxworks); rootname([$.|Rest], Root, [], OsType) -> rootname(Rest, Root, ".", OsType); rootname([$.|Rest], Root, Ext, OsType) -> @@ -451,7 +495,7 @@ rootname([], Root, _Ext, _OsType) -> %% Examples: rootname("/jam.src/kalle.jam", ".erl") -> "/jam.src/kalle.jam" %% rootname("/jam.src/foo.erl", ".erl") -> "/jam.src/foo" --spec rootname(file:name(), file:name()) -> string(). +-spec rootname(file:name(), file:name()) -> file:filename(). rootname(Name0, Ext0) -> Name = flatten(Name0), Ext = flatten(Ext0), @@ -471,27 +515,55 @@ rootname2([Char|Rest], Ext, Result) when is_integer(Char) -> %% split("foo/bar") -> ["foo", "bar"] %% split("a:\\msdev\\include") -> ["a:/", "msdev", "include"] --spec split(file:name()) -> [string()]. +-spec split(file:name()) -> [file:filename()]. +split(Name) when is_binary(Name) -> + case os:type() of + {win32, _} -> win32_splitb(Name); + _ -> unix_splitb(Name) + end; + split(Name0) -> Name = flatten(Name0), case os:type() of - {unix, _} -> unix_split(Name); {win32, _} -> win32_split(Name); - vxworks -> vxworks_split(Name); - {ose,_} -> unix_split(Name) + _ -> unix_split(Name) end. -%% If a VxWorks filename starts with '[/\].*[^/\]' '[/\].*:' or '.*:' -%% that part of the filename is considered a device. -%% The rest of the name is interpreted exactly as for win32. -%% XXX - dirty solution to make filename:split([]) return the same thing on -%% VxWorks as on unix and win32. -vxworks_split([]) -> - []; -vxworks_split(L) -> - {_Devicep, Rest, FirstComp} = vxworks_first(L), - split(Rest, [], [lists:reverse(FirstComp)], win32). +unix_splitb(Name) -> + L = binary:split(Name,[<<"/">>],[global]), + LL = case L of + [<<>>|Rest] -> + [<<"/">>|Rest]; + _ -> + L + end, + [ X || X <- LL, X =/= <<>>]. + + +fix_driveletter(Letter0) -> + if + Letter0 >= $A, Letter0 =< $Z -> + Letter0+$a-$A; + true -> + Letter0 + end. +win32_splitb(<>) when (((Slash =:= $\\) orelse (Slash =:= $/)) andalso + ?IS_DRIVELETTER(Letter0)) -> + Letter = fix_driveletter(Letter0), + L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]), + [<> | [ X || X <- L, X =/= <<>> ]]; +win32_splitb(<>) when ?IS_DRIVELETTER(Letter0) -> + Letter = fix_driveletter(Letter0), + L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]), + [<> | [ X || X <- L, X =/= <<>> ]]; +win32_splitb(<>) when ((Slash =:= $\\) orelse (Slash =:= $/)) -> + L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]), + [<<$/>> | [ X || X <- L, X =/= <<>> ]]; +win32_splitb(Name) -> + L = binary:split(Name,[<<"/">>,<<"\\">>],[global]), + [<<$/>> | [ X || X <- L, X =/= <<>> ]]. + unix_split(Name) -> split(Name, [], unix). @@ -502,8 +574,6 @@ win32_split([X, $\\|Rest]) when is_integer(X) -> win32_split([X, $/|Rest]); win32_split([X, Y, $\\|Rest]) when is_integer(X), is_integer(Y) -> win32_split([X, Y, $/|Rest]); -win32_split([$/, $/|Rest]) -> - split(Rest, [], [[$/, $/]]); win32_split([UcLetter, $:|Rest]) when UcLetter >= $A, UcLetter =< $Z -> win32_split([UcLetter+$a-$A, $:|Rest]); win32_split([Letter, $:, $/|Rest]) -> @@ -540,7 +610,7 @@ split([], Comp, Components, OsType) -> %% will be converted to backslashes. On all platforms, the %% name will be normalized as done by join/1. --spec nativename(string()) -> string(). +-spec nativename(file:filename()) -> file:filename(). nativename(Name0) -> Name = join([Name0]), %Normalize. case os:type() of @@ -557,13 +627,12 @@ win32_nativename([]) -> separators() -> case os:type() of - {unix, _} -> {false, false}; {win32, _} -> {$\\, $:}; - vxworks -> {$\\, false}; - {ose,_} -> {false, false} + _ -> {false, false} end. + %% find_src(Module) -- %% find_src(Module, Rules) -- %% @@ -733,45 +802,12 @@ major_os_type() -> OsT -> OsT end. -%% Need to take care of the first pathname component separately -%% due to VxWorks less than good device naming rules. -%% (i.e. this is VxWorks specific ...) -%% The following four all starts with device names -%% elrond:/foo -> elrond: -%% elrond:\\foo.bar -> elrond: -%% /DISK1:foo -> /DISK1: -%% /usr/include -> /usr -%% This one doesn't: -%% foo/bar - -vxworks_first([]) -> - {not_device, [], []}; -vxworks_first([$/|T]) -> - vxworks_first2(device, T, [$/]); -vxworks_first([$\\|T]) -> - vxworks_first2(device, T, [$/]); -vxworks_first([H|T]) when is_list(H) -> - vxworks_first(H++T); -vxworks_first([H|T]) -> - vxworks_first2(not_device, T, [H]). - -vxworks_first2(Devicep, [], FirstComp) -> - {Devicep, [], FirstComp}; -vxworks_first2(Devicep, [$/|T], FirstComp) -> - {Devicep, [$/|T], FirstComp}; -vxworks_first2(Devicep, [$\\|T], FirstComp) -> - {Devicep, [$/|T], FirstComp}; -vxworks_first2(_Devicep, [$:|T], FirstComp)-> - {device, T, [$:|FirstComp]}; -vxworks_first2(Devicep, [H|T], FirstComp) when is_list(H) -> - vxworks_first2(Devicep, H++T, FirstComp); -vxworks_first2(Devicep, [H|T], FirstComp) -> - vxworks_first2(Devicep, T, [H|FirstComp]). - %% flatten(List) %% Flatten a list, also accepting atoms. --spec flatten(file:name()) -> string(). +-spec flatten(file:name()) -> file:filename(). +flatten(Bin) when is_binary(Bin) -> + Bin; flatten(List) -> do_flatten(List, []). @@ -785,3 +821,12 @@ do_flatten([], Tail) -> Tail; do_flatten(Atom, Tail) when is_atom(Atom) -> atom_to_list(Atom) ++ flatten(Tail). + +filename_string_to_binary(List) -> + case unicode:characters_to_binary(List,unicode,file:native_name_encoding()) of + {error,_,_} -> + erlang:error(badarg); + Bin when is_binary(Bin) -> + Bin + end. + -- cgit v1.2.3 From a3ad23a50ef3dbdb8309bd178014d3285d87a78f Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Thu, 14 Oct 2010 10:01:20 +0200 Subject: Teach binary:matches not to return 'nomatch' on empty input --- erts/emulator/beam/erl_bif_binary.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index b6a445c55c..684fa5d12f 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -1477,7 +1477,7 @@ BIF_RETTYPE binary_matches_3(BIF_ALIST_3) goto badarg; } if (hsend == 0) { - BIF_RET(am_nomatch); + BIF_RET(NIL); } if (is_tuple(BIF_ARG_2)) { tp = tuple_val(BIF_ARG_2); -- cgit v1.2.3 From 63eeba2f6829aac2644eaf212ebef9cdf4b59e8d Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Thu, 14 Oct 2010 10:12:57 +0200 Subject: Handle binary file names and conversion of unicode strings --- erts/emulator/beam/bif.tab | 4 +- erts/emulator/beam/erl_unicode.c | 4 +- erts/emulator/drivers/common/efile_drv.c | 18 +++--- erts/preloaded/src/prim_file.erl | 106 ++++++++++++++++++------------- lib/kernel/src/file.erl | 2 + lib/kernel/src/file_io_server.erl | 4 +- lib/stdlib/src/c.erl | 20 +++++- lib/stdlib/src/filename.erl | 79 ++++++++++++++++++++++- 8 files changed, 177 insertions(+), 60 deletions(-) diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index cf251d9016..3de5e63a63 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -797,8 +797,8 @@ bif erlang:nif_error/2 # # Helpers for unicode filenames # -bif file:internal_name2native/1 -bif file:internal_native2name/1 +bif prim_file:internal_name2native/1 +bif prim_file:internal_native2name/1 bif file:native_name_encoding/0 # # Obsolete diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 3e7a935cef..6d8e9ccd90 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -2105,7 +2105,7 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */ -BIF_RETTYPE file_internal_name2native_1(BIF_ALIST_1) +BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1) { int encoding = erts_get_native_filename_encoding(); Sint need; @@ -2184,7 +2184,7 @@ BIF_RETTYPE file_internal_name2native_1(BIF_ALIST_1) BIF_RET(bin_term); } -BIF_RETTYPE file_internal_native2name_1(BIF_ALIST_1) +BIF_RETTYPE prim_file_internal_native2name_1(BIF_ALIST_1) { Eterm real_bin; Uint offset; diff --git a/erts/emulator/drivers/common/efile_drv.c b/erts/emulator/drivers/common/efile_drv.c index c450f10f48..5aa5f60d0f 100644 --- a/erts/emulator/drivers/common/efile_drv.c +++ b/erts/emulator/drivers/common/efile_drv.c @@ -67,6 +67,8 @@ #define FILE_RESP_LDATA 6 #define FILE_RESP_N2DATA 7 #define FILE_RESP_EOF 8 +#define FILE_RESP_FNAME 9 +#define FILE_RESP_ALL_DATA 10 /* Options */ @@ -1591,7 +1593,7 @@ static void invoke_readdir(void *data) buf_sz = READDIR_BUFSIZE - 4/* EOB */; } - p[4] = FILE_RESP_OK; + p[4] = FILE_RESP_FNAME; buf_sz -= 4 + 1; str = p + 4 + 1; ASSERT(buf_sz >= MAXPATHLEN + 1); @@ -1911,7 +1913,7 @@ file_async_ready(ErlDrvData e, ErlDrvThreadData data) if (!d->result_ok) reply_error(desc, &d->errInfo); else { - header[0] = FILE_RESP_OK; + header[0] = FILE_RESP_ALL_DATA; TRACE_C('R'); driver_output_binary(desc->port, header, 1, d->c.read_file.binp, @@ -1968,10 +1970,10 @@ file_async_ready(ErlDrvData e, ErlDrvThreadData data) if (!d->result_ok) reply_error(desc, &d->errInfo); else { - resbuf[0] = FILE_RESP_OK; + resbuf[0] = FILE_RESP_FNAME; length = 1+strlen((char*) resbuf+1); TRACE_C('R'); - driver_output2(desc->port, resbuf, length, NULL, 0); + driver_output2(desc->port, resbuf, 1, resbuf+1, length-1); } free_data(data); break; @@ -2031,7 +2033,7 @@ file_async_ready(ErlDrvData e, ErlDrvThreadData data) int sz = get_int32(p); while (sz) { /* 0 == EOB */ p += 4; - driver_output2(desc->port, p, sz, NULL, 0); + driver_output2(desc->port, p, 1, p+1, sz-1); p += sz; sz = get_int32(p); } @@ -2210,12 +2212,12 @@ file_output(ErlDrvData e, char* buf, int count) errInfo.posix_errno = 0; dir_handle = NULL; - resbuf[0] = FILE_RESP_OK; + resbuf[0] = FILE_RESP_FNAME; while (efile_readdir(&errInfo, name, &dir_handle, resbuf+1, RESBUFSIZE)) { - int length = 1 + strlen(resbuf+1); - driver_output2(desc->port, resbuf, length, NULL, 0); + int length = strlen(resbuf+1); + driver_output2(desc->port, resbuf, 1, resbuf+1, length); } if (errInfo.posix_errno != 0) { reply_error(desc, &errInfo); diff --git a/erts/preloaded/src/prim_file.erl b/erts/preloaded/src/prim_file.erl index 7f24889bb2..b15997572c 100644 --- a/erts/preloaded/src/prim_file.erl +++ b/erts/preloaded/src/prim_file.erl @@ -109,6 +109,8 @@ -define(FILE_RESP_LDATA, 6). -define(FILE_RESP_N2DATA, 7). -define(FILE_RESP_EOF, 8). +-define(FILE_RESP_FNAME, 9). +-define(FILE_RESP_ALL_DATA, 10). %% Open modes for the driver's open function. -define(EFILE_MODE_READ, 1). @@ -153,7 +155,7 @@ %% Opens a file using the driver port Port. Returns {error, Reason} %% | {ok, FileDescriptor} open(Port, File, ModeList) when is_port(Port), - is_list(File), + (is_list(File) orelse is_binary(File)), is_list(ModeList) -> case open_mode(ModeList) of {Mode, _Portopts, _Setopts} -> @@ -165,10 +167,11 @@ open(_,_,_) -> {error, badarg}. %% Opens a file. Returns {error, Reason} | {ok, FileDescriptor}. -open(File, ModeList) when is_list(File), is_list(ModeList) -> +open(File, ModeList) when (is_list(File) orelse is_binary(File)), + is_list(ModeList) -> case open_mode(ModeList) of {Mode, Portopts, Setopts} -> - open_int({?FD_DRV, Portopts}, File, Mode, Setopts); + open_int({?FD_DRV, Portopts},File, Mode, Setopts); Reason -> {error, Reason} end; @@ -196,7 +199,7 @@ open_int({Driver, Portopts}, File, Mode, Setopts) -> end; open_int(Port, File, Mode, Setopts) -> M = Mode band ?EFILE_MODE_MASK, - case drv_command(Port, [<>, File, 0]) of + case drv_command(Port, [<>, pathname(File), 0]) of {ok, Number} -> open_int_setopts(Port, Number, Setopts); Error -> @@ -489,7 +492,7 @@ ipread_s32bu_p32bu(#file_descriptor{module = ?MODULE, data = {_, _}}, %% Returns {ok, Contents} | {error, Reason} -read_file(File) -> +read_file(File) when (is_list(File) orelse is_binary(File)) -> case drv_open(?FD_DRV, [binary]) of {ok, Port} -> Result = read_file(Port, File), @@ -497,11 +500,14 @@ read_file(File) -> Result; {error, _} = Error -> Error - end. + end; +read_file(_) -> + {error, badarg}. %% Takes a Port opened with open/1. -read_file(Port, File) when is_port(Port) -> - Cmd = [?FILE_READ_FILE | File], +read_file(Port, File) when is_port(Port), + (is_list(File) orelse is_binary(File))-> + Cmd = [?FILE_READ_FILE | pathname(File)], case drv_command(Port, Cmd) of {error, enomem} -> %% It could possibly help to do a @@ -512,12 +518,14 @@ read_file(Port, File) when is_port(Port) -> drv_command(Port, Cmd); Result -> Result - end. + end; +read_file(_,_) -> + {error, badarg}. %% Returns {error, Reason} | ok. -write_file(File, Bin) -> +write_file(File, Bin) when (is_list(File) orelse is_binary(File)) -> case open(File, [binary, write]) of {ok, Handle} -> Result = write(Handle, Bin), @@ -525,8 +533,10 @@ write_file(File, Bin) -> Result; Error -> Error - end. - + end; +write_file(_, _) -> + {error, badarg}. + %%%----------------------------------------------------------------- @@ -539,7 +549,7 @@ write_file(File, Bin) -> %% Returns {ok, Port}, the Port should be used as first argument in all %% the following functions. Returns {error, Reason} upon failure. start() -> - try erlang:open_port({spawn, atom_to_list(?DRV)}, []) of + try erlang:open_port({spawn, atom_to_list(?DRV)}, [binary]) of Port -> {ok, Port} catch @@ -596,7 +606,7 @@ get_cwd(_, _) -> {error, badarg}. get_cwd_int(Drive) -> - get_cwd_int({?DRV, []}, Drive). + get_cwd_int({?DRV, [binary]}, Drive). get_cwd_int(Port, Drive) -> drv_command(Port, <>). @@ -606,7 +616,7 @@ get_cwd_int(Port, Drive) -> %% set_cwd/{1,2} set_cwd(Dir) -> - set_cwd_int({?DRV, []}, Dir). + set_cwd_int({?DRV, [binary]}, Dir). set_cwd(Port, Dir) when is_port(Port) -> set_cwd_int(Port, Dir). @@ -632,89 +642,88 @@ set_cwd_int(Port, Dir0) -> end), %% Dir is now either a string or an EXIT tuple. %% An EXIT tuple will fail in the following catch. - drv_command(Port, [?FILE_CHDIR, Dir, 0]). + drv_command(Port, [?FILE_CHDIR, pathname(Dir), 0]). %% delete/{1,2} delete(File) -> - delete_int({?DRV, []}, File). + delete_int({?DRV, [binary]}, File). delete(Port, File) when is_port(Port) -> delete_int(Port, File). delete_int(Port, File) -> - drv_command(Port, [?FILE_DELETE, File, 0]). + drv_command(Port, [?FILE_DELETE, pathname(File), 0]). %% rename/{2,3} rename(From, To) -> - rename_int({?DRV, []}, From, To). + rename_int({?DRV, [binary]}, From, To). rename(Port, From, To) when is_port(Port) -> rename_int(Port, From, To). rename_int(Port, From, To) -> - drv_command(Port, [?FILE_RENAME, From, 0, To, 0]). + drv_command(Port, [?FILE_RENAME, pathname(From), 0, pathname(To), 0]). %% make_dir/{1,2} make_dir(Dir) -> - make_dir_int({?DRV, []}, Dir). + make_dir_int({?DRV, [binary]}, Dir). make_dir(Port, Dir) when is_port(Port) -> make_dir_int(Port, Dir). make_dir_int(Port, Dir) -> - drv_command(Port, [?FILE_MKDIR, Dir, 0]). + drv_command(Port, [?FILE_MKDIR, pathname(Dir), 0]). %% del_dir/{1,2} del_dir(Dir) -> - del_dir_int({?DRV, []}, Dir). + del_dir_int({?DRV, [binary]}, Dir). del_dir(Port, Dir) when is_port(Port) -> del_dir_int(Port, Dir). del_dir_int(Port, Dir) -> - drv_command(Port, [?FILE_RMDIR, Dir, 0]). + drv_command(Port, [?FILE_RMDIR, pathname(Dir), 0]). %% read_file_info/{1,2} read_file_info(File) -> - read_file_info_int({?DRV, []}, File). + read_file_info_int({?DRV, [binary]}, File). read_file_info(Port, File) when is_port(Port) -> read_file_info_int(Port, File). read_file_info_int(Port, File) -> - drv_command(Port, [?FILE_FSTAT, File, 0]). + drv_command(Port, [?FILE_FSTAT, pathname(File), 0]). %% altname/{1,2} altname(File) -> - altname_int({?DRV, []}, File). + altname_int({?DRV, [binary]}, File). altname(Port, File) when is_port(Port) -> altname_int(Port, File). altname_int(Port, File) -> - drv_command(Port, [?FILE_ALTNAME, File, 0]). - + drv_command(Port, [?FILE_ALTNAME, pathname(File), 0]). %% write_file_info/{2,3} write_file_info(File, Info) -> - write_file_info_int({?DRV, []}, File, Info). + write_file_info_int({?DRV, [binary]}, File, Info). write_file_info(Port, File, Info) when is_port(Port) -> write_file_info_int(Port, File, Info). @@ -740,72 +749,72 @@ write_file_info_int(Port, date_to_bytes(Atime), date_to_bytes(Mtime), date_to_bytes(Ctime), - File, 0]). + pathname(File), 0]). %% make_link/{2,3} make_link(Old, New) -> - make_link_int({?DRV, []}, Old, New). + make_link_int({?DRV, [binary]}, Old, New). make_link(Port, Old, New) when is_port(Port) -> make_link_int(Port, Old, New). make_link_int(Port, Old, New) -> - drv_command(Port, [?FILE_LINK, Old, 0, New, 0]). + drv_command(Port, [?FILE_LINK, pathname(Old), 0, pathname(New), 0]). %% make_symlink/{2,3} make_symlink(Old, New) -> - make_symlink_int({?DRV, []}, Old, New). + make_symlink_int({?DRV, [binary]}, Old, New). make_symlink(Port, Old, New) when is_port(Port) -> make_symlink_int(Port, Old, New). make_symlink_int(Port, Old, New) -> - drv_command(Port, [?FILE_SYMLINK, Old, 0, New, 0]). + drv_command(Port, [?FILE_SYMLINK, pathname(Old), 0, pathname(New), 0]). %% read_link/{2,3} read_link(Link) -> - read_link_int({?DRV, []}, Link). + read_link_int({?DRV, [binary]}, Link). read_link(Port, Link) when is_port(Port) -> read_link_int(Port, Link). read_link_int(Port, Link) -> - drv_command(Port, [?FILE_READLINK, Link, 0]). + drv_command(Port, [?FILE_READLINK, pathname(Link), 0]). %% read_link_info/{2,3} read_link_info(Link) -> - read_link_info_int({?DRV, []}, Link). + read_link_info_int({?DRV, [binary]}, Link). read_link_info(Port, Link) when is_port(Port) -> read_link_info_int(Port, Link). read_link_info_int(Port, Link) -> - drv_command(Port, [?FILE_LSTAT, Link, 0]). + drv_command(Port, [?FILE_LSTAT, pathname(Link), 0]). %% list_dir/{1,2} list_dir(Dir) -> - list_dir_int({?DRV, []}, Dir). + list_dir_int({?DRV, [binary]}, Dir). list_dir(Port, Dir) when is_port(Port) -> list_dir_int(Port, Dir). list_dir_int(Port, Dir) -> - drv_command(Port, [?FILE_READDIR, Dir, 0], []). + drv_command(Port, [?FILE_READDIR, pathname(Dir), 0], []). @@ -1026,8 +1035,6 @@ lseek_position(_) -> translate_response(?FILE_RESP_OK, []) -> ok; -translate_response(?FILE_RESP_OK, Data) -> - {ok, Data}; translate_response(?FILE_RESP_ERROR, List) when is_list(List) -> {error, list_to_atom(List)}; translate_response(?FILE_RESP_NUMBER, List) -> @@ -1074,6 +1081,16 @@ translate_response(?FILE_RESP_N2DATA = X, L0) when is_list(L0) -> end; translate_response(?FILE_RESP_EOF, []) -> eof; +translate_response(?FILE_RESP_FNAME, []) -> + ok; +translate_response(?FILE_RESP_FNAME, Data) when is_binary(Data) -> + {ok, prim_file:internal_native2name(Data)}; +translate_response(?FILE_RESP_FNAME, Data) -> + {ok, Data}; + +translate_response(?FILE_RESP_ALL_DATA, Data) -> + {ok, Data}; + translate_response(X, Data) -> {error, {bad_response_from_port, [X | Data]}}. @@ -1209,3 +1226,6 @@ lists_split([Hd | Tl], N, Rev) -> reverse(X) -> lists:reverse(X, []). reverse(L, T) -> lists:reverse(L, T). + +pathname(File) -> + prim_file:internal_name2native(File). diff --git a/lib/kernel/src/file.erl b/lib/kernel/src/file.erl index 89432869e9..579b9132c6 100644 --- a/lib/kernel/src/file.erl +++ b/lib/kernel/src/file.erl @@ -1033,6 +1033,8 @@ path_open_first([], _Name, _Mode, LastError) -> %% Generates a flat file name from a deep list of atoms and %% characters (integers). +file_name(N) when is_binary(N) -> + N; file_name(N) -> try file_name_1(N) diff --git a/lib/kernel/src/file_io_server.erl b/lib/kernel/src/file_io_server.erl index 39dc32bb79..14da9c1a55 100644 --- a/lib/kernel/src/file_io_server.erl +++ b/lib/kernel/src/file_io_server.erl @@ -44,11 +44,11 @@ format_error(ErrorId) -> erl_posix_msg:message(ErrorId). start(Owner, FileName, ModeList) - when is_pid(Owner), is_list(FileName), is_list(ModeList) -> + when is_pid(Owner), (is_list(FileName) orelse is_binary(FileName)), is_list(ModeList) -> do_start(spawn, Owner, FileName, ModeList). start_link(Owner, FileName, ModeList) - when is_pid(Owner), is_list(FileName), is_list(ModeList) -> + when is_pid(Owner), (is_list(FileName) orelse is_binary(FileName)), is_list(ModeList) -> do_start(spawn_link, Owner, FileName, ModeList). %%%----------------------------------------------------------------- diff --git a/lib/stdlib/src/c.erl b/lib/stdlib/src/c.erl index 6d50a575eb..399b91b92f 100644 --- a/lib/stdlib/src/c.erl +++ b/lib/stdlib/src/c.erl @@ -659,7 +659,7 @@ portformat(Name, Id, Cmd) -> pwd() -> case file:get_cwd() of {ok, Str} -> - ok = io:format("~s\n", [Str]); + ok = io:format("~s\n", [fixup_one_bin(Str)]); {error, _} -> ok = io:format("Cannot determine current directory\n") end. @@ -684,11 +684,27 @@ ls() -> ls(Dir) -> case file:list_dir(Dir) of {ok, Entries} -> - ls_print(sort(Entries)); + ls_print(sort(fixup_bin(Entries))); {error,_E} -> format("Invalid directory\n") end. +fixup_one_bin(X) when is_binary(X) -> + L = binary_to_list(X), + [ if + El > 127 -> + $?; + true -> + El + end || El <- L]; +fixup_one_bin(X) -> + X. +fixup_bin([H|T]) -> + [fixup_one_bin(H) | fixup_bin(T)]; +fixup_bin([]) -> + []. + + ls_print([]) -> ok; ls_print(L) -> Width = min([max(lengths(L, [])), 40]) + 5, diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index 40df54fe54..9ca4b808e1 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -228,7 +228,7 @@ basename([$/|[]], Ext, Tail, DrvSep2) -> basename([], Ext, Tail, DrvSep2); basename([$/|Rest], Ext, _Tail, DrvSep2) -> basename(Rest, Ext, [], DrvSep2); -basename([$\\|Rest], Ext, Tail, DirSep2) when is_integer(DirSep2) -> +basename([DirSep2|Rest], Ext, Tail, DirSep2) when is_integer(DirSep2) -> basename([$/|Rest], Ext, Tail, DirSep2); basename([Char|Rest], Ext, Tail, DrvSep2) when is_integer(Char) -> basename(Rest, Ext, [Char|Tail], DrvSep2); @@ -241,6 +241,39 @@ basename([], _Ext, Tail, _DrvSep2) -> %% dirname("kalle.erl") -> "." -spec dirname(file:name()) -> file:filename(). +dirname(Name) when is_binary(Name) -> + {Dsep,Drivesep} = separators(), + SList = case Dsep of + Sep when is_integer(Sep) -> + [ <> ]; + _ -> + [] + end, + {XPart0,Dirs} = case Drivesep of + X when is_integer(X) -> + case Name of + <> when ?IS_DRIVELETTER(DL) -> + {<>,Rest}; + _ -> + {<<>>,Name} + end; + _ -> + {<<>>,Name} + end, + Parts0 = binary:split(Dirs,[<<"/">>|SList],[global]), + %% Fairly short lists of parts, OK to reverse twice... + Parts = case Parts0 of + [] -> []; + _ -> lists:reverse(fstrip(tl(lists:reverse(Parts0)))) + end, + XPart = case {Parts,XPart0} of + {[],<<>>} -> + <<".">>; + _ -> + XPart0 + end, + dirjoin(Parts,XPart,<<"/">>); + dirname(Name0) -> Name = flatten(Name0), dirname(Name, [], [], separators()). @@ -271,6 +304,26 @@ dirname([], [DrvSep,Dl], File, {_,DrvSep}) -> end; dirname([], Dir, _, _) -> lists:reverse(Dir). + +%% Compatibility with lists variant, remove trailing slashes +fstrip([<<>>,X|Y]) -> + fstrip([X|Y]); +fstrip(A) -> + A. + + +dirjoin([<<>>|T],Acc,Sep) -> + dirjoin1(T,<>,Sep); +dirjoin(A,B,C) -> + dirjoin1(A,B,C). + +dirjoin1([],Acc,_) -> + Acc; +dirjoin1([One],Acc,_) -> + <>; +dirjoin1([H|T],Acc,Sep) -> + dirjoin(T,<>,Sep). + %% Given a filename string, returns the file extension, %% including the period. Returns an empty list if there @@ -282,6 +335,30 @@ dirname([], Dir, _, _) -> %% On Windows: fn:dirname("\\usr\\src/kalle.erl") -> "/usr/src" -spec extension(file:name()) -> file:filename(). +extension(Name) when is_binary(Name) -> + {Dsep,_} = separators(), + SList = case Dsep of + Sep when is_integer(Sep) -> + [ <> ]; + _ -> + [] + end, + case binary:matches(Name,[<<".">>]) of + nomatch -> % Bug in binary workaround :( + <<>>; + [] -> + <<>>; + List -> + {Pos,_} = lists:last(List), + <<_:Pos/binary,Part/binary>> = Name, + case binary:match(Part,[<<"/">>|SList]) of + nomatch -> + Part; + _ -> + <<>> + end + end; + extension(Name0) -> Name = flatten(Name0), extension(Name, [], major_os_type()). -- cgit v1.2.3 From c5a67411ce1af7f12184ed3d645c794674cea8f9 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Thu, 4 Nov 2010 17:53:21 +0100 Subject: Allow installer to take redistributables from VC9 --- erts/etc/win32/nsis/Makefile | 2 ++ erts/etc/win32/nsis/dll_version_helper.sh | 10 ++++++++-- erts/etc/win32/nsis/erlang20.nsi | 10 +++++----- erts/etc/win32/nsis/find_redist.sh | 23 ++++++++++++++++++----- 4 files changed, 33 insertions(+), 12 deletions(-) mode change 100755 => 100644 erts/etc/win32/nsis/dll_version_helper.sh mode change 100755 => 100644 erts/etc/win32/nsis/find_redist.sh diff --git a/erts/etc/win32/nsis/Makefile b/erts/etc/win32/nsis/Makefile index ebb3ad9a96..981a232c69 100644 --- a/erts/etc/win32/nsis/Makefile +++ b/erts/etc/win32/nsis/Makefile @@ -45,6 +45,7 @@ WTARGET_DIR=$(shell (cygpath -d $(TARGET_DIR) 2>/dev/null || cygpath -d $(TARGET REDIST_FILE=$(shell (sh ./find_redist.sh || echo "")) REDIST_DLL_VERSION=$(shell (sh ./dll_version_helper.sh || echo "")) +REDIST_DLL_NAME=$(shell (sh ./dll_version_helper.sh -n || echo "")) release_spec: @NSIS_VER=`makensis /hdrinfo | head -1 | awk '{print $$2}'`; \ @@ -73,6 +74,7 @@ release_spec: cp $(REDIST_FILE) $(RELEASE_PATH)/vcredist_x86.exe;\ echo '!define HAVE_REDIST_FILE 1' >> $(VERSION_HEADER); \ echo '!define REDIST_DLL_VERSION "$(REDIST_DLL_VERSION)"' >> $(VERSION_HEADER);\ + echo '!define REDIST_DLL_NAME "$(REDIST_DLL_NAME)"' >> $(VERSION_HEADER);\ fi;\ if [ -f $(RELEASE_PATH)/docs/doc/index.html ];\ then \ diff --git a/erts/etc/win32/nsis/dll_version_helper.sh b/erts/etc/win32/nsis/dll_version_helper.sh old mode 100755 new mode 100644 index e0047dea8b..73a9183d68 --- a/erts/etc/win32/nsis/dll_version_helper.sh +++ b/erts/etc/win32/nsis/dll_version_helper.sh @@ -41,9 +41,15 @@ if [ '!' -f hello.exe.manifest ]; then exit 0 fi VERSION=`grep '&2 - exit 3 + fail=true + break; fi BPATH="$NBPATH" done -echo $BPATH -exit 0 \ No newline at end of file +if [ $fail = false ]; then + break; +fi +done +if [ $fail = false ]; then + echo $BPATH + exit 0 +else + echo "Failed to locate vcredist_x86.exe because directory structure was unexpected" >&2 + exit 3 +fi -- cgit v1.2.3 From be63e483361c9a764f6a68df80e50fd7bd825e6b Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 5 Nov 2010 11:39:59 +0100 Subject: Correction of VS2008 redistributables location --- erts/etc/win32/nsis/find_redist.sh | 71 ++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 22 deletions(-) diff --git a/erts/etc/win32/nsis/find_redist.sh b/erts/etc/win32/nsis/find_redist.sh index de6ebc8e31..4211bd1dfc 100644 --- a/erts/etc/win32/nsis/find_redist.sh +++ b/erts/etc/win32/nsis/find_redist.sh @@ -107,29 +107,56 @@ for x in cl bin vc; do fi BPATH="$NBPATH" done -#echo $BPATH -STARTPATH=$BPATH -for verdir in v2.0 v3.5; do -BPATH=$STARTPATH +BPATH_LIST=$BPATH + +# rc.exe is in the Microsoft SDK directory of VS2008 +RCPATH=`lookup_prog_in_path rc` fail=false -for x in sdk $verdir bootstrapper packages vcredist_x86 vcredist_x86.exe; do - #echo "x=$x" - #echo "BPATH=$BPATH" - NBPATH=`add_path_element $x "$BPATH"` - if [ "$NBPATH" = "$BPATH" ]; then - fail=true - break; +if [ '!' -z "$RCPATH" ]; then + BPATH=$RCPATH + for x in rc bin v6.0A ; do + NBPATH=`remove_path_element $x "$BPATH"` + if [ "$NBPATH" = "$BPATH" ]; then + fail=true + break; + fi + BPATH="$NBPATH" + done + if [ $fail = false ]; then + BPATH_LIST="$BPATH_LIST $BPATH" fi - BPATH="$NBPATH" -done -if [ $fail = false ]; then - break; fi + +# Frantic search through two roots with different +# version directories. We want to be very specific about the +# directory structures as we woildnt want to find the wrong +# redistributables... + +#echo $BPATH +for BP in $BPATH_LIST; do + for verdir in "sdk v2.0" "sdk v3.5" "v6.0A"; do + BPATH=$BP + fail=false + for x in $verdir bootstrapper packages vcredist_x86 vcredist_x86.exe; do + #echo "x=$x" + #echo "BPATH=$BPATH" + NBPATH=`add_path_element $x "$BPATH"` + if [ "$NBPATH" = "$BPATH" ]; then + fail=true + break; + fi + BPATH="$NBPATH" + done + if [ $fail = false ]; then + break; + fi + done + if [ $fail = false ]; then + echo $BPATH + exit 0 + fi done -if [ $fail = false ]; then - echo $BPATH - exit 0 -else - echo "Failed to locate vcredist_x86.exe because directory structure was unexpected" >&2 - exit 3 -fi + +echo "Failed to locate vcredist_x86.exe because directory structure was unexpected" >&2 +exit 3 + -- cgit v1.2.3 From b9101fe19b7f8d659d266152b36cc436b90c77a3 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Thu, 4 Nov 2010 17:55:55 +0100 Subject: Make Unicode filenames work on Windows --- erts/emulator/beam/erl_unicode.c | 38 ++- erts/emulator/drivers/common/efile_drv.c | 244 +++++++++------ erts/emulator/drivers/common/erl_efile.h | 10 +- erts/emulator/drivers/unix/unix_efile.c | 4 +- erts/emulator/drivers/win32/win_efile.c | 399 +++++++++++++------------ erts/emulator/sys/common/erl_sys_common_misc.c | 1 + erts/etc/win32/nsis/dll_version_helper.sh | 2 +- erts/etc/win32/nsis/find_redist.sh | 2 +- erts/preloaded/src/prim_file.erl | 29 +- 9 files changed, 408 insertions(+), 321 deletions(-) mode change 100644 => 100755 erts/etc/win32/nsis/dll_version_helper.sh mode change 100644 => 100755 erts/etc/win32/nsis/find_redist.sh diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 6d8e9ccd90..4f26f078ce 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -2104,7 +2104,11 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */ } - +/* + * This internal bif converts a filename to whatever format is suitable for the file driver + * It also adds zero termination so that prim_file neednt bother with the character encoding + * of the file driver + */ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1) { int encoding = erts_get_native_filename_encoding(); @@ -2119,28 +2123,36 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1) Uint unipoint; /* Uninterpreted encoding except if windows widechar, in case we convert from utf8 to win_wchar */ + size = binary_size(BIF_ARG_1); + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); if (encoding != ERL_FILENAME_WIN_WCHAR) { - BIF_RET(BIF_ARG_1); + /*Add 0 termination only*/ + bin_term = new_binary(BIF_P, NULL, size+1); + bin_p = binary_bytes(bin_term); + memcpy(bin_p,bytes,size); + bin_p[size]=0; + erts_free_aligned_binary_bytes(temp_alloc); + BIF_RET(bin_term); } /* In a wchar world, the emulator flags only affect how binaries are interpreted when sent from the user. */ /* Determine real length and create a new binary */ - size = binary_size(BIF_ARG_1); - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); if (analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != UTF8_OK || erts_get_user_requested_filename_encoding() == ERL_FILENAME_LATIN1) { /* What to do now? Maybe latin1, so just take byte for byte instead */ - bin_term = new_binary(BIF_P, 0, size*2); + bin_term = new_binary(BIF_P, 0, (size+1)*2); bin_p = binary_bytes(bin_term); while (size--) { *bin_p++ = *bytes++; *bin_p++ = 0; } + *bin_p++ = 0; + *bin_p++ = 0; erts_free_aligned_binary_bytes(temp_alloc); BIF_RET(bin_term); } /* OK, UTF8 ok, number of characters is in num_chars */ - bin_term = new_binary(BIF_P, 0, num_chars*2); + bin_term = new_binary(BIF_P, 0, (num_chars+1)*2); bin_p = binary_bytes(bin_term); while (num_chars--) { if (((*bytes) & ((byte) 0x80)) == 0) { @@ -2170,6 +2182,9 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1) *bin_p++ = (byte) (unipoint & 0xFF); *bin_p++ = (byte) ((unipoint >> 8) & 0xFF); } + /* zero termination */ + *bin_p++ = 0; + *bin_p++ = 0; erts_free_aligned_binary_bytes(temp_alloc); BIF_RET(bin_term); } /* binary */ @@ -2178,9 +2193,19 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1) if ((need = simple_char_need(BIF_ARG_1,encoding)) < 0) { BIF_ERROR(BIF_P,BADARG); } + if (encoding == ERL_FILENAME_WIN_WCHAR) { + need += 2; + } else { + ++need; + } + bin_term = new_binary(BIF_P, 0, need); bin_p = binary_bytes(bin_term); simple_put_chars(BIF_ARG_1,encoding,bin_p); + bin_p[need-1] = 0; + if (encoding == ERL_FILENAME_WIN_WCHAR) { + bin_p[need-2] = 0; + } BIF_RET(bin_term); } @@ -2243,6 +2268,7 @@ BIF_RETTYPE prim_file_internal_native2name_1(BIF_ALIST_1) Uint x = ((Uint) *bytes--) << 8; x |= ((Uint) *bytes--); ret = CONS(hp,make_small(x),ret); + hp += 2; size -= 2; } erts_free_aligned_binary_bytes(temp_alloc); diff --git a/erts/emulator/drivers/common/efile_drv.c b/erts/emulator/drivers/common/efile_drv.c index 5aa5f60d0f..ac73897cf2 100644 --- a/erts/emulator/drivers/common/efile_drv.c +++ b/erts/emulator/drivers/common/efile_drv.c @@ -111,11 +111,11 @@ void erl_exit(int n, char *fmt, ...); static ErlDrvSysInfo sys_info; -/*#define TRACE 1*/ +/* #define TRACE 1 */ #ifdef TRACE -# define TRACE_C(c) (putchar(c)) -# define TRACE_S(s) (fputs((s), stdout)) -# define TRACE_F(args) (printf args) +# define TRACE_C(c) do { putchar(c); fflush(stdout); } while (0) +# define TRACE_S(s) do { fputs((s), stdout); fflush(stdout); } while (0) +# define TRACE_F(args) do { printf args ;fflush(stdout); } while (0) #else # define TRACE_C(c) ((void)(0)) # define TRACE_S(s) ((void)(0)) @@ -139,24 +139,54 @@ static ErlDrvSysInfo sys_info; #define MUTEX_UNLOCK(m) #endif - - #if 0 /* Experimental, for forcing all file operations to use the same thread. */ -static unsigned file_fixed_key = 1; -#define KEY(desc) (&file_fixed_key) + static unsigned file_fixed_key = 1; +# define KEY(desc) (&file_fixed_key) #else -#define KEY(desc) (&(desc)->key) +# define KEY(desc) (&(desc)->key) #endif +#ifdef FILENAMES_16BIT +# define FILENAME_BYTELEN(Str) filename_len_16bit(Str) +# define FILENAME_COPY(To,From) filename_cpy_16bit((To),(From)) +# define FILENAME_CHARSIZE 2 + + static int filename_len_16bit(char *str) + { + char *p = str; + while(*p != '\0' || p[1] != '\0') { + p += 2; + } + return (p - str); + } + + static void filename_cpy_16bit(char *to, char *from) + { + while(*from != '\0' || from[1] != '\0') { + *to++ = *from++; + *to++ = *from++; + } + *to++ = *from++; + *to++ = *from++; + } + +#else +# define FILENAME_BYTELEN(Str) strlen(Str) +# define FILENAME_COPY(To,From) strcpy(To,From) +# define FILENAME_CHARSIZE 1 +#endif -#if MAXPATHLEN >= BUFSIZ -#define RESBUFSIZE MAXPATHLEN+1 +#if (MAXPATHLEN+1)*FILENAME_CHARSIZE+1 > BUFSIZ +# define RESBUFSIZE ((MAXPATHLEN+1)*FILENAME_CHARSIZE+1) #else -#define RESBUFSIZE BUFSIZ +# define RESBUFSIZE BUFSIZ #endif + + + #define GET_TIME(i, b) \ (i).year = get_int32((b) + 0 * 4); \ (i).month = get_int32((b) + 1 * 4); \ @@ -288,9 +318,9 @@ struct t_preadv { }; #define READDIR_BUFSIZE (8*1024) -#if READDIR_BUFSIZE < (2*MAXPATHLEN) -#undef READDIR_BUFSIZE -#define READDIR_BUFSIZE (2*MAXPATHLEN) +#if READDIR_BUFSIZE < (FILENAME_CHARSIZE*2*(MAXPATHLEN+1)) +# undef READDIR_BUFSIZE +# define READDIR_BUFSIZE (FILENAME_CHARSIZE*2*(MAXPATHLEN+1)) #endif struct t_readdir_buf { @@ -371,6 +401,7 @@ struct t_data }; + #define EF_ALLOC(S) driver_alloc((S)) #define EF_REALLOC(P, S) driver_realloc((P), (S)) #define EF_SAFE_ALLOC(S) ef_safe_alloc((S)) @@ -1290,7 +1321,7 @@ static void invoke_writev(void *data) { p < size && iovcnt < iovlen; p += iov0[iovcnt++].iov_len) ; - iov = EF_ALLOC(sizeof(SysIOVec)*iovcnt); + iov = EF_SAFE_ALLOC(sizeof(SysIOVec)*iovcnt); memcpy(iov,iov0,iovcnt*sizeof(SysIOVec)); MUTEX_UNLOCK(d->c.writev.q_mtx); /* Let go of lock until we deque from original vector */ @@ -1407,7 +1438,7 @@ static void invoke_pwritev(void *data) { /* Lock the queue just for a while, we don't want it locked during write */ MUTEX_LOCK(c->q_mtx); iov0 = driver_peekq(c->port, &iovlen); - iov = EF_ALLOC(sizeof(SysIOVec)*iovlen); + iov = EF_SAFE_ALLOC(sizeof(SysIOVec)*iovlen); memcpy(iov,iov0,sizeof(SysIOVec)*iovlen); MUTEX_UNLOCK(c->q_mtx); @@ -1501,7 +1532,7 @@ static void invoke_link(void *data) char *new_name; d->again = 0; - new_name = name+strlen(name)+1; + new_name = name+FILENAME_BYTELEN(name)+FILENAME_CHARSIZE; d->result_ok = efile_link(&d->errInfo, name, new_name); } @@ -1512,7 +1543,7 @@ static void invoke_symlink(void *data) char *new_name; d->again = 0; - new_name = name+strlen(name)+1; + new_name = name+FILENAME_BYTELEN(name)+FILENAME_CHARSIZE; d->result_ok = efile_symlink(&d->errInfo, name, new_name); } @@ -1523,7 +1554,7 @@ static void invoke_rename(void *data) char *new_name; d->again = 0; - new_name = name+strlen(name)+1; + new_name = name+FILENAME_BYTELEN(name)+FILENAME_CHARSIZE; d->result_ok = efile_rename(&d->errInfo, name, new_name); } @@ -1571,13 +1602,15 @@ static void invoke_readdir(void *data) int s; char *p = NULL; int buf_sz = 0; + size_t tmp_bs; d->again = 0; d->errInfo.posix_errno = 0; while (1) { char *str; - if (buf_sz < (4 /* sz */ + 1 /* cmd */ + MAXPATHLEN + 1 /* '\0' */)) { + if (buf_sz < (4 /* sz */ + 1 /* cmd */ + + FILENAME_CHARSIZE*(MAXPATHLEN + 1))) { struct t_readdir_buf *b; if (p) { put_int32(0, p); /* EOB */ @@ -1597,14 +1630,14 @@ static void invoke_readdir(void *data) buf_sz -= 4 + 1; str = p + 4 + 1; ASSERT(buf_sz >= MAXPATHLEN + 1); - s = efile_readdir(&d->errInfo, d->b, &d->dir_handle, str, buf_sz); + tmp_bs = buf_sz; + s = efile_readdir(&d->errInfo, d->b, &d->dir_handle, str, &tmp_bs); if (s) { - int str_sz = strlen(str); - int sz = str_sz + 1; - put_int32(sz, p); - p += 4 + sz; - buf_sz -= str_sz; + put_int32(tmp_bs + 1 /* 1 byte for opcode */, p); + p += 4 + tmp_bs + 1; + ASSERT(p == (str + tmp_bs)); + buf_sz -= tmp_bs; } else { put_int32(1, p); @@ -1971,7 +2004,7 @@ file_async_ready(ErlDrvData e, ErlDrvThreadData data) reply_error(desc, &d->errInfo); else { resbuf[0] = FILE_RESP_FNAME; - length = 1+strlen((char*) resbuf+1); + length = 1+FILENAME_BYTELEN((char*) resbuf+1); TRACE_C('R'); driver_output2(desc->port, resbuf, 1, resbuf+1, length-1); } @@ -2033,13 +2066,18 @@ file_async_ready(ErlDrvData e, ErlDrvThreadData data) int sz = get_int32(p); while (sz) { /* 0 == EOB */ p += 4; - driver_output2(desc->port, p, 1, p+1, sz-1); + if (sz - 1 > 0) { + driver_output2(desc->port, p, 1, p+1, sz-1); + } else { + driver_output2(desc->port, p, 1, NULL, 0); + } p += sz; sz = get_int32(p); } b1 = b1->next; EF_FREE(b2); } + d->c.read_dir.first_buf = NULL; d->c.read_dir.last_buf = NULL; } @@ -2115,9 +2153,9 @@ file_output(ErlDrvData e, char* buf, int count) case FILE_MKDIR: { - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + strlen(name) + 1); + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE); - strcpy(d->b, name); + FILENAME_COPY(d->b, name); d->command = command; d->invoke = invoke_mkdir; d->free = free_data; @@ -2126,9 +2164,9 @@ file_output(ErlDrvData e, char* buf, int count) } case FILE_RMDIR: { - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + strlen(name) + 1); + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE); - strcpy(d->b, name); + FILENAME_COPY(d->b, name); d->command = command; d->invoke = invoke_rmdir; d->free = free_data; @@ -2137,9 +2175,9 @@ file_output(ErlDrvData e, char* buf, int count) } case FILE_DELETE: { - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + strlen(name) + 1); + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE); - strcpy(d->b, name); + FILENAME_COPY(d->b, name); d->command = command; d->invoke = invoke_delete_file; d->free = free_data; @@ -2149,14 +2187,14 @@ file_output(ErlDrvData e, char* buf, int count) case FILE_RENAME: { char* new_name; - - new_name = name+strlen(name)+1; + int namelen = FILENAME_BYTELEN(name)+FILENAME_CHARSIZE; + new_name = name+namelen; d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 - + strlen(name) + 1 - + strlen(new_name) + 1); + + namelen + + FILENAME_BYTELEN(new_name) + FILENAME_CHARSIZE); - strcpy(d->b, name); - strcpy(d->b + strlen(name) + 1, new_name); + FILENAME_COPY(d->b, name); + FILENAME_COPY(d->b + namelen, new_name); d->flags = desc->flags; d->fd = fd; d->command = command; @@ -2167,9 +2205,9 @@ file_output(ErlDrvData e, char* buf, int count) } case FILE_CHDIR: { - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + strlen(name) + 1); + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE); - strcpy(d->b, name); + FILENAME_COPY(d->b, name); d->command = command; d->invoke = invoke_chdir; d->free = free_data; @@ -2192,9 +2230,10 @@ file_output(ErlDrvData e, char* buf, int count) #ifdef USE_THREADS if (sys_info.async_threads > 0) { - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + strlen(name) + 1); + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(name) + + FILENAME_CHARSIZE); - strcpy(d->b, name); + FILENAME_COPY(d->b, name); d->dir_handle = NULL; d->command = command; d->invoke = invoke_readdir; @@ -2207,17 +2246,19 @@ file_output(ErlDrvData e, char* buf, int count) else #endif { + size_t resbufsize; char resbuf[RESBUFSIZE+1]; EFILE_DIR_HANDLE dir_handle; /* Handle to open directory. */ errInfo.posix_errno = 0; dir_handle = NULL; resbuf[0] = FILE_RESP_FNAME; + resbufsize = RESBUFSIZE; while (efile_readdir(&errInfo, name, &dir_handle, - resbuf+1, RESBUFSIZE)) { - int length = strlen(resbuf+1); - driver_output2(desc->port, resbuf, 1, resbuf+1, length); + resbuf+1, &resbufsize)) { + driver_output2(desc->port, resbuf, 1, resbuf+1, resbufsize); + resbufsize = RESBUFSIZE; } if (errInfo.posix_errno != 0) { reply_error(desc, &errInfo); @@ -2229,11 +2270,12 @@ file_output(ErlDrvData e, char* buf, int count) } case FILE_OPEN: { - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + strlen(buf+4) + 1); + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(buf+4) + + FILENAME_CHARSIZE); d->flags = get_int32((uchar*)buf); name = buf+4; - strcpy(d->b, name); + FILENAME_COPY(d->b, name); d->command = command; d->invoke = invoke_open; d->free = free_data; @@ -2242,44 +2284,45 @@ file_output(ErlDrvData e, char* buf, int count) } case FILE_FDATASYNC: - { + { d = EF_SAFE_ALLOC(sizeof(struct t_data)); - + d->fd = fd; d->command = command; d->invoke = invoke_fdatasync; d->free = free_data; d->level = 2; goto done; - } + } case FILE_FSYNC: - { - d = EF_SAFE_ALLOC(sizeof(struct t_data)); - - d->fd = fd; - d->command = command; - d->invoke = invoke_fsync; - d->free = free_data; - d->level = 2; - goto done; - } + { + d = EF_SAFE_ALLOC(sizeof(struct t_data)); + + d->fd = fd; + d->command = command; + d->invoke = invoke_fsync; + d->free = free_data; + d->level = 2; + goto done; + } case FILE_FSTAT: case FILE_LSTAT: - { - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + strlen(name) + 1); + { + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(name) + + FILENAME_CHARSIZE); + + FILENAME_COPY(d->b, name); + d->fd = fd; + d->command = command; + d->invoke = invoke_flstat; + d->free = free_data; + d->level = 2; + goto done; + } - strcpy(d->b, name); - d->fd = fd; - d->command = command; - d->invoke = invoke_flstat; - d->free = free_data; - d->level = 2; - goto done; - } - case FILE_TRUNCATE: { d = EF_SAFE_ALLOC(sizeof(struct t_data)); @@ -2296,7 +2339,7 @@ file_output(ErlDrvData e, char* buf, int count) case FILE_WRITE_INFO: { d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 - + strlen(buf+21*4) + 1); + + FILENAME_BYTELEN(buf+21*4) + FILENAME_CHARSIZE); d->info.mode = get_int32(buf + 0 * 4); d->info.uid = get_int32(buf + 1 * 4); @@ -2304,7 +2347,7 @@ file_output(ErlDrvData e, char* buf, int count) GET_TIME(d->info.accessTime, buf + 3 * 4); GET_TIME(d->info.modifyTime, buf + 9 * 4); GET_TIME(d->info.cTime, buf + 15 * 4); - strcpy(d->b, buf+21*4); + FILENAME_COPY(d->b, buf+21*4); d->command = command; d->invoke = invoke_write_info; d->free = free_data; @@ -2316,7 +2359,7 @@ file_output(ErlDrvData e, char* buf, int count) { d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + RESBUFSIZE + 1); - strcpy(d->b, name); + FILENAME_COPY(d->b, name); d->command = command; d->invoke = invoke_readlink; d->free = free_data; @@ -2325,28 +2368,29 @@ file_output(ErlDrvData e, char* buf, int count) } case FILE_ALTNAME: - { - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + RESBUFSIZE + 1); - strcpy(d->b, name); - d->command = command; - d->invoke = invoke_altname; - d->free = free_data; - d->level = 2; - goto done; - } + { + d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + RESBUFSIZE + 1); + FILENAME_COPY(d->b, name); + d->command = command; + d->invoke = invoke_altname; + d->free = free_data; + d->level = 2; + goto done; + } case FILE_LINK: { char* new_name; + int namelen = FILENAME_BYTELEN(name) + FILENAME_CHARSIZE; - new_name = name+strlen(name)+1; + new_name = name+namelen; d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 - + strlen(name) + 1 - + strlen(new_name) + 1); + + namelen + + FILENAME_BYTELEN(new_name) + FILENAME_CHARSIZE); - strcpy(d->b, name); - strcpy(d->b + strlen(name) + 1, new_name); + FILENAME_COPY(d->b, name); + FILENAME_COPY(d->b + namelen, new_name); d->flags = desc->flags; d->fd = fd; d->command = command; @@ -2359,14 +2403,15 @@ file_output(ErlDrvData e, char* buf, int count) case FILE_SYMLINK: { char* new_name; + int namelen = FILENAME_BYTELEN(name) + FILENAME_CHARSIZE; - new_name = name+strlen(name)+1; + new_name = name+namelen; d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 - + strlen(name) + 1 - + strlen(new_name) + 1); + + namelen + + FILENAME_BYTELEN(new_name) + FILENAME_CHARSIZE); - strcpy(d->b, name); - strcpy(d->b + strlen(name) + 1, new_name); + FILENAME_COPY(d->b, name); + FILENAME_COPY(d->b + namelen, new_name); d->flags = desc->flags; d->fd = fd; d->command = command; @@ -3006,6 +3051,7 @@ file_outputv(ErlDrvData e, ErlIOVec *ev) { case FILE_READ_FILE: { struct t_data *d; + char *filename; if (ev->size < 1+1) { /* Buffer contains empty name */ reply_posix_error(desc, ENOENT); @@ -3016,7 +3062,8 @@ file_outputv(ErlDrvData e, ErlIOVec *ev) { reply_posix_error(desc, EINVAL); goto done; } - d = EF_ALLOC(sizeof(struct t_data) + ev->size); + filename = EV_CHAR_P(ev, p, q); + d = EF_ALLOC(sizeof(struct t_data) -1 + FILENAME_BYTELEN(filename) + FILENAME_CHARSIZE); if (! d) { reply_posix_error(desc, ENOMEM); goto done; @@ -3024,8 +3071,7 @@ file_outputv(ErlDrvData e, ErlIOVec *ev) { d->command = command; d->reply = !0; /* Copy name */ - memcpy(d->c.read_file.name, EV_CHAR_P(ev, p, q), ev->size-1); - d->c.read_file.name[ev->size-1] = '\0'; + FILENAME_COPY(d->c.read_file.name, filename); d->c.read_file.binp = NULL; d->invoke = invoke_read_file; d->free = free_read_file; diff --git a/erts/emulator/drivers/common/erl_efile.h b/erts/emulator/drivers/common/erl_efile.h index ac95c1f949..3097ded3f1 100644 --- a/erts/emulator/drivers/common/erl_efile.h +++ b/erts/emulator/drivers/common/erl_efile.h @@ -59,6 +59,14 @@ #define FA_WRITE 1 #define FA_READ 2 +/* Some OS'es (i.e. Windows) has filenames in wide charaqcters. That requires special handling */ +/* Note that we do *not* honor alignment in the communication to the OS specific driver, */ +/* which is not a problem on x86, but might be on other platforms. The OS specific efile */ +/* implementation is expected to align if needed */ +#ifdef __WIN32__ +#define FILENAMES_16BIT 1 +#endif + /* * An handle to an open directory. To be cast to the correct type * in the system-dependent directory functions. @@ -123,7 +131,7 @@ int efile_getdcwd(Efile_error* errInfo, int drive, char* buffer, size_t size); int efile_readdir(Efile_error* errInfo, char* name, EFILE_DIR_HANDLE* dir_handle, - char* buffer, size_t size); + char* buffer, size_t *size); int efile_openfile(Efile_error* errInfo, char* name, int flags, int* pfd, Sint64* pSize); void efile_closefile(int fd); diff --git a/erts/emulator/drivers/unix/unix_efile.c b/erts/emulator/drivers/unix/unix_efile.c index b19f632f52..6297ccb8bc 100644 --- a/erts/emulator/drivers/unix/unix_efile.c +++ b/erts/emulator/drivers/unix/unix_efile.c @@ -587,7 +587,8 @@ efile_readdir(Efile_error* errInfo, /* Where to return error codes. */ open directory.*/ char* buffer, /* Pointer to buffer for one filename. */ - size_t size) /* Size of buffer. */ + size_t *size) /* in-out Size of buffer, length + of name. */ { DIR *dp; /* Pointer to directory structure. */ struct dirent* dirp; /* Pointer to directory entry. */ @@ -620,6 +621,7 @@ efile_readdir(Efile_error* errInfo, /* Where to return error codes. */ continue; buffer[0] = '\0'; strncat(buffer, dirp->d_name, size-1); + *size = strlen(dirp->d_name); return 1; } } diff --git a/erts/emulator/drivers/win32/win_efile.c b/erts/emulator/drivers/win32/win_efile.c index 6de08e2fa6..9fb4a66750 100755 --- a/erts/emulator/drivers/win32/win_efile.c +++ b/erts/emulator/drivers/win32/win_efile.c @@ -23,20 +23,20 @@ #include #include "sys.h" #include - +#include #include "erl_efile.h" /* * Microsoft-specific function to map a WIN32 error code to a Posix errno. */ -#define ISSLASH(a) ((a) == '\\' || (a) == '/') +#define ISSLASH(a) ((a) == L'\\' || (a) == L'/') #define ISDIR(st) (((st).st_mode&S_IFMT) == S_IFDIR) #define ISREG(st) (((st).st_mode&S_IFMT) == S_IFREG) #define IS_DOT_OR_DOTDOT(s) \ - (s[0] == '.' && (s[1] == '\0' || (s[1] == '.' && s[2] == '\0'))) + ((s)[0] == L'.' && ((s)[1] == L'\0' || ((s)[1] == L'.' && (s)[2] == L'\0'))) #ifndef INVALID_FILE_ATTRIBUTES #define INVALID_FILE_ATTRIBUTES ((DWORD) 0xFFFFFFFF) @@ -44,9 +44,9 @@ static int check_error(int result, Efile_error* errInfo); static int set_error(Efile_error* errInfo); -static int IsRootUNCName(const char* path); -static int extract_root(char* name); -static unsigned short dos_to_posix_mode(int attr, const char *name); +static int is_root_unc_name(const WCHAR *path); +static int extract_root(WCHAR *name); +static unsigned short dos_to_posix_mode(int attr, const WCHAR *name); static int errno_map(DWORD last_error) { @@ -196,27 +196,26 @@ win_writev(Efile_error* errInfo, int -efile_mkdir(errInfo, name) -Efile_error* errInfo; /* Where to return error codes. */ -char* name; /* Name of directory to create. */ +efile_mkdir(Efile_error* errInfo, /* Where to return error codes. */ + char* name) /* Name of directory to create. */ { - return check_error(mkdir(name), errInfo); + return check_error(_wmkdir((WCHAR *) name), errInfo); } int -efile_rmdir(errInfo, name) -Efile_error* errInfo; /* Where to return error codes. */ -char* name; /* Name of directory to delete. */ +efile_rmdir(Efile_error* errInfo, /* Where to return error codes. */ + char* name) /* Name of directory to delete. */ { OSVERSIONINFO os; DWORD attr; + WCHAR *wname = (WCHAR *) name; - if (RemoveDirectory(name) != FALSE) { + if (RemoveDirectoryW(wname) != FALSE) { return 1; } errno = errno_map(GetLastError()); if (errno == EACCES) { - attr = GetFileAttributes(name); + attr = GetFileAttributesW(wname); if (attr != (DWORD) -1) { if ((attr & FILE_ATTRIBUTE_DIRECTORY) == 0) { /* @@ -238,21 +237,21 @@ char* name; /* Name of directory to delete. */ GetVersionEx(&os); if (os.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { HANDLE handle; - WIN32_FIND_DATA data; - char buffer[2*MAX_PATH]; + WIN32_FIND_DATAW data; + WCHAR buffer[2*MAX_PATH]; int len; - len = strlen(name); - strcpy(buffer, name); - if (buffer[0] && buffer[len-1] != '\\' && buffer[len-1] != '/') { - strcat(buffer, "\\"); + len = wcslen(wname); + wcscpy(buffer, wname); + if (buffer[0] && buffer[len-1] != L'\\' && buffer[len-1] != L'/') { + wcscat(buffer, L"\\"); } - strcat(buffer, "*.*"); - handle = FindFirstFile(buffer, &data); + wcscat(buffer, L"*.*"); + handle = FindFirstFileW(buffer, &data); if (handle != INVALID_HANDLE_VALUE) { while (1) { - if ((strcmp(data.cFileName, ".") != 0) - && (strcmp(data.cFileName, "..") != 0)) { + if ((wcscmp(data.cFileName, L".") != 0) + && (wcscmp(data.cFileName, L"..") != 0)) { /* * Found something in this directory. */ @@ -260,7 +259,7 @@ char* name; /* Name of directory to delete. */ errno = EEXIST; break; } - if (FindNextFile(handle, &data) == FALSE) { + if (FindNextFileW(handle, &data) == FALSE) { break; } } @@ -284,19 +283,19 @@ char* name; /* Name of directory to delete. */ } int -efile_delete_file(errInfo, name) -Efile_error* errInfo; /* Where to return error codes. */ -char* name; /* Name of file to delete. */ +efile_delete_file(Efile_error* errInfo, /* Where to return error codes. */ + char* name) /* Name of file to delete. */ { DWORD attr; + WCHAR *wname = (WCHAR *) name; - if (DeleteFile(name) != FALSE) { + if (DeleteFileW(wname) != FALSE) { return 1; } errno = errno_map(GetLastError()); if (errno == EACCES) { - attr = GetFileAttributes(name); + attr = GetFileAttributesW(wname); if (attr != (DWORD) -1) { if (attr & FILE_ATTRIBUTE_DIRECTORY) { /* @@ -308,7 +307,7 @@ char* name; /* Name of file to delete. */ } } } else if (errno == ENOENT) { - attr = GetFileAttributes(name); + attr = GetFileAttributesW(wname); if (attr != (DWORD) -1) { if (attr & FILE_ATTRIBUTE_DIRECTORY) { /* @@ -362,20 +361,21 @@ char* name; /* Name of file to delete. */ */ int -efile_rename(errInfo, src, dst) -Efile_error* errInfo; /* Where to return error codes. */ -char* src; /* Original name. */ -char* dst; /* New name. */ +efile_rename(Efile_error* errInfo, /* Where to return error codes. */ + char* src, /* Original name. */ + char* dst) /* New name. */ { DWORD srcAttr, dstAttr; + WCHAR *wsrc = (WCHAR *) src; + WCHAR *wdst = (WCHAR *) dst; - if (MoveFile(src, dst) != FALSE) { + if (MoveFileW(wsrc, wdst) != FALSE) { return 1; } errno = errno_map(GetLastError()); - srcAttr = GetFileAttributes(src); - dstAttr = GetFileAttributes(dst); + srcAttr = GetFileAttributesW(wsrc); + dstAttr = GetFileAttributesW(wdst); if (srcAttr == (DWORD) -1) { srcAttr = 0; } @@ -390,22 +390,22 @@ char* dst; /* New name. */ if (errno == EACCES) { decode: if (srcAttr & FILE_ATTRIBUTE_DIRECTORY) { - char srcPath[MAX_PATH], dstPath[MAX_PATH]; - char *srcRest, *dstRest; + WCHAR srcPath[MAX_PATH], dstPath[MAX_PATH]; + WCHAR *srcRest, *dstRest; int size; - size = GetFullPathName(src, sizeof(srcPath), srcPath, &srcRest); - if ((size == 0) || (size > sizeof(srcPath))) { + size = GetFullPathNameW(wsrc, MAX_PATH, srcPath, &srcRest); + if ((size == 0) || (size > MAX_PATH)) { return check_error(-1, errInfo); } - size = GetFullPathName(dst, sizeof(dstPath), dstPath, &dstRest); - if ((size == 0) || (size > sizeof(dstPath))) { + size = GetFullPathNameW(wdst, MAX_PATH, dstPath, &dstRest); + if ((size == 0) || (size > MAX_PATH)) { return check_error(-1, errInfo); } if (srcRest == NULL) { - srcRest = srcPath + strlen(srcPath); + srcRest = srcPath + wcslen(srcPath); } - if (strnicmp(srcPath, dstPath, srcRest - srcPath) == 0) { + if (_wcsnicmp(srcPath, dstPath, srcRest - srcPath) == 0) { /* * Trying to move a directory into itself. */ @@ -420,14 +420,14 @@ char* dst; /* New name. */ } (void) extract_root(dstPath); - if (dstPath[0] == '\0') { + if (dstPath[0] == L'\0') { /* * The filename was invalid. (Don't know why, * but play it safe.) */ errno = EINVAL; } - if (stricmp(srcPath, dstPath) != 0) { + if (_wcsicmp(srcPath, dstPath) != 0) { /* * If src is a directory and dst filesystem != src * filesystem, errno should be EXDEV. It is very @@ -463,14 +463,14 @@ char* dst; /* New name. */ * fails, it's because it wasn't empty. */ - if (RemoveDirectory(dst)) { + if (RemoveDirectoryW(wdst)) { /* * Now that that empty directory is gone, we can try * renaming again. If that fails, we'll put this empty * directory back, for completeness. */ - if (MoveFile(src, dst) != FALSE) { + if (MoveFileW(wsrc, wdst) != FALSE) { return 1; } @@ -480,8 +480,8 @@ char* dst; /* New name. */ */ errno = errno_map(GetLastError()); - CreateDirectory(dst, NULL); - SetFileAttributes(dst, dstAttr); + CreateDirectoryW(wdst, NULL); + SetFileAttributesW(wdst, dstAttr); if (errno == EACCES) { /* * Decode the EACCES to a more meaningful error. @@ -506,17 +506,17 @@ char* dst; /* New name. */ * put temp file back to old name. */ - char tempName[MAX_PATH]; + WCHAR tempName[MAX_PATH]; int result, size; - char *rest; + WCHAR *rest; - size = GetFullPathName(dst, sizeof(tempName), tempName, &rest); - if ((size == 0) || (size > sizeof(tempName)) || (rest == NULL)) { + size = GetFullPathNameW(wdst, MAX_PATH, tempName, &rest); + if ((size == 0) || (size > MAX_PATH) || (rest == NULL)) { return check_error(-1, errInfo); } - *rest = '\0'; + *rest = L'\0'; result = -1; - if (GetTempFileName(tempName, "erlr", 0, tempName) != 0) { + if (GetTempFileNameW(tempName, L"erlr", 0, tempName) != 0) { /* * Strictly speaking, need the following DeleteFile and * MoveFile to be joined as an atomic operation so no @@ -524,15 +524,15 @@ char* dst; /* New name. */ * same temp file. */ - DeleteFile(tempName); - if (MoveFile(dst, tempName) != FALSE) { - if (MoveFile(src, dst) != FALSE) { - SetFileAttributes(tempName, FILE_ATTRIBUTE_NORMAL); - DeleteFile(tempName); + DeleteFileW(tempName); + if (MoveFileW(wdst, tempName) != FALSE) { + if (MoveFileW(wsrc, wdst) != FALSE) { + SetFileAttributesW(tempName, FILE_ATTRIBUTE_NORMAL); + DeleteFileW(tempName); return 1; } else { - DeleteFile(dst); - MoveFile(tempName, dst); + DeleteFileW(wdst); + MoveFileW(tempName, wdst); } } @@ -558,11 +558,10 @@ char* dst; /* New name. */ } int -efile_chdir(errInfo, name) -Efile_error* errInfo; /* Where to return error codes. */ -char* name; /* Name of directory to make current. */ +efile_chdir(Efile_error* errInfo, /* Where to return error codes. */ + char* name) /* Name of directory to make current. */ { - int success = check_error(chdir(name), errInfo); + int success = check_error(_wchdir((WCHAR *) name), errInfo); if (!success && errInfo->posix_errno == EINVAL) /* POSIXification of errno */ errInfo->posix_errno = ENOENT; @@ -570,59 +569,65 @@ char* name; /* Name of directory to make current. */ } int -efile_getdcwd(errInfo, drive, buffer, size) -Efile_error* errInfo; /* Where to return error codes. */ -int drive; /* 0 - current, 1 - A, 2 - B etc. */ -char* buffer; /* Where to return the current directory. */ -size_t size; /* Size of buffer. */ +efile_getdcwd(Efile_error* errInfo, /* Where to return error codes. */ + int drive, /* 0 - current, 1 - A, 2 - B etc. */ + char* buffer, /* Where to return the current directory. */ + size_t size) /* Size of buffer. */ { - if (_getdcwd(drive, buffer, size) == NULL) + WCHAR *wbuffer = (WCHAR *) buffer; + size_t wbuffer_size = size / 2; + if (_wgetdcwd(drive, wbuffer, wbuffer_size) == NULL) return check_error(-1, errInfo); - for ( ; *buffer; buffer++) - if (*buffer == '\\') - *buffer = '/'; + for ( ; *wbuffer; wbuffer++) + if (*wbuffer == L'\\') + *wbuffer = L'/'; return 1; } int -efile_readdir(errInfo, name, dir_handle, buffer, size) -Efile_error* errInfo; /* Where to return error codes. */ -char* name; /* Name of directory to open. */ -EFILE_DIR_HANDLE* dir_handle; /* Directory handle of open directory. */ -char* buffer; /* Pointer to buffer for one filename. */ -size_t size; /* Size of buffer. */ +efile_readdir(Efile_error* errInfo, /* Where to return error codes. */ + char* name, /* Name of directory to list */ + EFILE_DIR_HANDLE* dir_handle, /* Handle of opened directory or NULL */ + char* buffer, /* Buffer to put one filename in */ + size_t *size) /* in-out size of buffer/size of filename excluding zero + termination in bytes*/ { HANDLE dir; /* Handle to directory. */ - char wildcard[MAX_PATH]; /* Wildcard to search for. */ - WIN32_FIND_DATA findData; /* Data found by FindFirstFile() or FindNext(). */ + WCHAR wildcard[MAX_PATH]; /* Wildcard to search for. */ + WIN32_FIND_DATAW findData; /* Data found by FindFirstFile() or FindNext(). */ + /* Alignment is not honored, this works on x86 because of alignment fixup by processor. + Not perfect, but faster than alinging by hand (really) */ + WCHAR *wname = (WCHAR *) name; + WCHAR *wbuffer = (WCHAR *) buffer; /* * First time we must setup everything. */ if (*dir_handle == NULL) { - int length = strlen(name); - char* s; + int length = wcslen(wname); + WCHAR* s; if (length+3 >= MAX_PATH) { errno = ENAMETOOLONG; return check_error(-1, errInfo); } - strcpy(wildcard, name); + wcscpy(wildcard, wname); s = wildcard+length-1; - if (*s != '/' && *s != '\\') - *++s = '\\'; - *++s = '*'; - *++s = '\0'; - DEBUGF(("Reading %s\n", wildcard)); - dir = FindFirstFile(wildcard, &findData); + if (*s != L'/' && *s != L'\\') + *++s = L'\\'; + *++s = L'*'; + *++s = L'\0'; + DEBUGF(("Reading %ws\n", wildcard)); + dir = FindFirstFileW(wildcard, &findData); if (dir == INVALID_HANDLE_VALUE) return set_error(errInfo); *dir_handle = (EFILE_DIR_HANDLE) dir; if (!IS_DOT_OR_DOTDOT(findData.cFileName)) { - strcpy(buffer, findData.cFileName); + wcscpy(wbuffer, findData.cFileName); + *size = wcslen(wbuffer)*2; return 1; } } @@ -635,10 +640,11 @@ size_t size; /* Size of buffer. */ dir = (HANDLE) *dir_handle; for (;;) { - if (FindNextFile(dir, &findData)) { + if (FindNextFileW(dir, &findData)) { if (IS_DOT_OR_DOTDOT(findData.cFileName)) continue; - strcpy(buffer, findData.cFileName); + wcscpy(wbuffer, findData.cFileName); + *size = wcslen(wbuffer)*2; return 1; } @@ -655,17 +661,17 @@ size_t size; /* Size of buffer. */ } int -efile_openfile(errInfo, name, flags, pfd, pSize) -Efile_error* errInfo; /* Where to return error codes. */ -char* name; /* Name of directory to open. */ -int flags; /* Flags to use for opening. */ -int* pfd; /* Where to store the file descriptor. */ -Sint64* pSize; /* Where to store the size of the file. */ +efile_openfile(Efile_error* errInfo, /* Where to return error codes. */ + char* name, /* Name of directory to open. */ + int flags, /* Flags to use for opening. */ + int* pfd, /* Where to store the file descriptor. */ + Sint64* pSize) /* Where to store the size of the file. */ { BY_HANDLE_FILE_INFORMATION fileInfo; /* File information from a handle. */ HANDLE fd; /* Handle to open file. */ DWORD access; /* Access mode: GENERIC_READ, GENERIC_WRITE. */ DWORD crFlags; + WCHAR *wname = (WCHAR *) name; switch (flags & (EFILE_MODE_READ|EFILE_MODE_WRITE)) { case EFILE_MODE_READ: @@ -692,7 +698,7 @@ Sint64* pSize; /* Where to store the size of the file. */ if (flags & EFILE_MODE_EXCL) { crFlags = CREATE_NEW; } - fd = CreateFile(name, access, + fd = CreateFileW(wname, access, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, crFlags, FILE_ATTRIBUTE_NORMAL, NULL); @@ -711,7 +717,7 @@ Sint64* pSize; /* Where to store the size of the file. */ * to EISDIR. */ if (errInfo->posix_errno && - (attr = GetFileAttributes(name)) != INVALID_FILE_ATTRIBUTES && + (attr = GetFileAttributesW(wname)) != INVALID_FILE_ATTRIBUTES && (attr & FILE_ATTRIBUTE_DIRECTORY)) { errInfo->posix_errno = EISDIR; } @@ -735,9 +741,10 @@ Sint64* pSize; /* Where to store the size of the file. */ int efile_may_openfile(Efile_error* errInfo, char *name) { + WCHAR *wname = (WCHAR *) name; DWORD attr; - if ((attr = GetFileAttributes(name)) == INVALID_FILE_ATTRIBUTES) { + if ((attr = GetFileAttributesW(wname)) == INVALID_FILE_ATTRIBUTES) { return check_error(-1, errInfo); } @@ -746,18 +753,6 @@ efile_may_openfile(Efile_error* errInfo, char *name) { return check_error(-1, errInfo); } return 1; -#if 0 - struct stat statbuf; - - if (stat(name, &statbuf)) { - return check_error(-1, errInfo); - } - if (ISDIR(statbuf)) { - errno = EISDIR; - return check_error(-1, errInfo); - } - return 1; -#endif } void @@ -792,16 +787,17 @@ efile_fileinfo(Efile_error* errInfo, Efile_info* pInfo, char* orig_name, int info_for_link) { HANDLE findhandle; /* Handle returned by FindFirstFile(). */ - WIN32_FIND_DATA findbuf; /* Data return by FindFirstFile(). */ - char name[_MAX_PATH]; + WIN32_FIND_DATAW findbuf; /* Data return by FindFirstFile(). */ + WCHAR name[_MAX_PATH]; int name_len; - char* path; - char pathbuf[_MAX_PATH]; + WCHAR *path; + WCHAR pathbuf[_MAX_PATH]; int drive; /* Drive for filename (1 = A:, 2 = B: etc). */ + WCHAR *worig_name = (WCHAR *) orig_name; /* Don't allow wildcards to be interpreted by system */ - if (strpbrk(orig_name, "?*")) { + if (wcspbrk(worig_name, L"?*")) { enoent: errInfo->posix_errno = ENOENT; errInfo->os_errno = ERROR_FILE_NOT_FOUND; @@ -813,25 +809,25 @@ efile_fileinfo(Efile_error* errInfo, Efile_info* pInfo, * slash, because it causes FindFirstFile() to fail on Win95. */ - if ((name_len = strlen(orig_name)) >= _MAX_PATH) { + if ((name_len = wcslen(worig_name)) >= _MAX_PATH) { goto enoent; } else { - strcpy(name, orig_name); + wcscpy(name, worig_name); if (name_len > 2 && ISSLASH(name[name_len-1]) && - name[name_len-2] != ':') { - name[name_len-1] = '\0'; + name[name_len-2] != L':') { + name[name_len-1] = L'\0'; } } /* Try to get disk from name. If none, get current disk. */ - if (name[1] != ':') { + if (name[1] != L':') { drive = 0; - if (GetCurrentDirectory(sizeof(pathbuf), pathbuf) && - pathbuf[1] == ':') { - drive = tolower(pathbuf[0]) - 'a' + 1; + if (GetCurrentDirectoryW(_MAX_PATH, pathbuf) && + pathbuf[1] == L':') { + drive = towlower(pathbuf[0]) - L'a' + 1; } - } else if (*name && name[2] == '\0') { + } else if (*name && name[2] == L'\0') { /* * X: and nothing more is an error. */ @@ -839,15 +835,15 @@ efile_fileinfo(Efile_error* errInfo, Efile_info* pInfo, errInfo->os_errno = ERROR_FILE_NOT_FOUND; return 0; } else - drive = tolower(*name) - 'a' + 1; + drive = towlower(*name) - L'a' + 1; - findhandle = FindFirstFile(name, &findbuf); + findhandle = FindFirstFileW(name, &findbuf); if (findhandle == INVALID_HANDLE_VALUE) { - if (!(strpbrk(name, "./\\") && - (path = _fullpath(pathbuf, name, _MAX_PATH)) && + if (!(wcspbrk(name, L"./\\") && + (path = _wfullpath(pathbuf, name, _MAX_PATH)) && /* root dir. ('C:\') or UNC root dir. ('\\server\share\') */ - ((strlen(path) == 3) || IsRootUNCName(path)) && - (GetDriveType(path) > 1) ) ) { + ((wcslen(path) == 3) || is_root_unc_name(path)) && + (GetDriveTypeW(path) > 1) ) ) { errInfo->posix_errno = ENOENT; errInfo->os_errno = ERROR_FILE_NOT_FOUND; return 0; @@ -860,7 +856,7 @@ efile_fileinfo(Efile_error* errInfo, Efile_info* pInfo, findbuf.dwFileAttributes = FILE_ATTRIBUTE_DIRECTORY; findbuf.nFileSizeHigh = 0; findbuf.nFileSizeLow = 0; - findbuf.cFileName[0] = '\0'; + findbuf.cFileName[0] = L'\0'; pInfo->links = 1; pInfo->modifyTime.year = 1980; @@ -960,10 +956,9 @@ if (!FileTimeToLocalFileTime(&findbuf.src, &LocalFTime) || \ } int -efile_write_info(errInfo, pInfo, name) -Efile_error* errInfo; -Efile_info* pInfo; -char* name; +efile_write_info(Efile_error* errInfo, + Efile_info* pInfo, + char* name) { SYSTEMTIME timebuf; FILETIME LocalFileTime; @@ -977,12 +972,13 @@ char* name; DWORD attr; DWORD tempAttr; BOOL modifyTime = FALSE; + WCHAR *wname = (WCHAR *) name; /* * Get the attributes for the file. */ - tempAttr = attr = GetFileAttributes((LPTSTR)name); + tempAttr = attr = GetFileAttributesW(wname); if (attr == 0xffffffff) { return set_error(errInfo); } @@ -1036,12 +1032,12 @@ char* name; if (tempAttr & FILE_ATTRIBUTE_READONLY) { tempAttr &= ~FILE_ATTRIBUTE_READONLY; - if (!SetFileAttributes((LPTSTR) name, tempAttr)) { + if (!SetFileAttributesW(wname, tempAttr)) { return set_error(errInfo); } } - fd = CreateFile(name, GENERIC_READ|GENERIC_WRITE, + fd = CreateFileW(wname, GENERIC_READ|GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); if (fd != INVALID_HANDLE_VALUE) { @@ -1059,7 +1055,7 @@ char* name; */ if (tempAttr != attr) { - if (!SetFileAttributes((LPTSTR) name, attr)) { + if (!SetFileAttributesW(wname, attr)) { return set_error(errInfo); } } @@ -1235,7 +1231,7 @@ int flags; /* - * IsRootUNCName - returns TRUE if the argument is a UNC name specifying + * is_root_unc_name - returns TRUE if the argument is a UNC name specifying * a root share. That is, if it is of the form \\server\share\. * This routine will also return true if the argument is of the * form \\server\share (no trailing slash) but Win32 currently @@ -1245,16 +1241,16 @@ int flags; */ static int -IsRootUNCName(const char* path) +is_root_unc_name(const WCHAR *path) { /* * If a root UNC name, path will start with 2 (but not 3) slashes */ - if ((strlen(path) >= 5) /* minimum string is "//x/y" */ + if ((wcslen(path) >= 5) /* minimum string is "//x/y" */ && ISSLASH(path[0]) && ISSLASH(path[1])) { - const char * p = path + 2 ; + const WCHAR *p = path + 2; /* * find the slash between the server name and share name @@ -1297,19 +1293,19 @@ IsRootUNCName(const char* path) */ static int -extract_root(char* name) +extract_root(WCHAR* name) { - int len = strlen(name); + int len = wcslen(name); - if (isalpha(name[0]) && name[1] == ':' && ISSLASH(name[2])) { - int c = name[3]; - name[3] = '\0'; - return c == '\0'; + if (iswalpha(name[0]) && name[1] == L':' && ISSLASH(name[2])) { + WCHAR c = name[3]; + name[3] = L'\0'; + return c == L'\0'; } else if (len < 5 || !ISSLASH(name[0]) || !ISSLASH(name[1])) { goto error; } else { /* Try to find the end of the UNC name. */ - char* p; - int c; + WCHAR* p; + WCHAR c; /* * Find the slash between the server name and share name. @@ -1318,7 +1314,7 @@ extract_root(char* name) for (p = name + 2; *p; p++) if (ISSLASH(*p)) break; - if (*p == '\0') + if (*p == L'\0') goto error; /* @@ -1329,24 +1325,24 @@ extract_root(char* name) if (ISSLASH(*p)) break; c = *p; - *p = '\0'; - return c == '\0' || p[1] == '\0'; + *p = L'\0'; + return c == L'\0' || p[1] == L'\0'; } error: - *name = '\0'; + *name = L'\0'; return 1; } static unsigned short -dos_to_posix_mode(int attr, const char *name) +dos_to_posix_mode(int attr, const WCHAR *name) { register unsigned short uxmode; unsigned dosmode; - register const char *p; + register const WCHAR *p; dosmode = attr & 0xff; - if ((p = name)[1] == ':') + if ((p = name)[1] == L':') p += 2; /* check to see if this is a directory - note we must make a special @@ -1355,7 +1351,7 @@ dos_to_posix_mode(int attr, const char *name) uxmode = (unsigned short) (((ISSLASH(*p) && !p[1]) || (dosmode & FILE_ATTRIBUTE_DIRECTORY) || - *p == '\0') ? _S_IFDIR|_S_IEXEC : _S_IFREG); + *p == L'\0') ? _S_IFDIR|_S_IEXEC : _S_IFREG); /* If attribute byte does not have read-only bit, it is read-write */ @@ -1364,11 +1360,11 @@ dos_to_posix_mode(int attr, const char *name) /* see if file appears to be executable - check extension of name */ - if (p = strrchr(name, '.')) { - if (!stricmp(p, ".exe") || - !stricmp(p, ".cmd") || - !stricmp(p, ".bat") || - !stricmp(p, ".com")) + if (p = wcsrchr(name, L'.')) { + if (!_wcsicmp(p, L".exe") || + !_wcsicmp(p, L".cmd") || + !_wcsicmp(p, L".bat") || + !_wcsicmp(p, L".com")) uxmode |= _S_IEXEC; } @@ -1433,17 +1429,20 @@ efile_readlink(Efile_error* errInfo, char* name, char* buffer, size_t size) int efile_altname(Efile_error* errInfo, char* orig_name, char* buffer, size_t size) { - WIN32_FIND_DATA wfd; + WIN32_FIND_DATAW wfd; HANDLE fh; - char name[_MAX_PATH]; + WCHAR name[_MAX_PATH+1]; int name_len; - char* path; - char pathbuf[_MAX_PATH]; + WCHAR* path; + WCHAR pathbuf[_MAX_PATH+1]; /* Unclear weather GetCurrentDirectory will access one char after + _MAX_PATH */ + WCHAR *worig_name = (WCHAR *) orig_name; + WCHAR *wbuffer = (WCHAR *) buffer; int drive; /* Drive for filename (1 = A:, 2 = B: etc). */ /* Don't allow wildcards to be interpreted by system */ - if (strpbrk(orig_name, "?*")) { + if (wcspbrk(worig_name, L"?*")) { enoent: errInfo->posix_errno = ENOENT; errInfo->os_errno = ERROR_FILE_NOT_FOUND; @@ -1455,57 +1454,61 @@ efile_altname(Efile_error* errInfo, char* orig_name, char* buffer, size_t size) * slash, because it causes FindFirstFile() to fail on Win95. */ - if ((name_len = strlen(orig_name)) >= _MAX_PATH) { + if ((name_len = wcslen(worig_name)) >= _MAX_PATH) { goto enoent; } else { - strcpy(name, orig_name); + wcscpy(name, worig_name); if (name_len > 2 && ISSLASH(name[name_len-1]) && - name[name_len-2] != ':') { - name[name_len-1] = '\0'; + name[name_len-2] != L':') { + name[name_len-1] = L'\0'; } } /* Try to get disk from name. If none, get current disk. */ - if (name[1] != ':') { + if (name[1] != L':') { drive = 0; - if (GetCurrentDirectory(sizeof(pathbuf), pathbuf) && - pathbuf[1] == ':') { - drive = tolower(pathbuf[0]) - 'a' + 1; + if (GetCurrentDirectoryW(_MAX_PATH, pathbuf) && + pathbuf[1] == L':') { + drive = towlower(pathbuf[0]) - L'a' + 1; } - } else if (*name && name[2] == '\0') { + } else if (*name && name[2] == L'\0') { /* * X: and nothing more is an error. */ goto enoent; } else { - drive = tolower(*name) - 'a' + 1; + drive = towlower(*name) - L'a' + 1; } - fh = FindFirstFile(name,&wfd); + fh = FindFirstFileW(name,&wfd); if (fh == INVALID_HANDLE_VALUE) { - if (!(strpbrk(name, "./\\") && - (path = _fullpath(pathbuf, name, _MAX_PATH)) && + if (!(wcspbrk(name, L"./\\") && + (path = _wfullpath(pathbuf, name, _MAX_PATH)) && /* root dir. ('C:\') or UNC root dir. ('\\server\share\') */ - ((strlen(path) == 3) || IsRootUNCName(path)) && - (GetDriveType(path) > 1) ) ) { + ((wcslen(path) == 3) || is_root_unc_name(path)) && + (GetDriveTypeW(path) > 1) ) ) { errno = errno_map(GetLastError()); return check_error(-1, errInfo); } /* * Root directories (such as C:\ or \\server\share\ are fabricated. */ - strcpy(buffer,name); + wcscpy(wbuffer,name); return 1; } - strcpy(buffer,wfd.cAlternateFileName); - if (!*buffer) { - strcpy(buffer,wfd.cFileName); + wcscpy(wbuffer,wfd.cAlternateFileName); + if (!*wbuffer) { + wcscpy(wbuffer,wfd.cFileName); } return 1; } +/* + * XXX: link and symlink Implemented in pu (?), will need conversion + */ + int efile_link(Efile_error* errInfo, char* old, char* new) { diff --git a/erts/emulator/sys/common/erl_sys_common_misc.c b/erts/emulator/sys/common/erl_sys_common_misc.c index 581c14b6c6..7338ffc963 100644 --- a/erts/emulator/sys/common/erl_sys_common_misc.c +++ b/erts/emulator/sys/common/erl_sys_common_misc.c @@ -60,6 +60,7 @@ int erts_get_user_requested_filename_encoding(void) void erts_init_sys_common_misc(void) { #if defined(__WIN32__) + /* win_efile will totally fail if this is not set. */ filename_encoding = ERL_FILENAME_WIN_WCHAR; #else if (user_filename_encoding != ERL_FILENAME_UNKNOWN) { diff --git a/erts/etc/win32/nsis/dll_version_helper.sh b/erts/etc/win32/nsis/dll_version_helper.sh old mode 100644 new mode 100755 index 73a9183d68..571ee3e39e --- a/erts/etc/win32/nsis/dll_version_helper.sh +++ b/erts/etc/win32/nsis/dll_version_helper.sh @@ -2,7 +2,7 @@ # # %CopyrightBegin% # -# Copyright Ericsson AB 2007-2009. All Rights Reserved. +# Copyright Ericsson AB 2007-2010. All Rights Reserved. # # The contents of this file are subject to the Erlang Public License, # Version 1.1, (the "License"); you may not use this file except in diff --git a/erts/etc/win32/nsis/find_redist.sh b/erts/etc/win32/nsis/find_redist.sh old mode 100644 new mode 100755 index 4211bd1dfc..153977ded5 --- a/erts/etc/win32/nsis/find_redist.sh +++ b/erts/etc/win32/nsis/find_redist.sh @@ -2,7 +2,7 @@ # # %CopyrightBegin% # -# Copyright Ericsson AB 2007-2009. All Rights Reserved. +# Copyright Ericsson AB 2007-2010. All Rights Reserved. # # The contents of this file are subject to the Erlang Public License, # Version 1.1, (the "License"); you may not use this file except in diff --git a/erts/preloaded/src/prim_file.erl b/erts/preloaded/src/prim_file.erl index b15997572c..75b0faacd3 100644 --- a/erts/preloaded/src/prim_file.erl +++ b/erts/preloaded/src/prim_file.erl @@ -199,7 +199,7 @@ open_int({Driver, Portopts}, File, Mode, Setopts) -> end; open_int(Port, File, Mode, Setopts) -> M = Mode band ?EFILE_MODE_MASK, - case drv_command(Port, [<>, pathname(File), 0]) of + case drv_command(Port, [<>, pathname(File)]) of {ok, Number} -> open_int_setopts(Port, Number, Setopts); Error -> @@ -642,7 +642,7 @@ set_cwd_int(Port, Dir0) -> end), %% Dir is now either a string or an EXIT tuple. %% An EXIT tuple will fail in the following catch. - drv_command(Port, [?FILE_CHDIR, pathname(Dir), 0]). + drv_command(Port, [?FILE_CHDIR, pathname(Dir)]). @@ -655,7 +655,7 @@ delete(Port, File) when is_port(Port) -> delete_int(Port, File). delete_int(Port, File) -> - drv_command(Port, [?FILE_DELETE, pathname(File), 0]). + drv_command(Port, [?FILE_DELETE, pathname(File)]). @@ -668,7 +668,7 @@ rename(Port, From, To) when is_port(Port) -> rename_int(Port, From, To). rename_int(Port, From, To) -> - drv_command(Port, [?FILE_RENAME, pathname(From), 0, pathname(To), 0]). + drv_command(Port, [?FILE_RENAME, pathname(From), pathname(To)]). @@ -681,7 +681,7 @@ make_dir(Port, Dir) when is_port(Port) -> make_dir_int(Port, Dir). make_dir_int(Port, Dir) -> - drv_command(Port, [?FILE_MKDIR, pathname(Dir), 0]). + drv_command(Port, [?FILE_MKDIR, pathname(Dir)]). @@ -694,7 +694,7 @@ del_dir(Port, Dir) when is_port(Port) -> del_dir_int(Port, Dir). del_dir_int(Port, Dir) -> - drv_command(Port, [?FILE_RMDIR, pathname(Dir), 0]). + drv_command(Port, [?FILE_RMDIR, pathname(Dir)]). @@ -707,7 +707,7 @@ read_file_info(Port, File) when is_port(Port) -> read_file_info_int(Port, File). read_file_info_int(Port, File) -> - drv_command(Port, [?FILE_FSTAT, pathname(File), 0]). + drv_command(Port, [?FILE_FSTAT, pathname(File)]). %% altname/{1,2} @@ -718,7 +718,7 @@ altname(Port, File) when is_port(Port) -> altname_int(Port, File). altname_int(Port, File) -> - drv_command(Port, [?FILE_ALTNAME, pathname(File), 0]). + drv_command(Port, [?FILE_ALTNAME, pathname(File)]). %% write_file_info/{2,3} @@ -749,7 +749,7 @@ write_file_info_int(Port, date_to_bytes(Atime), date_to_bytes(Mtime), date_to_bytes(Ctime), - pathname(File), 0]). + pathname(File)]). @@ -762,7 +762,7 @@ make_link(Port, Old, New) when is_port(Port) -> make_link_int(Port, Old, New). make_link_int(Port, Old, New) -> - drv_command(Port, [?FILE_LINK, pathname(Old), 0, pathname(New), 0]). + drv_command(Port, [?FILE_LINK, pathname(Old), pathname(New)]). @@ -775,7 +775,7 @@ make_symlink(Port, Old, New) when is_port(Port) -> make_symlink_int(Port, Old, New). make_symlink_int(Port, Old, New) -> - drv_command(Port, [?FILE_SYMLINK, pathname(Old), 0, pathname(New), 0]). + drv_command(Port, [?FILE_SYMLINK, pathname(Old), pathname(New)]). @@ -788,7 +788,7 @@ read_link(Port, Link) when is_port(Port) -> read_link_int(Port, Link). read_link_int(Port, Link) -> - drv_command(Port, [?FILE_READLINK, pathname(Link), 0]). + drv_command(Port, [?FILE_READLINK, pathname(Link)]). @@ -801,7 +801,7 @@ read_link_info(Port, Link) when is_port(Port) -> read_link_info_int(Port, Link). read_link_info_int(Port, Link) -> - drv_command(Port, [?FILE_LSTAT, pathname(Link), 0]). + drv_command(Port, [?FILE_LSTAT, pathname(Link)]). @@ -814,7 +814,7 @@ list_dir(Port, Dir) when is_port(Port) -> list_dir_int(Port, Dir). list_dir_int(Port, Dir) -> - drv_command(Port, [?FILE_READDIR, pathname(Dir), 0], []). + drv_command(Port, [?FILE_READDIR, pathname(Dir)], []). @@ -1227,5 +1227,6 @@ lists_split([Hd | Tl], N, Rev) -> reverse(X) -> lists:reverse(X, []). reverse(L, T) -> lists:reverse(L, T). +% Will add zero termination too pathname(File) -> prim_file:internal_name2native(File). -- cgit v1.2.3 From 5da29a8c5fe9584e274d2fe9b95dead8334d419a Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 8 Nov 2010 17:34:41 +0100 Subject: Correct shell utilities to handle unicode and possibly binaries --- lib/stdlib/src/c.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/stdlib/src/c.erl b/lib/stdlib/src/c.erl index 399b91b92f..d04d8f191f 100644 --- a/lib/stdlib/src/c.erl +++ b/lib/stdlib/src/c.erl @@ -659,7 +659,7 @@ portformat(Name, Id, Cmd) -> pwd() -> case file:get_cwd() of {ok, Str} -> - ok = io:format("~s\n", [fixup_one_bin(Str)]); + ok = io:format("~ts\n", [fixup_one_bin(Str)]); {error, _} -> ok = io:format("Cannot determine current directory\n") end. @@ -714,7 +714,7 @@ ls_print(X, Width, Len) when Width + Len >= 80 -> io:nl(), ls_print(X, Width, 0); ls_print([H|T], Width, Len) -> - io:format("~-*s",[Width,H]), + io:format("~-*ts",[Width,H]), ls_print(T, Width, Len+Width); ls_print([], _, _) -> io:nl(). -- cgit v1.2.3 From f16693691cbc8d6d4bff89c44cdc2c120ea637c3 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Thu, 18 Nov 2010 16:50:08 +0100 Subject: Add testcases --- lib/kernel/test/Makefile | 1 + lib/kernel/test/file_name_SUITE.erl | 713 ++++++++++++++++++++++++++++++++++++ 2 files changed, 714 insertions(+) create mode 100644 lib/kernel/test/file_name_SUITE.erl diff --git a/lib/kernel/test/Makefile b/lib/kernel/test/Makefile index 293c368e2a..f84b343de8 100644 --- a/lib/kernel/test/Makefile +++ b/lib/kernel/test/Makefile @@ -51,6 +51,7 @@ MODULES= \ error_logger_SUITE \ error_logger_warn_SUITE \ file_SUITE \ + file_name_SUITE \ prim_file_SUITE \ ram_file_SUITE \ gen_tcp_api_SUITE \ diff --git a/lib/kernel/test/file_name_SUITE.erl b/lib/kernel/test/file_name_SUITE.erl new file mode 100644 index 0000000000..ba1a829149 --- /dev/null +++ b/lib/kernel/test/file_name_SUITE.erl @@ -0,0 +1,713 @@ +-module(file_name_SUITE). +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2010. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-include("test_server.hrl"). +-include_lib("kernel/include/file.hrl"). + +%% +%% File operations that take filenames as parameters (* not prim_file operation) (** a drive): +%% altname +%% copy (*) +%% del_dir +%% delete +%% get_cwd (**) +%% list_dir +%% make_dir +%% make_link +%% make_symlink +%% open +%% read_file +%% read_file_info +%% read_link +%% read_link_info +%% rename +%% set_cwd +%% write_file +%% write_file_info +%% +%% File operations that opens/uses separate driver port (not connected to file) +%% altname +%% del_dir +%% delete +%% get_cwd +%% list_dir +%% make_dir +%% make_link +%% make_symlink +%% read_file_info +%% read_link +%% read_link_info +%% rename +%% set_cwd +%% write_file_info +%% +%% Operations that use ?FD_DRV in prim_file +%% open +%% read_file +%% write_file +%% +%% +%% Operations that return a filename/path +%% altname +%% get_cwd +%% list_dir +%% read_link + +-export([all/1,init_per_testcase/2, fin_per_testcase/2]). +-export([normal/1,icky/1,very_icky/1]). + + +init_per_testcase(_Func, Config) -> + Dog = test_server:timetrap(test_server:seconds(60)), + [{watchdog,Dog}|Config]. + +fin_per_testcase(_Func, Config) -> + Dog = ?config(watchdog, Config), + test_server:timetrap_cancel(Dog). + + +all(suite) -> + [normal,icky,very_icky]. + +normal(suite) -> + []; +normal(doc) -> + "Check file operations on normal file names regardless of unicode mode"; +normal(Config) when is_list(Config) -> + {ok,Dir} = file:get_cwd(), + try + Priv = ?config(priv_dir, Config), + file:set_cwd(Priv), + put(file_module,prim_file), + ok = check_normal(prim_file), + put(file_module,file), + ok = check_normal(file) + after + file:set_cwd(Dir) + end. + + +icky(suite) -> + []; +icky(doc) -> + "Check file operations on normal file names regardless of unicode mode"; +icky(Config) when is_list(Config) -> + {ok,Dir} = file:get_cwd(), + try + Priv = ?config(priv_dir, Config), + file:set_cwd(Priv), + put(file_module,prim_file), + ok = check_icky(prim_file), + put(file_module,file), + ok = check_icky(file) + after + file:set_cwd(Dir) + end. +very_icky(suite) -> + []; +very_icky(doc) -> + "Check file operations on normal file names regardless of unicode mode"; +very_icky(Config) when is_list(Config) -> + {ok,Dir} = file:get_cwd(), + try + Priv = ?config(priv_dir, Config), + file:set_cwd(Priv), + put(file_module,prim_file), + case check_very_icky(prim_file) of + need_unicode_mode -> + {skipped,"VM needs to be started in Unicode filename mode"}; + ok -> + put(file_module,file), + ok = check_very_icky(file) + end + after + file:set_cwd(Dir) + end. + + +check_normal(Mod) -> + {ok,Dir} = Mod:get_cwd(), + try + ?line make_normal_dir(Mod), + ?line {ok, L0} = Mod:list_dir("."), + ?line L1 = lists:sort(L0), + %erlang:display(L1), + ?line L1 = lists:sort(list(normal_dir())), + ?line {ok,D2} = Mod:get_cwd(), + ?line true = is_list(D2), + ?line case Mod:altname("fil1") of + {error,enotsup} -> + ok; + {ok,LLL} when is_list(LLL) -> + ok + end, + ?line [ true = is_list(El) || El <- L1], + ?line Syms = [ {S,Targ,list_to_binary(get_data(Targ,normal_dir()))} + || {T,S,Targ} <- normal_dir(), T =:= symlink ], + ?line [ {ok, Cont} = Mod:read_file(SymL) || {SymL,_,Cont} <- Syms ], + ?line [ {ok, Targ} = Mod:read_link(SymL) || {SymL,Targ,_} <- Syms ], + ?line chk_cre_dir(Mod,[{directory,"temp_dir",normal_dir()}]), + ?line {ok,BeginAt} = Mod:get_cwd(), + ?line true = is_list(BeginAt), + ?line {error,enoent} = Mod:set_cwd("tmp_dir"), + ?line ok = Mod:set_cwd("temp_dir"), + ?line {ok, NowAt} = Mod:get_cwd(), + ?line true = BeginAt =/= NowAt, + ?line ok = Mod:set_cwd(".."), + ?line {ok,BeginAt} = Mod:get_cwd(), + ?line rm_r(Mod,"temp_dir"), + ?line true = is_list(Dir), + ?line [ true = is_list(FN) || FN <- L0 ], + case has_links() of + true -> + ?line ok = Mod:make_link("fil1","nisse"), + ?line {ok, <<"fil1">>} = Mod:read_file("nisse"), + ?line {ok, #file_info{type = regular}} = Mod:read_link_info("nisse"), + ?line ok = Mod:delete("nisse"), + ?line {ok, <<"fil1">>} = Mod:read_file("fil1"), + ?line {error,enoent} = Mod:read_file("nisse"), + ?line {error,enoent} = Mod:read_link_info("nisse"); + false -> + ok + end, + ?line [ begin + ?line {ok, FD} = Mod:open(Name,[read]), + ?line {ok, Content} = Mod:read(FD,1024), + ?line ok = file:close(FD) + end || {regular,Name,Content} <- normal_dir() ], + ?line [ begin + ?line {ok, FD} = Mod:open(Name,[read,binary]), + ?line BC = list_to_binary(Content), + ?line {ok, BC} = Mod:read(FD,1024), + ?line ok = file:close(FD) + end || {regular,Name,Content} <- normal_dir() ], + ?line Mod:rename("fil1","tmp_fil1"), + ?line {ok, <<"fil1">>} = Mod:read_file("tmp_fil1"), + ?line {error,enoent} = Mod:read_file("fil1"), + ?line Mod:rename("tmp_fil1","fil1"), + ?line {ok, <<"fil1">>} = Mod:read_file("fil1"), + ?line {error,enoent} = Mod:read_file("tmp_fil1"), + ?line {ok,FI} = Mod:read_file_info("fil1"), + ?line NewMode = FI#file_info.mode band (bnot 8#333), + ?line NewMode2 = NewMode bor 8#222, + ?line true = NewMode2 =/= NewMode, + ?line ok = Mod:write_file_info("fil1",FI#file_info{mode = NewMode}), + ?line {ok,#file_info{mode = NewMode}} = Mod:read_file_info("fil1"), + ?line ok = Mod:write_file_info("fil1",FI#file_info{mode = NewMode2}), + ?line {ok,#file_info{mode = NewMode2}} = Mod:read_file_info("fil1"), + ok + after + case Mod:read_file_info("fil1") of + {ok,FII} -> + NewModeI = FII#file_info.mode bor 8#777, + Mod:write_file_info("fil1",FII#file_info{mode = NewModeI}); + _ -> + ok + end, + Mod:set_cwd(Dir), + io:format("Wd now: ~s~n",[Dir]) + end. + +check_icky(Mod) -> + {ok,Dir} = Mod:get_cwd(), + try + ?line true=(length("åäö") =:= 3), + ?line UniMode = file:native_name_encoding() =/= latin1, + ?line make_icky_dir(Mod), + ?line {ok, L0} = Mod:list_dir("."), + ?line L1 = lists:sort(L0), + io:format("~p ~p~n",[L1,list(icky_dir())]), + ?line L1 = lists:sort(convlist(list(icky_dir()))), + ?line {ok,D2} = Mod:get_cwd(), + ?line true = is_list(D2), +%% Altname only on windows, and there are no non native filenames there +%% ?line case Mod:altname("fil1") of +%% {error,enotsup} -> +%% ok; +%% {ok,LLL} when is_list(LLL) -> +%% ok +%% end, + ?line [ true = ((is_list(El) or (UniMode and is_binary(El)))) || El <- L1], + ?line Syms = [ {S,conv(Targ),list_to_binary(get_data(Targ,icky_dir()))} + || {T,S,Targ} <- icky_dir(), T =:= symlink ], + ?line [ {ok, Cont} = Mod:read_file(SymL) || {SymL,_,Cont} <- Syms ], + ?line [ {ok, Targ} = Mod:read_link(SymL) || {SymL,Targ,_} <- Syms ], + ?line chk_cre_dir(Mod,[{directory,"åäö_dir",icky_dir()}]), + ?line {ok,BeginAt} = Mod:get_cwd(), + ?line true = is_list(BeginAt), + ?line {error,enoent} = Mod:set_cwd("åä_dir"), + ?line ok = Mod:set_cwd("åäö_dir"), + ?line {ok, NowAt} = Mod:get_cwd(), + ?line true = is_list(NowAt), + ?line true = BeginAt =/= NowAt, + ?line ok = Mod:set_cwd(".."), + ?line {ok,BeginAt} = Mod:get_cwd(), + ?line rm_r2(Mod,"åäö_dir"), + {OS,TYPE} = os:type(), + % Check that treat_icky really converts to the same as the OS + case UniMode of + true -> + ?line chk_cre_dir(Mod,[{directory,"åäö_dir",[]}]), + ?line ok = Mod:set_cwd("åäö_dir"), + ?line ok = Mod:write_file(<<"ååå">>,<<"hello">>), + ?line Treated = treat_icky(<<"ååå">>), + ?line {ok,[Treated]} = Mod:list_dir("."), + ?line ok = Mod:delete(<<"ååå">>), + ?line {ok,[]} = Mod:list_dir("."), + ?line ok = Mod:set_cwd(".."), + ?line rm_r2(Mod,"åäö_dir"); + false -> + ok + end, + + ?line chk_cre_dir(Mod,[{directory,treat_icky(<<"åäö_dir">>),icky_dir()}]), + if + UniMode and (OS =/= win32) -> + ?line {error,enoent} = Mod:set_cwd("åäö_dir"); + true -> + ok + end, + ?line ok = Mod:set_cwd(treat_icky(<<"åäö_dir">>)), + ?line {ok, NowAt2} = Mod:get_cwd(), + io:format("~p~n",[NowAt2]), + % Cannot create raw unicode-breaking filenames on windows or macos + ?line true = ((((not UniMode) or (OS =:= win32) or (TYPE=:=darwin)) and is_list(NowAt2)) orelse ((UniMode) and is_binary(NowAt2))), + ?line true = BeginAt =/= NowAt2, + ?line ok = Mod:set_cwd(".."), + ?line {ok,BeginAt} = Mod:get_cwd(), + ?line rm_r2(Mod,conv(treat_icky(<<"åäö_dir">>))), + case has_links() of + true -> + ?line ok = Mod:make_link("fil1","nisseö"), + ?line {ok, <<"fil1">>} = Mod:read_file("nisseö"), + ?line {ok, #file_info{type = regular}} = Mod:read_link_info("nisseö"), + ?line ok = Mod:delete("nisseö"), + ?line ok = Mod:make_link("fil1",treat_icky(<<"nisseö">>)), + ?line {ok, <<"fil1">>} = Mod:read_file(treat_icky(<<"nisseö">>)), + ?line {ok, #file_info{type = regular}} = Mod:read_link_info(treat_icky(<<"nisseö">>)), + ?line ok = Mod:delete(treat_icky(<<"nisseö">>)), + ?line {ok, <<"fil1">>} = Mod:read_file("fil1"), + ?line {error,enoent} = Mod:read_file("nisseö"), + ?line {error,enoent} = Mod:read_link_info("nisseö"), + ?line {error,enoent} = Mod:read_file(treat_icky(<<"nisseö">>)), + ?line {error,enoent} = Mod:read_link_info(treat_icky(<<"nisseö">>)); + false -> + ok + end, + ?line [ begin + ?line {ok, FD} = Mod:open(Name,[read]), + ?line {ok, Content} = Mod:read(FD,1024), + ?line ok = file:close(FD) + end || {regular,Name,Content} <- icky_dir() ], + ?line [ begin + ?line {ok, FD} = Mod:open(Name,[read,binary]), + ?line BC = list_to_binary([Content]), + ?line {ok, BC} = Mod:read(FD,1024), + ?line ok = file:close(FD) + end || {regular,Name,Content} <- icky_dir() ], + ?line Mod:rename("åäö2","åäö_fil1"), + ?line {ok, <<"åäö2">>} = Mod:read_file("åäö_fil1"), + ?line {error,enoent} = Mod:read_file("åäö2"), + ?line Mod:rename("åäö_fil1","åäö2"), + ?line {ok, <<"åäö2">>} = Mod:read_file("åäö2"), + ?line {error,enoent} = Mod:read_file("åäö_fil1"), + + ?line Mod:rename("åäö2",treat_icky(<<"åäö_fil1">>)), + ?line {ok, <<"åäö2">>} = Mod:read_file(treat_icky(<<"åäö_fil1">>)), + if + UniMode and (OS =/= win32) -> + {error,enoent} = Mod:read_file("åäö_fil1"); + true -> + ok + end, + ?line {error,enoent} = Mod:read_file("åäö2"), + ?line Mod:rename(treat_icky(<<"åäö_fil1">>),"åäö2"), + ?line {ok, <<"åäö2">>} = Mod:read_file("åäö2"), + ?line {error,enoent} = Mod:read_file("åäö_fil1"), + ?line {error,enoent} = Mod:read_file(treat_icky(<<"åäö_fil1">>)), + + ?line {ok,FI} = Mod:read_file_info("åäö2"), + ?line NewMode = FI#file_info.mode band (bnot 8#333), + ?line NewMode2 = NewMode bor 8#222, + ?line true = NewMode2 =/= NewMode, + ?line ok = Mod:write_file_info("åäö2",FI#file_info{mode = NewMode}), + ?line {ok,#file_info{mode = NewMode}} = Mod:read_file_info("åäö2"), + ?line ok = Mod:write_file_info("åäö2",FI#file_info{mode = NewMode2}), + ?line {ok,#file_info{mode = NewMode2}} = Mod:read_file_info("åäö2"), + + ?line {ok,FII} = Mod:read_file_info(treat_icky(<<"åäö5">>)), + ?line true = NewMode2 =/= NewMode, + ?line ok = Mod:write_file_info(treat_icky(<<"åäö5">>),FII#file_info{mode = NewMode}), + ?line {ok,#file_info{mode = NewMode}} = Mod:read_file_info(treat_icky(<<"åäö5">>)), + ?line ok = Mod:write_file_info(<<"åäö5">>,FII#file_info{mode = NewMode2}), + ?line {ok,#file_info{mode = NewMode2}} = Mod:read_file_info(treat_icky(<<"åäö5">>)), + ok + after + Mod:set_cwd(Dir), + io:format("Wd now: ~s~n",[Dir]) + end. + +check_very_icky(Mod) -> + {ok,Dir} = Mod:get_cwd(), + try + ?line true=(length("åäö") =:= 3), + ?line UniMode = file:native_name_encoding() =/= latin1, + if + not UniMode -> + throw(need_unicode_mode); + true -> + ok + end, + ?line make_very_icky_dir(Mod), + ?line {ok, L0} = Mod:list_dir("."), + ?line L1 = lists:sort(L0), + ?line L1 = lists:sort(convlist(list(very_icky_dir()))), + ?line {ok,D2} = Mod:get_cwd(), + ?line true = is_list(D2), + ?line [ true = ((is_list(El) or is_binary(El))) || El <- L1], + ?line Syms = [ {S,conv(Targ),list_to_binary(get_data(Targ,very_icky_dir()))} + || {T,S,Targ} <- very_icky_dir(), T =:= symlink ], + ?line [ {ok, Cont} = Mod:read_file(SymL) || {SymL,_,Cont} <- Syms ], + ?line [ {ok, Targ} = Mod:read_link(SymL) || {SymL,Targ,_} <- Syms ], + ?line chk_cre_dir(Mod,[{directory,[1088,1079,1091]++"_dir",very_icky_dir()}]), + ?line {ok,BeginAt} = Mod:get_cwd(), + ?line true = is_list(BeginAt), + ?line {error,enoent} = Mod:set_cwd("åä_dir"), + ?line ok = Mod:set_cwd([1088,1079,1091]++"_dir"), + ?line {ok, NowAt} = Mod:get_cwd(), + ?line true = is_list(NowAt), + ?line true = BeginAt =/= NowAt, + ?line ok = Mod:set_cwd(".."), + ?line {ok,BeginAt} = Mod:get_cwd(), + ?line rm_r2(Mod,[1088,1079,1091]++"_dir"), + + case has_links() of + true -> + ?line ok = Mod:make_link("fil1","nisse"++[1088,1079,1091]), + ?line {ok, <<"fil1">>} = + Mod:read_file("nisse"++[1088,1079,1091]), + ?line {ok, #file_info{type = regular}} = + Mod:read_link_info("nisse"++[1088,1079,1091]), + ?line ok = Mod:delete("nisse"++[1088,1079,1091]), + ?line ok = Mod:make_link("fil1",<<"nisseö">>), + ?line {ok, <<"fil1">>} = Mod:read_file(<<"nisseö">>), + ?line {ok, #file_info{type = regular}} = + Mod:read_link_info(<<"nisseö">>), + ?line ok = Mod:delete(<<"nisseö">>), + ?line {ok, <<"fil1">>} = Mod:read_file("fil1"), + ?line {error,enoent} = Mod:read_file("nisse"++[1088,1079,1091]), + ?line {error,enoent} = Mod:read_link_info("nisse"++[1088,1079,1091]), + ?line {error,enoent} = Mod:read_file(<<"nisseö">>), + ?line {error,enoent} = Mod:read_link_info(<<"nisseö">>); + false -> + ok + end, + ?line [ begin + ?line {ok, FD} = Mod:open(Name,[read]), + ?line {ok, Content} = Mod:read(FD,1024), + ?line ok = file:close(FD) + end || {regular,Name,Content} <- very_icky_dir() ], + ?line [ begin + ?line {ok, FD} = Mod:open(Name,[read,binary]), + ?line BC = list_to_binary([Content]), + ?line {ok, BC} = Mod:read(FD,1024), + ?line ok = file:close(FD) + end || {regular,Name,Content} <- very_icky_dir() ], + ?line Mod:rename([956,965,963,954,959,49], + [956,965,963,954,959]++"_fil1"), + ?line {ok, <<"åäö2">>} = Mod:read_file([956,965,963,954,959]++"_fil1"), + ?line {error,enoent} = Mod:read_file([956,965,963,954,959,49]), + ?line Mod:rename([956,965,963,954,959]++"_fil1",[956,965,963,954,959,49]), + ?line {ok, <<"åäö2">>} = Mod:read_file([956,965,963,954,959,49]), + ?line {error,enoent} = Mod:read_file([956,965,963,954,959]++"_fil1"), + + ?line {ok,FI} = Mod:read_file_info([956,965,963,954,959,49]), + ?line NewMode = FI#file_info.mode band (bnot 8#333), + ?line NewMode2 = NewMode bor 8#222, + ?line true = NewMode2 =/= NewMode, + ?line ok = Mod:write_file_info([956,965,963,954,959,49], + FI#file_info{mode = NewMode}), + ?line {ok,#file_info{mode = NewMode}} = + Mod:read_file_info([956,965,963,954,959,49]), + ?line ok = Mod:write_file_info([956,965,963,954,959,49], + FI#file_info{mode = NewMode2}), + ?line {ok,#file_info{mode = NewMode2}} = + Mod:read_file_info([956,965,963,954,959,49]), + ok + catch + throw:need_unicode_mode -> + io:format("Sorry, can only run in unicode mode.~n"), + need_unicode_mode + after + Mod:set_cwd(Dir), + io:format("Wd now: ~s~n",[Dir]) + end. + +%% +%% Utilities +%% + + +rm_rf(Mod,Dir) -> + case Mod:read_link_info(Dir) of + {ok, #file_info{type = directory}} -> + {ok, Content} = Mod:list_dir(Dir), + [ rm_rf(Mod,filename:join(Dir,C)) || C <- Content ], + Mod:del_dir(Dir), + ok; + {ok, #file_info{}} -> + Mod:delete(Dir); + _ -> + ok + end. + +rm_r(Mod,Dir) -> + %erlang:display({rm_r,Dir}), + case Mod:read_link_info(Dir) of + {ok, #file_info{type = directory}} -> + {ok,#file_info{type = directory}} = Mod:read_file_info(Dir), + {ok, Content} = Mod:list_dir(Dir), + [ true = is_list(Part) || Part <- Content ], + [ true = is_list(filename:join(Dir,Part)) || Part <- Content ], + [ rm_r(Mod,filename:join(Dir,C)) || C <- Content ], + ok = Mod:del_dir(Dir), + ok; + {ok, #file_info{type = regular}} -> + {ok,#file_info{type = regular}} = Mod:read_file_info(Dir), + ok = Mod:delete(Dir); + {ok, #file_info{type = symlink}} -> + ok = Mod:delete(Dir) + end. +%% For icky test, allow binaries sometimes +rm_r2(Mod,Dir) -> + %erlang:display({rm_r2,Dir}), + case Mod:read_link_info(Dir) of + {ok, #file_info{type = directory}} -> + {ok,#file_info{type = directory}} = Mod:read_file_info(Dir), + {ok, Content} = Mod:list_dir(Dir), + UniMode = file:native_name_encoding() =/= latin1, + [ true = (is_list(Part) orelse UniMode) || Part <- Content ], + [ true = (is_list(filename:join(Dir,Part)) orelse UniMode) || Part <- Content ], + [ rm_r2(Mod,filename:join(Dir,C)) || C <- Content ], + ok = Mod:del_dir(Dir), + ok; + {ok, #file_info{type = regular}} -> + {ok,#file_info{type = regular}} = Mod:read_file_info(Dir), + ok = Mod:delete(Dir); + {ok, #file_info{type = symlink}} -> + ok = Mod:delete(Dir) + end. +chk_cre_dir(_,[]) -> + ok; +chk_cre_dir(Mod,[{regular,Name,Content}|T]) -> + ok = Mod:write_file(Name,Content), + chk_cre_dir(Mod,T); +chk_cre_dir(Mod,[{link,Name,Target}|T]) -> + ok = Mod:make_link(Target,Name), + chk_cre_dir(Mod,T); +chk_cre_dir(Mod,[{symlink,Name,Target}|T]) -> + ok = Mod:make_symlink(Target,Name), + chk_cre_dir(Mod,T); +chk_cre_dir(Mod,[{directory,Name,Content}|T]) -> + ok = Mod:make_dir(Name), + Content2 = [{Ty,filename:join(Name,N),case Ty of link -> filename:join(Name,C); _ -> C end} || {Ty,N,C} <- Content ], + chk_cre_dir(Mod,Content2), + chk_cre_dir(Mod,T). + +has_links() -> + case os:type() of + {win32,_} -> + case os:version() of + {N,NN,_} when (N > 5) orelse (NN > 1) -> + true; + _ -> + false + end; + _ -> + true + end. + +make_normal_dir(Mod) -> + rm_rf(Mod,"normal_dir"), + Mod:make_dir("normal_dir"), + Mod:set_cwd("normal_dir"), + Mod:write_file("fil1","fil1"), + Mod:write_file("fil2","fil2"), + case has_links() of + true -> + Mod:make_link("fil2","fil3"), + Mod:make_symlink("fil2","fil4"); + _ -> + ok + end, + Mod:make_dir("subdir"), + Mod:write_file(filename:join("subdir","subfil1"),"subfil1"), + ok. + +normal_dir() -> + [{regular,"fil1","fil1"}, + {regular,"fil2","fil2"}] ++ + case has_links() of + true -> + [{regular,"fil3","fil2"}, + {symlink,"fil4","fil2"}]; + false -> + [] + end ++ + [{directory,"subdir", + [{regular,"subfil1","subfil1"}]}]. + +make_icky_dir(Mod) -> + rm_rf(Mod,"icky_dir"), + Icky=icky_dir(), + chk_cre_dir(Mod,[{directory,"icky_dir",linkify([],Icky)}]), + Mod:set_cwd("icky_dir"), + ok. + +linkify(_Passed,[]) -> + []; +linkify(Passed,[{regular,Name,Content}|T]) -> + Regulars = [ {N,C} || {regular,N,C} <- Passed, N =/= Name ], + case lists:keysearch(Content,2,Regulars) of + {value, {Linkto, Content}} -> + [{link,Name,Linkto} | linkify(Passed,T)]; + _ -> + [{regular,Name,Content} | linkify([{regular,Name,Content}|Passed],T)] + end; +linkify(Passed,[{directory, Name, Content}|T]) -> + [{directory,Name, linkify(Content,Content)}|linkify(Passed,T)]; +linkify(Passed,[H|T]) -> + [H|linkify([H|Passed],T)]. + +icky_dir() -> + [{regular,"fil1","fil1"}, + {regular,"åäö2","åäö2"}] ++ + case has_links() of + true -> + [{regular,"åäö3","åäö2"}, + {symlink,"åäö4","åäö2"}]; + false -> + [] + end ++ + [{regular,treat_icky(<<"åäö5">>),"åäö5"}] ++ + case has_links() of + true -> + [{symlink,treat_icky(<<"åäö6">>),treat_icky(<<"åäö5">>)}]; + false -> + [] + end ++ + [{directory,treat_icky(<<"åäösubdir2">>), + [{regular,treat_icky(<<"åäösubfil2">>),"åäösubfil12"}, + {regular,"åäösubfil3","åäösubfil13"}]}, + {directory,"åäösubdir", + [{regular,"åäösubfil1","åäösubfil1"}]}]. + +make_very_icky_dir(Mod) -> + rm_rf(Mod,"very_icky_dir"), + Icky=very_icky_dir(), + chk_cre_dir(Mod,[{directory,"very_icky_dir",linkify([],Icky)}]), + Mod:set_cwd("very_icky_dir"), + ok. + +very_icky_dir() -> + [{regular,"fil1","fil1"}, + {regular,[956,965,963,954,959,49],"åäö2"}] ++ + case has_links() of + true -> + [{regular,[956,965,963,954,959,50],"åäö2"}, + {symlink,[956,965,963,954,959,51],[956,965,963,954,959,49]}]; + false -> + [] + end ++ + [{regular,treat_icky(<<"åäö5">>),"åäö5"}] ++ + case has_links() of + true -> + [{symlink,treat_icky(<<"åäö6">>),treat_icky(<<"åäö5">>)}]; + false -> + [] + end ++ + [{directory,treat_icky(<<"åäösubdir2">>), + [{regular,treat_icky(<<"åäösubfil2">>),"åäösubfil12"}, + {regular,"åäösubfil3","åäösubfil13"}]}, + {directory,[956,965,963,954,959]++"subdir1", + [{regular,[956,965,963,954,959]++"subfil1","åäösubfil1"}]}]. + +%% Some OS'es simply do not allow non UTF8 filenames +treat_icky(Bin) -> + case os:type() of + {unix,darwin} -> + procentify(Bin); + {win32,_} -> + binary_to_list(Bin); + _ -> + Bin + end. + +procentify(<<>>) -> + <<>>; +procentify(<>) when X > 127 -> + T=procentify(Rst), + Y = list_to_binary([$% + | io_lib:format("~2.16B",[X])]), + <>; +procentify(<>) -> + T=procentify(Rst), + <>. + + +list([]) -> + []; +list([{_,Name,_} | T]) -> + [Name | list(T)]. + + +get_data(FN,List) -> + case lists:keysearch(FN,2,List) of + {value,{regular,FN,C}} -> + C; + {value,{symlink,FN,NewFN}} -> + get_data(NewFN,List); + _-> + [] + end. + + +convlist(L) -> + convlist(file:native_name_encoding(),L). +convlist(latin1,[Bin|T]) when is_binary(Bin) -> + %erlang:display('Convert...'), + [binary_to_list(Bin)| convlist(latin1,T)]; +convlist(Any,[H|T]) -> + [H|convlist(Any,T)]; +convlist(_,[]) -> + []. + +conv(L) -> + NoUniMode = file:native_name_encoding() =:= latin1, + if + NoUniMode, is_binary(L) -> + binary_to_list(L); + true -> + L + end. + + + + -- cgit v1.2.3 From 4161de166d46b6c9c25d5ac813c94a025c7a029d Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 8 Nov 2010 17:25:01 +0100 Subject: Teach file to accept codepoints beyond 255. --- lib/kernel/src/file.erl | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/lib/kernel/src/file.erl b/lib/kernel/src/file.erl index 579b9132c6..08825c1c57 100644 --- a/lib/kernel/src/file.erl +++ b/lib/kernel/src/file.erl @@ -1037,20 +1037,22 @@ file_name(N) when is_binary(N) -> N; file_name(N) -> try - file_name_1(N) + file_name_1(N,file:native_name_encoding()) catch Reason -> {error, Reason} end. -file_name_1([C|T]) when is_integer(C), C > 0, C =< 255 -> - [C|file_name_1(T)]; -file_name_1([H|T]) -> - file_name_1(H) ++ file_name_1(T); -file_name_1([]) -> +file_name_1([C|T],latin1) when is_integer(C), C < 256-> + [C|file_name_1(T,latin1)]; +file_name_1([C|T],utf8) when is_integer(C) -> + [C|file_name_1(T,utf8)]; +file_name_1([H|T],E) -> + file_name_1(H,E) ++ file_name_1(T,E); +file_name_1([],_) -> []; -file_name_1(N) when is_atom(N) -> +file_name_1(N,_) when is_atom(N) -> atom_to_list(N); -file_name_1(_) -> +file_name_1(_,_) -> throw(badarg). make_binary(Bin) when is_binary(Bin) -> -- cgit v1.2.3 From 9622ab2132e2501ee5769357a914dcc6635e515c Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Tue, 9 Nov 2010 18:08:57 +0100 Subject: Convert filenames read on MacOSX to canonical form --- erts/emulator/beam/bif.tab | 1 + erts/emulator/beam/erl_unicode.c | 416 ++++-- erts/emulator/beam/erl_unicode_normalize.h | 1687 ++++++++++++++++++++++++ erts/emulator/beam/global.h | 11 + erts/emulator/beam/sys.h | 3 +- erts/emulator/internal_doc/dec.dat | 942 +++++++++++++ erts/emulator/internal_doc/dec.erl | 237 ++++ erts/emulator/sys/common/erl_sys_common_misc.c | 18 +- lib/kernel/test/file_name_SUITE.erl | 980 +++++++++++++- 9 files changed, 4204 insertions(+), 91 deletions(-) create mode 100644 erts/emulator/beam/erl_unicode_normalize.h create mode 100644 erts/emulator/internal_doc/dec.dat create mode 100644 erts/emulator/internal_doc/dec.erl diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 3de5e63a63..60b4b1946b 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -799,6 +799,7 @@ bif erlang:nif_error/2 # bif prim_file:internal_name2native/1 bif prim_file:internal_native2name/1 +bif prim_file:internal_normalize_utf8/1 bif file:native_name_encoding/0 # # Obsolete diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 4f26f078ce..1e729cfc2e 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -30,6 +30,8 @@ #include "big.h" #include "erl_unicode.h" +#include "erl_unicode_normalize.h" + typedef struct _restart_context { byte *bytes; @@ -54,13 +56,6 @@ static BIF_RETTYPE finalize_list_to_list(Process *p, Uint num_resulting_chars, int state, int left, Eterm tail); -static int analyze_utf8(byte *source, Uint size, - byte **err_pos, Uint *num_chars, int *left); -#define UTF8_OK 0 -#define UTF8_INCOMPLETE 1 -#define UTF8_ERROR 2 -#define UTF8_ANALYZE_MORE 3 - static BIF_RETTYPE characters_to_utf8_trap(BIF_ALIST_3); static BIF_RETTYPE characters_to_list_trap_1(BIF_ALIST_3); static BIF_RETTYPE characters_to_list_trap_2(BIF_ALIST_3); @@ -970,11 +965,11 @@ static int is_valid_utf8(Eterm orig_bin) bytes = erts_get_aligned_binary_bytes(orig_bin, &temp_alloc); } size = binary_size(orig_bin); - ret = analyze_utf8(bytes, + ret = erts_analyze_utf8(bytes, size, &endpos,&numchar,NULL); erts_free_aligned_binary_bytes(temp_alloc); - return (ret == UTF8_OK); + return (ret == ERTS_UTF8_OK); } BIF_RETTYPE unicode_characters_to_binary_2(BIF_ALIST_2) @@ -1084,14 +1079,14 @@ static BIF_RETTYPE build_list_return(Process *p, byte *bytes, int pos, Uint char hp += 2; rest_term = CONS(hp,leftover_bin,rest_term); } - BIF_RET(finalize_list_to_list(p, bytes, rest_term, 0U, pos, characters, UTF8_ERROR, left, NIL)); + BIF_RET(finalize_list_to_list(p, bytes, rest_term, 0U, pos, characters, ERTS_UTF8_ERROR, left, NIL)); } else if (rest_term == NIL && num_leftovers != 0) { Eterm leftover_bin = new_binary(p, leftover, num_leftovers); if (check_leftovers(leftover,num_leftovers) != 0) { - BIF_RET(finalize_list_to_list(p, bytes, leftover_bin, 0U, pos, characters, UTF8_ERROR, + BIF_RET(finalize_list_to_list(p, bytes, leftover_bin, 0U, pos, characters, ERTS_UTF8_ERROR, left, NIL)); } else { - BIF_RET(finalize_list_to_list(p, bytes, leftover_bin, 0U, pos, characters, UTF8_INCOMPLETE, + BIF_RET(finalize_list_to_list(p, bytes, leftover_bin, 0U, pos, characters, ERTS_UTF8_INCOMPLETE, left, NIL)); } } else { /* All OK */ @@ -1107,11 +1102,11 @@ static BIF_RETTYPE build_list_return(Process *p, byte *bytes, int pos, Uint char rc.num_processed_bytes = 0; /* not used */ rc.num_bytes_to_process = pos; rc.num_resulting_chars = characters; - rc.state = UTF8_OK; /* not used */ + rc.state = ERTS_UTF8_OK; /* not used */ BIF_TRAP3(&characters_to_list_trap_1_exp, p, make_magic_bin_for_restart(p,&rc), rest_term, latin1); } else { /* Success */ - BIF_RET(finalize_list_to_list(p, bytes, NIL, 0U, pos, characters, UTF8_OK, left, NIL)); + BIF_RET(finalize_list_to_list(p, bytes, NIL, 0U, pos, characters, ERTS_UTF8_OK, left, NIL)); } } } @@ -1205,7 +1200,7 @@ BIF_RETTYPE unicode_characters_to_list_2(BIF_ALIST_2) * When input to characters_to_list is a plain binary and the format is 'unicode', we do * a faster analyze and size count with this function. */ -static int analyze_utf8(byte *source, Uint size, +int erts_analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left) { *err_pos = source; @@ -1216,60 +1211,60 @@ static int analyze_utf8(byte *source, Uint size, --size; } else if (((*source) & ((byte) 0xE0)) == 0xC0) { if (size < 2) { - return UTF8_INCOMPLETE; + return ERTS_UTF8_INCOMPLETE; } if (((source[1] & ((byte) 0xC0)) != 0x80) || ((*source) < 0xC2) /* overlong */) { - return UTF8_ERROR; + return ERTS_UTF8_ERROR; } source += 2; size -= 2; } else if (((*source) & ((byte) 0xF0)) == 0xE0) { if (size < 3) { - return UTF8_INCOMPLETE; + return ERTS_UTF8_INCOMPLETE; } if (((source[1] & ((byte) 0xC0)) != 0x80) || ((source[2] & ((byte) 0xC0)) != 0x80) || (((*source) == 0xE0) && (source[1] < 0xA0)) /* overlong */ ) { - return UTF8_ERROR; + return ERTS_UTF8_ERROR; } if ((((*source) & ((byte) 0xF)) == 0xD) && ((source[1] & 0x20) != 0)) { - return UTF8_ERROR; + return ERTS_UTF8_ERROR; } if (((*source) == 0xEF) && (source[1] == 0xBF) && ((source[2] == 0xBE) || (source[2] == 0xBF))) { - return UTF8_ERROR; + return ERTS_UTF8_ERROR; } source += 3; size -= 3; } else if (((*source) & ((byte) 0xF8)) == 0xF0) { if (size < 4) { - return UTF8_INCOMPLETE; + return ERTS_UTF8_INCOMPLETE; } if (((source[1] & ((byte) 0xC0)) != 0x80) || ((source[2] & ((byte) 0xC0)) != 0x80) || ((source[3] & ((byte) 0xC0)) != 0x80) || (((*source) == 0xF0) && (source[1] < 0x90)) /* overlong */) { - return UTF8_ERROR; + return ERTS_UTF8_ERROR; } if ((((*source) & ((byte)0x7)) > 0x4U) || ((((*source) & ((byte)0x7)) == 0x4U) && ((source[1] & ((byte)0x3F)) > 0xFU))) { - return UTF8_ERROR; + return ERTS_UTF8_ERROR; } source += 4; size -= 4; } else { - return UTF8_ERROR; + return ERTS_UTF8_ERROR; } ++(*num_chars); *err_pos = source; if (left && --(*left) <= 0) { - return UTF8_ANALYZE_MORE; + return ERTS_UTF8_ANALYZE_MORE; } } - return UTF8_OK; + return ERTS_UTF8_OK; } /* @@ -1304,7 +1299,7 @@ static Eterm do_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, } else if (((*source) & ((byte) 0xE0)) == 0xC0) { unipoint = (((Uint) ((*source) & ((byte) 0x1F))) << 6) | - ((Uint) (source[1] & ((byte) 0x3F))); + ((Uint) (source[1] & ((byte) 0x3F))); } else if (((*source) & ((byte) 0xF0)) == 0xE0) { unipoint = (((Uint) ((*source) & ((byte) 0xF))) << 12) | @@ -1330,6 +1325,216 @@ static Eterm do_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, return ret; } +static int is_candidate(Uint cp) +{ + int index,pos; + if (cp < 768) return 0; + if (cp > 4023) { + if (cp == 12441 || cp == 12442) return 1; + return 0; + } + index = cp / 32 - COMP_CANDIDATE_MAP_OFFSET; + pos = cp % 32; + return !!(comp_candidate_map[index] & (1UL << pos)); +} + +static int hashsearch(int *htab, int htab_size, CompEntry *cv, Uint16 c) +{ + int bucket = c % htab_size; + while (htab[bucket] != -1 && cv[htab[bucket]].c != c) + bucket = (bucket + 1) % htab_size; + return htab[bucket]; +} + +#define TRANSLATE_NO 0 +#define TRANSLATE_MAYBE -1 + +/* The s array is reversed */ +static int translate(Uint16 *s, int slen, Uint16 *res) +{ + /* Go backwards through buffer and match against tree */ + int pos = 0; + CompEntry *cv = compose_tab; + int *hc = hash_compose_tab; + int cvs = compose_tab_size; + int x; + while (pos < slen) { + x = hashsearch(hc,cvs*HASH_SIZE_FACTOR,cv,s[pos]); + if (x < 0) { + return TRANSLATE_NO; + } + if (cv[x].res) { + *res = cv[x].res; + return pos; + } + cvs = cv[x].num_subs; + hc = cv[x].hash; + cv = cv[x].subs; + ++pos; + } + return TRANSLATE_MAYBE; +} + +static void handle_first_norm(Uint16 *savepoints, int *numpointsp, Uint unipoint) +{ + /*erts_fprintf(stderr,"CP = %d, numpoints = %d\n",(int) unipoint,(int) *numpointsp);*/ + *numpointsp = 1; + savepoints[0] = (Uint16) unipoint; +} + +static void cleanup_norm(Eterm **hpp, Uint16 *savepoints, int numpoints, Eterm *retp) +{ + Eterm *hp = *hpp; + int res,i; + Uint16 newpoint; + Eterm ret = *retp; + + ret = CONS(hp,make_small((Uint) savepoints[0]),ret); + hp += 2; + + for (i = 1;i < numpoints;) { + if(!is_candidate(savepoints[i]) || + ((res = translate(savepoints+i,numpoints - i, &newpoint)) <= 0)) { + ret = CONS(hp,make_small((Uint) savepoints[i]),ret); + hp += 2; + ++i; + } else { + ret = CONS(hp,make_small((Uint) newpoint),ret); + hp += 2; + i += res; + } + } + *retp = ret; +} + +static void handle_potential_norm(Eterm **hpp, Uint16 *savepoints, int *numpointsp, Uint unipoint, Eterm *retp) +{ + Eterm *hp = *hpp; + int numpoints = *numpointsp; + int res,i; + Uint16 newpoint; + Eterm ret = *retp; + + /* erts_fprintf(stderr,"CP = %d, numpoints = %d\n",(int) unipoint,(int) numpoints);*/ + if ((unipoint >> 16) == 0) { /* otherwise we're done here */ + savepoints[numpoints++] = (Uint16) unipoint; + res = translate(savepoints,numpoints,&newpoint); + if (res == TRANSLATE_NO) { + ret = CONS(hp,make_small((Uint) savepoints[0]),ret); + hp += 2; + for (i = 1;i < numpoints;) { + if(!is_candidate(savepoints[i]) || + ((res = translate(savepoints+i,numpoints - i, &newpoint)) == 0)) { + ret = CONS(hp,make_small((Uint) savepoints[i]),ret); + hp += 2; + ++i; + } else if (res > 0) { + ret = CONS(hp,make_small((Uint) newpoint),ret); + hp += 2; + i += res; + } else { /* res < 0 */ + /* A "maybe", means we are not done yet */ + int j = 0; + while (i < numpoints) { + savepoints[j++] = savepoints[i++]; + } + numpoints = j; + goto breakaway; + } + } + numpoints = 0; + breakaway: + ; + } else if (res > 0) { + numpoints = 0; + ret = CONS(hp,make_small((Uint) newpoint),ret); + hp += 2; + } /* < 0 means go on */ + } else { + /* Unconditional rollup, this character is larger than 16 bit */ + ret = CONS(hp,make_small((Uint) savepoints[0]),ret); + hp += 2; + + for (i = 1;i < numpoints;) { + if(!is_candidate(savepoints[i]) || + ((res = translate(savepoints+i,numpoints - i, &newpoint)) <= 0)) { + ret = CONS(hp,make_small((Uint) savepoints[i]),ret); + hp += 2; + ++i; + } else { + ret = CONS(hp,make_small((Uint) newpoint),ret); + hp += 2; + i += res; + } + } + ret = CONS(hp,make_small(unipoint),ret); + hp += 2; + numpoints = 0; + } + *hpp = hp; + *numpointsp = numpoints; + *retp = ret; +} + +static Eterm do_utf8_to_list_normalize(Process *p, Uint num, byte *bytes, Uint sz) +{ + Eterm *hp,*hp_end; + Eterm ret; + byte *source; + Uint unipoint; + Uint16 savepoints[4]; + int numpoints = 0; + + ASSERT(num > 0); + + hp = HAlloc(p,num * 2); /* May be to much */ + hp_end = hp + num * 2; + ret = NIL; + source = bytes + sz; + while(--source >= bytes) { + if (((*source) & ((byte) 0x80)) == 0) { + unipoint = (Uint) *source; + } else if (((*source) & ((byte) 0xE0)) == 0xC0) { + unipoint = + (((Uint) ((*source) & ((byte) 0x1F))) << 6) | + ((Uint) (source[1] & ((byte) 0x3F))); + } else if (((*source) & ((byte) 0xF0)) == 0xE0) { + unipoint = + (((Uint) ((*source) & ((byte) 0xF))) << 12) | + (((Uint) (source[1] & ((byte) 0x3F))) << 6) | + ((Uint) (source[2] & ((byte) 0x3F))); + } else if (((*source) & ((byte) 0xF8)) == 0xF0) { + unipoint = + (((Uint) ((*source) & ((byte) 0x7))) << 18) | + (((Uint) (source[1] & ((byte) 0x3F))) << 12) | + (((Uint) (source[2] & ((byte) 0x3F))) << 6) | + ((Uint) (source[3] & ((byte) 0x3F))); + } else { + /* ignore 2#10XXXXXX */ + continue; + } + if (numpoints) { + handle_potential_norm(&hp,savepoints,&numpoints,unipoint,&ret); + continue; + } + /* We are not building up any normalizations yet, look that we shouldn't start... */ + if (is_candidate(unipoint)) { + handle_first_norm(savepoints,&numpoints,unipoint); + continue; + } + ret = CONS(hp,make_small(unipoint),ret); + hp += 2; + } + /* so, we'we looped to the beginning, do we have anything saved? */ + if (numpoints) { + cleanup_norm(&hp,savepoints,numpoints,&ret); + } + if (hp_end != hp) { + HRelease(p,hp_end,hp); + } + return ret; +} + /* * The last step of characters_to_list, build a list from the buffer 'bytes' (created in the same way * as for characters_to_utf8). All sizes are known in advance and most data will be held in a @@ -1378,10 +1583,10 @@ static BIF_RETTYPE finalize_list_to_list(Process *p, */ free_restart(bytes); - if (state == UTF8_INCOMPLETE) { + if (state == ERTS_UTF8_INCOMPLETE) { hp = HAlloc(p,4); ret = TUPLE3(hp,am_incomplete,converted,rest); - } else if (state == UTF8_ERROR) { + } else if (state == ERTS_UTF8_ERROR) { hp = HAlloc(p,4); ret = TUPLE3(hp,am_error,converted,rest); } else { @@ -1408,7 +1613,7 @@ static BIF_RETTYPE characters_to_list_trap_2(BIF_ALIST_3) /* * Hooks into the process of decoding a binary depending on state. - * If last_state is UTF8_ANALYZE_MORE, num_bytes_to_process + * If last_state is ERTS_UTF8_ANALYZE_MORE, num_bytes_to_process * and num_resulting_chars will grow * until we're done analyzing the binary. Then we'll eat * the bytes to process, lowering num_bytes_to_process and num_resulting_chars, @@ -1465,14 +1670,14 @@ static BIF_RETTYPE do_bif_utf8_to_list(Process *p, left = allowed_iterations(p); - if (state == UTF8_ANALYZE_MORE) { - state = analyze_utf8(bytes + num_bytes_to_process, + if (state == ERTS_UTF8_ANALYZE_MORE) { + state = erts_analyze_utf8(bytes + num_bytes_to_process, size - num_bytes_to_process, &endpos,&numchar,&left); cost_to_proc(p,numchar); num_resulting_chars += numchar; num_bytes_to_process = endpos - bytes; - if (state == UTF8_ANALYZE_MORE) { + if (state == ERTS_UTF8_ANALYZE_MORE) { Eterm epos = erts_make_integer(num_bytes_to_process,p); Eterm enumchar = erts_make_integer(num_resulting_chars,p); erts_free_aligned_binary_bytes(temp_alloc); @@ -1528,7 +1733,7 @@ static BIF_RETTYPE do_bif_utf8_to_list(Process *p, ErlSubBin *sb; Eterm orig; Uint offset; - ASSERT(state != UTF8_OK); + ASSERT(state != ERTS_UTF8_OK); hp = HAlloc(p, ERL_SUB_BIN_SIZE); sb = (ErlSubBin *) hp; ERTS_GET_REAL_BIN(orig_bin, orig, offset, bitoffs, bitsize); @@ -1544,14 +1749,14 @@ static BIF_RETTYPE do_bif_utf8_to_list(Process *p, /* Done */ - if (state == UTF8_INCOMPLETE) { + if (state == ERTS_UTF8_INCOMPLETE) { if (check_leftovers(bytes + num_bytes_to_process + num_processed_bytes, b_sz) != 0) { goto error_return; } hp = HAlloc(p,4); ret = TUPLE3(hp,am_incomplete,converted,rest); - } else if (state == UTF8_ERROR) { + } else if (state == ERTS_UTF8_ERROR) { error_return: hp = HAlloc(p,4); ret = TUPLE3(hp,am_error,converted,rest); @@ -1589,7 +1794,7 @@ static BIF_RETTYPE characters_to_list_trap_3(BIF_ALIST_3) 0U, /* nothing processed yet */ num_bytes_to_process, num_resulting_chars, - UTF8_ANALYZE_MORE, /* always this state here */ + ERTS_UTF8_ANALYZE_MORE, /* always this state here */ NIL); /* Nothing built -> no tail yet */ } @@ -1642,7 +1847,7 @@ static BIF_RETTYPE utf8_to_list(BIF_ALIST_1) BIF_ERROR(BIF_P,BADARG); } return do_bif_utf8_to_list(BIF_P, BIF_ARG_1, 0U, 0U, 0U, - UTF8_ANALYZE_MORE,NIL); + ERTS_UTF8_ANALYZE_MORE,NIL); } @@ -1728,8 +1933,8 @@ binary_to_atom(Process* p, Eterm bin, Eterm enc, int must_exist) Uint n; int reds_left = bin_size+1; /* Number of reductions left. */ - if (analyze_utf8(bytes, bin_size, &err_pos, - &n, &reds_left) == UTF8_OK) { + if (erts_analyze_utf8(bytes, bin_size, &err_pos, + &n, &reds_left) == ERTS_UTF8_OK) { /* * Correct UTF-8 encoding, but too many characters to * fit in an atom. @@ -1818,7 +2023,7 @@ BIF_RETTYPE binary_to_existing_atom_2(BIF_ALIST_2) * Simpler non-interruptable routines for UTF-8 and * Windowish UTF-16 (restricted) **********************************************************/ -static Sint simple_char_need(Eterm ioterm, int encoding) +Sint erts_native_filename_need(Eterm ioterm, int encoding) { Eterm *objp; Eterm obj; @@ -1833,6 +2038,7 @@ static Sint simple_char_need(Eterm ioterm, int encoding) case ERL_FILENAME_LATIN1: need = ap->len; break; + case ERL_FILENAME_UTF8_MAC: case ERL_FILENAME_UTF8: for (i = 0; i < ap->len; i++) { need += (ap->name[i] >= 0x80) ? 2 : 1; @@ -1882,6 +2088,7 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */ } need += 1; break; + case ERL_FILENAME_UTF8_MAC: case ERL_FILENAME_UTF8: if (x < 0x80) { need +=1; @@ -1956,7 +2163,7 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */ return need; } -static void simple_put_chars(Eterm ioterm, int encoding, byte *p) +void erts_native_filename_put(Eterm ioterm, int encoding, byte *p) { Eterm *objp; Eterm obj; @@ -1972,6 +2179,7 @@ static void simple_put_chars(Eterm ioterm, int encoding, byte *p) *p++ = ap->name[i]; } break; + case ERL_FILENAME_UTF8_MAC: case ERL_FILENAME_UTF8: for (i = 0; i < ap->len; i++) { if(ap->name[i] < 0x80) { @@ -2024,6 +2232,7 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */ ASSERT( x < 256); *p++ = (byte) x; break; + case ERL_FILENAME_UTF8_MAC: case ERL_FILENAME_UTF8: if (x < 0x80) { *p++ = (byte) x; @@ -2102,7 +2311,39 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */ DESTROY_ESTACK(stack); return; } - +void erts_copy_utf8_to_utf16_little(byte *target, byte *bytes, int num_chars) +{ + Uint unipoint; + + while (num_chars--) { + if (((*bytes) & ((byte) 0x80)) == 0) { + unipoint = (Uint) *bytes; + ++bytes; + } else if (((*bytes) & ((byte) 0xE0)) == 0xC0) { + unipoint = + (((Uint) ((*bytes) & ((byte) 0x1F))) << 6) | + ((Uint) (bytes[1] & ((byte) 0x3F))); + bytes += 2; + } else if (((*bytes) & ((byte) 0xF0)) == 0xE0) { + unipoint = + (((Uint) ((*bytes) & ((byte) 0xF))) << 12) | + (((Uint) (bytes[1] & ((byte) 0x3F))) << 6) | + ((Uint) (bytes[2] & ((byte) 0x3F))); + bytes +=3; + } else if (((*bytes) & ((byte) 0xF8)) == 0xF0) { + unipoint = + (((Uint) ((*bytes) & ((byte) 0x7))) << 18) | + (((Uint) (bytes[1] & ((byte) 0x3F))) << 12) | + (((Uint) (bytes[2] & ((byte) 0x3F))) << 6) | + ((Uint) (bytes[3] & ((byte) 0x3F))); + bytes += 4; + } else { + erl_exit(1,"Internal unicode error in prim_file:internal_name2native/1"); + } + *target++ = (byte) (unipoint & 0xFF); + *target++ = (byte) ((unipoint >> 8) & 0xFF); + } +} /* * This internal bif converts a filename to whatever format is suitable for the file driver @@ -2120,7 +2361,6 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1) byte *bytes; byte *err_pos; Uint size,num_chars; - Uint unipoint; /* Uninterpreted encoding except if windows widechar, in case we convert from utf8 to win_wchar */ size = binary_size(BIF_ARG_1); @@ -2137,7 +2377,7 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1) /* In a wchar world, the emulator flags only affect how binaries are interpreted when sent from the user. */ /* Determine real length and create a new binary */ - if (analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != UTF8_OK || + if (erts_analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != ERTS_UTF8_OK || erts_get_user_requested_filename_encoding() == ERL_FILENAME_LATIN1) { /* What to do now? Maybe latin1, so just take byte for byte instead */ bin_term = new_binary(BIF_P, 0, (size+1)*2); @@ -2154,43 +2394,16 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1) /* OK, UTF8 ok, number of characters is in num_chars */ bin_term = new_binary(BIF_P, 0, (num_chars+1)*2); bin_p = binary_bytes(bin_term); - while (num_chars--) { - if (((*bytes) & ((byte) 0x80)) == 0) { - unipoint = (Uint) *bytes; - ++bytes; - } else if (((*bytes) & ((byte) 0xE0)) == 0xC0) { - unipoint = - (((Uint) ((*bytes) & ((byte) 0x1F))) << 6) | - ((Uint) (bytes[1] & ((byte) 0x3F))); - bytes += 2; - } else if (((*bytes) & ((byte) 0xF0)) == 0xE0) { - unipoint = - (((Uint) ((*bytes) & ((byte) 0xF))) << 12) | - (((Uint) (bytes[1] & ((byte) 0x3F))) << 6) | - ((Uint) (bytes[2] & ((byte) 0x3F))); - bytes +=3; - } else if (((*bytes) & ((byte) 0xF8)) == 0xF0) { - unipoint = - (((Uint) ((*bytes) & ((byte) 0x7))) << 18) | - (((Uint) (bytes[1] & ((byte) 0x3F))) << 12) | - (((Uint) (bytes[2] & ((byte) 0x3F))) << 6) | - ((Uint) (bytes[3] & ((byte) 0x3F))); - bytes += 4; - } else { - erl_exit(1,"Internal unicode error in file:name2native/1"); - } - *bin_p++ = (byte) (unipoint & 0xFF); - *bin_p++ = (byte) ((unipoint >> 8) & 0xFF); - } + erts_copy_utf8_to_utf16_little(bin_p, bytes, num_chars); /* zero termination */ - *bin_p++ = 0; - *bin_p++ = 0; + bin_p[num_chars*2] = 0; + bin_p[num_chars*2+1] = 0; erts_free_aligned_binary_bytes(temp_alloc); BIF_RET(bin_term); } /* binary */ - if ((need = simple_char_need(BIF_ARG_1,encoding)) < 0) { + if ((need = erts_native_filename_need(BIF_ARG_1,encoding)) < 0) { BIF_ERROR(BIF_P,BADARG); } if (encoding == ERL_FILENAME_WIN_WCHAR) { @@ -2201,7 +2414,7 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1) bin_term = new_binary(BIF_P, 0, need); bin_p = binary_bytes(bin_term); - simple_put_chars(BIF_ARG_1,encoding,bin_p); + erts_native_filename_put(BIF_ARG_1,encoding,bin_p); bin_p[need-1] = 0; if (encoding == ERL_FILENAME_WIN_WCHAR) { bin_p[need-2] = 0; @@ -2223,6 +2436,7 @@ BIF_RETTYPE prim_file_internal_native2name_1(BIF_ALIST_1) Uint num_built; /* characters */ Uint num_eaten; /* bytes */ Eterm ret; + int mac = 0; if (is_not_binary(BIF_ARG_1)) { BIF_ERROR(BIF_P,BADARG); @@ -2241,15 +2455,21 @@ BIF_RETTYPE prim_file_internal_native2name_1(BIF_ALIST_1) bytes = binary_bytes(real_bin)+offset; BIF_RET(erts_bin_bytes_to_list(NIL, hp, bytes, size, bitoffs)); + case ERL_FILENAME_UTF8_MAC: + mac = 1; case ERL_FILENAME_UTF8: bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); - if (analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != UTF8_OK) { + if (erts_analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != ERTS_UTF8_OK) { erts_free_aligned_binary_bytes(temp_alloc); goto noconvert; } num_built = 0; num_eaten = 0; - ret = do_utf8_to_list(BIF_P, num_chars, bytes, size, num_chars, &num_built, &num_eaten, NIL); + if (mac) { + ret = do_utf8_to_list_normalize(BIF_P, num_chars, bytes, size); + } else { + ret = do_utf8_to_list(BIF_P, num_chars, bytes, size, num_chars, &num_built, &num_eaten, NIL); + } erts_free_aligned_binary_bytes(temp_alloc); BIF_RET(ret); case ERL_FILENAME_WIN_WCHAR: @@ -2267,9 +2487,9 @@ BIF_RETTYPE prim_file_internal_native2name_1(BIF_ALIST_1) while (size > 0) { Uint x = ((Uint) *bytes--) << 8; x |= ((Uint) *bytes--); + size -= 2; ret = CONS(hp,make_small(x),ret); hp += 2; - size -= 2; } erts_free_aligned_binary_bytes(temp_alloc); BIF_RET(ret); @@ -2280,11 +2500,45 @@ BIF_RETTYPE prim_file_internal_native2name_1(BIF_ALIST_1) BIF_RET(BIF_ARG_1); } +BIF_RETTYPE prim_file_internal_normalize_utf8_1(BIF_ALIST_1) +{ + Eterm real_bin; + Uint offset; + Uint size,num_chars; + Uint bitsize; + Uint bitoffs; + Eterm ret; + byte *temp_alloc = NULL; + byte *bytes; + byte *err_pos; + + if (is_not_binary(BIF_ARG_1)) { + BIF_ERROR(BIF_P,BADARG); + } + size = binary_size(BIF_ARG_1); + ERTS_GET_REAL_BIN(BIF_ARG_1, real_bin, offset, bitoffs, bitsize); + if (bitsize != 0) { + BIF_ERROR(BIF_P,BADARG); + } + if (size == 0) { + BIF_RET(NIL); + } + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + if (erts_analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != ERTS_UTF8_OK) { + erts_free_aligned_binary_bytes(temp_alloc); + BIF_ERROR(BIF_P,BADARG); + } + ret = do_utf8_to_list_normalize(BIF_P, num_chars, bytes, size); + erts_free_aligned_binary_bytes(temp_alloc); + BIF_RET(ret); +} + BIF_RETTYPE file_native_name_encoding_0(BIF_ALIST_0) { switch (erts_get_native_filename_encoding()) { case ERL_FILENAME_LATIN1: BIF_RET(am_latin1); + case ERL_FILENAME_UTF8_MAC: case ERL_FILENAME_UTF8: BIF_RET(am_utf8); case ERL_FILENAME_WIN_WCHAR: diff --git a/erts/emulator/beam/erl_unicode_normalize.h b/erts/emulator/beam/erl_unicode_normalize.h new file mode 100644 index 0000000000..fb0a111ca2 --- /dev/null +++ b/erts/emulator/beam/erl_unicode_normalize.h @@ -0,0 +1,1687 @@ +/* +* %CopyrightBegin% +* +* Copyright Ericsson AB 1999-2010. All Rights Reserved. +* +* The contents of this file are subject to the Erlang Public License, +* Version 1.1, (the "License"); you may not use this file except in +* compliance with the License. You should have received a copy of the +* Erlang Public License along with this software. If not, it can be +* retrieved online at http://www.erlang.org/. +* +* Software distributed under the License is distributed on an "AS IS" +* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +* the License for the specific language governing rights and limitations +* under the License. +* +* %CopyrightEnd% +*/ +/* +* This file is automatically generated by dec.erl, do not edit manually +*/ +#define HASH_SIZE_FACTOR 2 +typedef struct _compose_entry { + Uint16 c; + Uint16 res; + Uint16 num_subs; + struct _compose_entry *subs; + int *hash; +} CompEntry; + +static int compose_tab_size = 61; +static int hash_compose_tab_0_15[12] = +{-1,3,-1,5,-1,0,4,2,-1,1,-1,-1}; /* hash_compose_tab_0_15 */ +static CompEntry compose_tab_0_15[] = { +{65, 7846, 0, NULL, NULL}, +{69, 7872, 0, NULL, NULL}, +{79, 7890, 0, NULL, NULL}, +{97, 7847, 0, NULL, NULL}, +{101, 7873, 0, NULL, NULL}, +{111, 7891, 0, NULL, NULL} +}; /* compose_tab_0_15 */ +static int hash_compose_tab_0_16[8] = +{3,-1,-1,-1,-1,0,2,1}; /* hash_compose_tab_0_16 */ +static CompEntry compose_tab_0_16[] = { +{69, 7700, 0, NULL, NULL}, +{79, 7760, 0, NULL, NULL}, +{101, 7701, 0, NULL, NULL}, +{111, 7761, 0, NULL, NULL} +}; /* compose_tab_0_16 */ +static int hash_compose_tab_0_17[4] = +{-1,0,1,-1}; /* hash_compose_tab_0_17 */ +static CompEntry compose_tab_0_17[] = { +{65, 7856, 0, NULL, NULL}, +{97, 7857, 0, NULL, NULL} +}; /* compose_tab_0_17 */ +static int hash_compose_tab_0_18[8] = +{-1,2,-1,-1,-1,0,1,3}; /* hash_compose_tab_0_18 */ +static CompEntry compose_tab_0_18[] = { +{85, 475, 0, NULL, NULL}, +{117, 476, 0, NULL, NULL}, +{953, 8146, 0, NULL, NULL}, +{965, 8162, 0, NULL, NULL} +}; /* compose_tab_0_18 */ +static int hash_compose_tab_0_19_0[12] = +{-1,0,2,4,-1,-1,-1,1,-1,3,5,-1}; /* hash_compose_tab_0_19_0 */ +static CompEntry compose_tab_0_19_0[] = { +{913, 8074, 0, NULL, NULL}, +{919, 8090, 0, NULL, NULL}, +{937, 8106, 0, NULL, NULL}, +{945, 8066, 0, NULL, NULL}, +{951, 8082, 0, NULL, NULL}, +{969, 8098, 0, NULL, NULL} +}; /* compose_tab_0_19_0 */ +static int hash_compose_tab_0_19[28] = +{9,10,-1,5,-1,-1,-1,11,-1,-1,-1,-1,-1,6,12,-1,-1,1,13,-1,-1,2,7,3,-1,0,4,8}; /* hash_compose_tab_0_19 */ +static CompEntry compose_tab_0_19[] = { +{837, 0, 6, compose_tab_0_19_0, hash_compose_tab_0_19_0}, +{913, 7946, 0, NULL, NULL}, +{917, 7962, 0, NULL, NULL}, +{919, 7978, 0, NULL, NULL}, +{921, 7994, 0, NULL, NULL}, +{927, 8010, 0, NULL, NULL}, +{937, 8042, 0, NULL, NULL}, +{945, 7938, 0, NULL, NULL}, +{949, 7954, 0, NULL, NULL}, +{951, 7970, 0, NULL, NULL}, +{953, 7986, 0, NULL, NULL}, +{959, 8002, 0, NULL, NULL}, +{965, 8018, 0, NULL, NULL}, +{969, 8034, 0, NULL, NULL} +}; /* compose_tab_0_19 */ +static int hash_compose_tab_0_20_0[12] = +{-1,0,2,4,-1,-1,-1,1,-1,3,5,-1}; /* hash_compose_tab_0_20_0 */ +static CompEntry compose_tab_0_20_0[] = { +{913, 8075, 0, NULL, NULL}, +{919, 8091, 0, NULL, NULL}, +{937, 8107, 0, NULL, NULL}, +{945, 8067, 0, NULL, NULL}, +{951, 8083, 0, NULL, NULL}, +{969, 8099, 0, NULL, NULL} +}; /* compose_tab_0_20_0 */ +static int hash_compose_tab_0_20[30] = +{-1,-1,-1,6,-1,13,-1,7,-1,14,-1,-1,-1,1,-1,8,-1,2,-1,3,9,4,10,11,-1,-1,-1,0,5, + 12}; /* hash_compose_tab_0_20 */ +static CompEntry compose_tab_0_20[] = { +{837, 0, 6, compose_tab_0_20_0, hash_compose_tab_0_20_0}, +{913, 7947, 0, NULL, NULL}, +{917, 7963, 0, NULL, NULL}, +{919, 7979, 0, NULL, NULL}, +{921, 7995, 0, NULL, NULL}, +{927, 8011, 0, NULL, NULL}, +{933, 8027, 0, NULL, NULL}, +{937, 8043, 0, NULL, NULL}, +{945, 7939, 0, NULL, NULL}, +{949, 7955, 0, NULL, NULL}, +{951, 7971, 0, NULL, NULL}, +{953, 7987, 0, NULL, NULL}, +{959, 8003, 0, NULL, NULL}, +{965, 8019, 0, NULL, NULL}, +{969, 8035, 0, NULL, NULL} +}; /* compose_tab_0_20 */ +static int hash_compose_tab_0_21[8] = +{2,-1,-1,-1,-1,1,3,0}; /* hash_compose_tab_0_21 */ +static CompEntry compose_tab_0_21[] = { +{79, 7900, 0, NULL, NULL}, +{85, 7914, 0, NULL, NULL}, +{111, 7901, 0, NULL, NULL}, +{117, 7915, 0, NULL, NULL} +}; /* compose_tab_0_21 */ +static int hash_compose_tab_0_22[6] = +{-1,-1,-1,0,1,2}; /* hash_compose_tab_0_22 */ +static CompEntry compose_tab_0_22[] = { +{945, 8114, 0, NULL, NULL}, +{951, 8130, 0, NULL, NULL}, +{969, 8178, 0, NULL, NULL} +}; /* compose_tab_0_22 */ +static int hash_compose_tab_0[78] = +{38,3,29,-1,-1,-1,-1,4,19,5,20,6,14,30,31,21,32,33,37,7,-1,-1,-1,8,34,-1,-1,9, + -1,35,-1,-1,-1,10,36,-1,-1,-1,-1,11,-1,12,-1,13,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,23,-1,22,-1,24,-1,25,-1,26,-1,0,-1,-1,15,1,16,27,17,2,18,28,-1,-1}; /* hash_compose_tab_0 */ +static CompEntry compose_tab_0[] = { +{65, 192, 0, NULL, NULL}, +{69, 200, 0, NULL, NULL}, +{73, 204, 0, NULL, NULL}, +{79, 210, 0, NULL, NULL}, +{85, 217, 0, NULL, NULL}, +{87, 7808, 0, NULL, NULL}, +{89, 7922, 0, NULL, NULL}, +{97, 224, 0, NULL, NULL}, +{101, 232, 0, NULL, NULL}, +{105, 236, 0, NULL, NULL}, +{111, 242, 0, NULL, NULL}, +{117, 249, 0, NULL, NULL}, +{119, 7809, 0, NULL, NULL}, +{121, 7923, 0, NULL, NULL}, +{168, 8173, 0, NULL, NULL}, +{770, 0, 6, compose_tab_0_15, hash_compose_tab_0_15}, +{772, 0, 4, compose_tab_0_16, hash_compose_tab_0_16}, +{774, 0, 2, compose_tab_0_17, hash_compose_tab_0_17}, +{776, 0, 4, compose_tab_0_18, hash_compose_tab_0_18}, +{787, 0, 14, compose_tab_0_19, hash_compose_tab_0_19}, +{788, 0, 15, compose_tab_0_20, hash_compose_tab_0_20}, +{795, 0, 4, compose_tab_0_21, hash_compose_tab_0_21}, +{837, 0, 3, compose_tab_0_22, hash_compose_tab_0_22}, +{913, 8122, 0, NULL, NULL}, +{917, 8136, 0, NULL, NULL}, +{919, 8138, 0, NULL, NULL}, +{921, 8154, 0, NULL, NULL}, +{927, 8184, 0, NULL, NULL}, +{933, 8170, 0, NULL, NULL}, +{937, 8186, 0, NULL, NULL}, +{945, 8048, 0, NULL, NULL}, +{949, 8050, 0, NULL, NULL}, +{951, 8052, 0, NULL, NULL}, +{953, 8054, 0, NULL, NULL}, +{959, 8056, 0, NULL, NULL}, +{965, 8058, 0, NULL, NULL}, +{969, 8060, 0, NULL, NULL}, +{8127, 8141, 0, NULL, NULL}, +{8190, 8157, 0, NULL, NULL} +}; /* compose_tab_0 */ +static int hash_compose_tab_1_39[12] = +{-1,3,-1,5,-1,0,4,2,-1,1,-1,-1}; /* hash_compose_tab_1_39 */ +static CompEntry compose_tab_1_39[] = { +{65, 7844, 0, NULL, NULL}, +{69, 7870, 0, NULL, NULL}, +{79, 7888, 0, NULL, NULL}, +{97, 7845, 0, NULL, NULL}, +{101, 7871, 0, NULL, NULL}, +{111, 7889, 0, NULL, NULL} +}; /* compose_tab_1_39 */ +static int hash_compose_tab_1_40[8] = +{2,-1,-1,-1,-1,1,3,0}; /* hash_compose_tab_1_40 */ +static CompEntry compose_tab_1_40[] = { +{79, 7756, 0, NULL, NULL}, +{85, 7800, 0, NULL, NULL}, +{111, 7757, 0, NULL, NULL}, +{117, 7801, 0, NULL, NULL} +}; /* compose_tab_1_40 */ +static int hash_compose_tab_1_41[8] = +{3,-1,-1,-1,-1,0,2,1}; /* hash_compose_tab_1_41 */ +static CompEntry compose_tab_1_41[] = { +{69, 7702, 0, NULL, NULL}, +{79, 7762, 0, NULL, NULL}, +{101, 7703, 0, NULL, NULL}, +{111, 7763, 0, NULL, NULL} +}; /* compose_tab_1_41 */ +static int hash_compose_tab_1_42[4] = +{-1,0,1,-1}; /* hash_compose_tab_1_42 */ +static CompEntry compose_tab_1_42[] = { +{65, 7854, 0, NULL, NULL}, +{97, 7855, 0, NULL, NULL} +}; /* compose_tab_1_42 */ +static int hash_compose_tab_1_43[12] = +{-1,0,1,-1,-1,4,5,-1,-1,2,3,-1}; /* hash_compose_tab_1_43 */ +static CompEntry compose_tab_1_43[] = { +{73, 7726, 0, NULL, NULL}, +{85, 471, 0, NULL, NULL}, +{105, 7727, 0, NULL, NULL}, +{117, 472, 0, NULL, NULL}, +{953, 8147, 0, NULL, NULL}, +{965, 8163, 0, NULL, NULL} +}; /* compose_tab_1_43 */ +static int hash_compose_tab_1_44[4] = +{-1,0,1,-1}; /* hash_compose_tab_1_44 */ +static CompEntry compose_tab_1_44[] = { +{65, 506, 0, NULL, NULL}, +{97, 507, 0, NULL, NULL} +}; /* compose_tab_1_44 */ +static int hash_compose_tab_1_45_0[12] = +{-1,0,2,4,-1,-1,-1,1,-1,3,5,-1}; /* hash_compose_tab_1_45_0 */ +static CompEntry compose_tab_1_45_0[] = { +{913, 8076, 0, NULL, NULL}, +{919, 8092, 0, NULL, NULL}, +{937, 8108, 0, NULL, NULL}, +{945, 8068, 0, NULL, NULL}, +{951, 8084, 0, NULL, NULL}, +{969, 8100, 0, NULL, NULL} +}; /* compose_tab_1_45_0 */ +static int hash_compose_tab_1_45[28] = +{9,10,-1,5,-1,-1,-1,11,-1,-1,-1,-1,-1,6,12,-1,-1,1,13,-1,-1,2,7,3,-1,0,4,8}; /* hash_compose_tab_1_45 */ +static CompEntry compose_tab_1_45[] = { +{837, 0, 6, compose_tab_1_45_0, hash_compose_tab_1_45_0}, +{913, 7948, 0, NULL, NULL}, +{917, 7964, 0, NULL, NULL}, +{919, 7980, 0, NULL, NULL}, +{921, 7996, 0, NULL, NULL}, +{927, 8012, 0, NULL, NULL}, +{937, 8044, 0, NULL, NULL}, +{945, 7940, 0, NULL, NULL}, +{949, 7956, 0, NULL, NULL}, +{951, 7972, 0, NULL, NULL}, +{953, 7988, 0, NULL, NULL}, +{959, 8004, 0, NULL, NULL}, +{965, 8020, 0, NULL, NULL}, +{969, 8036, 0, NULL, NULL} +}; /* compose_tab_1_45 */ +static int hash_compose_tab_1_46_0[12] = +{-1,0,2,4,-1,-1,-1,1,-1,3,5,-1}; /* hash_compose_tab_1_46_0 */ +static CompEntry compose_tab_1_46_0[] = { +{913, 8077, 0, NULL, NULL}, +{919, 8093, 0, NULL, NULL}, +{937, 8109, 0, NULL, NULL}, +{945, 8069, 0, NULL, NULL}, +{951, 8085, 0, NULL, NULL}, +{969, 8101, 0, NULL, NULL} +}; /* compose_tab_1_46_0 */ +static int hash_compose_tab_1_46[30] = +{-1,-1,-1,6,-1,13,-1,7,-1,14,-1,-1,-1,1,-1,8,-1,2,-1,3,9,4,10,11,-1,-1,-1,0,5, + 12}; /* hash_compose_tab_1_46 */ +static CompEntry compose_tab_1_46[] = { +{837, 0, 6, compose_tab_1_46_0, hash_compose_tab_1_46_0}, +{913, 7949, 0, NULL, NULL}, +{917, 7965, 0, NULL, NULL}, +{919, 7981, 0, NULL, NULL}, +{921, 7997, 0, NULL, NULL}, +{927, 8013, 0, NULL, NULL}, +{933, 8029, 0, NULL, NULL}, +{937, 8045, 0, NULL, NULL}, +{945, 7941, 0, NULL, NULL}, +{949, 7957, 0, NULL, NULL}, +{951, 7973, 0, NULL, NULL}, +{953, 7989, 0, NULL, NULL}, +{959, 8005, 0, NULL, NULL}, +{965, 8021, 0, NULL, NULL}, +{969, 8037, 0, NULL, NULL} +}; /* compose_tab_1_46 */ +static int hash_compose_tab_1_47[8] = +{2,-1,-1,-1,-1,1,3,0}; /* hash_compose_tab_1_47 */ +static CompEntry compose_tab_1_47[] = { +{79, 7898, 0, NULL, NULL}, +{85, 7912, 0, NULL, NULL}, +{111, 7899, 0, NULL, NULL}, +{117, 7913, 0, NULL, NULL} +}; /* compose_tab_1_47 */ +static int hash_compose_tab_1_48[4] = +{1,-1,-1,0}; /* hash_compose_tab_1_48 */ +static CompEntry compose_tab_1_48[] = { +{67, 7688, 0, NULL, NULL}, +{99, 7689, 0, NULL, NULL} +}; /* compose_tab_1_48 */ +static int hash_compose_tab_1_49[6] = +{-1,-1,-1,0,1,2}; /* hash_compose_tab_1_49 */ +static CompEntry compose_tab_1_49[] = { +{945, 8116, 0, NULL, NULL}, +{951, 8132, 0, NULL, NULL}, +{959, 8180, 0, NULL, NULL} +}; /* compose_tab_1_49 */ +static int hash_compose_tab_1[140] = +{-1,-1,-1,-1,-1,-1,-1,68,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,34,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,35,-1,-1,-1,-1,64,-1,0,-1,1,-1,2,39,3,40,4,41,5,6,7, + 8,9,10,36,11,12,42,13,43,14,44,15,16,37,45,46,50,47,51,17,52,18,53,19,54,20, + 55,21,56,22,23,24,25,26,27,38,28,29,48,30,57,31,58,32,33,59,60,61,62,65,66, + 63,67,69,-1,-1,-1,-1,-1,49,-1,-1}; /* hash_compose_tab_1 */ +static CompEntry compose_tab_1[] = { +{65, 193, 0, NULL, NULL}, +{67, 262, 0, NULL, NULL}, +{69, 201, 0, NULL, NULL}, +{71, 500, 0, NULL, NULL}, +{73, 205, 0, NULL, NULL}, +{75, 7728, 0, NULL, NULL}, +{76, 313, 0, NULL, NULL}, +{77, 7742, 0, NULL, NULL}, +{78, 323, 0, NULL, NULL}, +{79, 211, 0, NULL, NULL}, +{80, 7764, 0, NULL, NULL}, +{82, 340, 0, NULL, NULL}, +{83, 346, 0, NULL, NULL}, +{85, 218, 0, NULL, NULL}, +{87, 7810, 0, NULL, NULL}, +{89, 221, 0, NULL, NULL}, +{90, 377, 0, NULL, NULL}, +{97, 225, 0, NULL, NULL}, +{99, 263, 0, NULL, NULL}, +{101, 233, 0, NULL, NULL}, +{103, 501, 0, NULL, NULL}, +{105, 237, 0, NULL, NULL}, +{107, 7729, 0, NULL, NULL}, +{108, 314, 0, NULL, NULL}, +{109, 7743, 0, NULL, NULL}, +{110, 324, 0, NULL, NULL}, +{111, 243, 0, NULL, NULL}, +{112, 7765, 0, NULL, NULL}, +{114, 341, 0, NULL, NULL}, +{115, 347, 0, NULL, NULL}, +{117, 250, 0, NULL, NULL}, +{119, 7811, 0, NULL, NULL}, +{121, 253, 0, NULL, NULL}, +{122, 378, 0, NULL, NULL}, +{168, 8174, 0, NULL, NULL}, +{198, 508, 0, NULL, NULL}, +{216, 510, 0, NULL, NULL}, +{230, 509, 0, NULL, NULL}, +{248, 511, 0, NULL, NULL}, +{770, 0, 6, compose_tab_1_39, hash_compose_tab_1_39}, +{771, 0, 4, compose_tab_1_40, hash_compose_tab_1_40}, +{772, 0, 4, compose_tab_1_41, hash_compose_tab_1_41}, +{774, 0, 2, compose_tab_1_42, hash_compose_tab_1_42}, +{776, 0, 6, compose_tab_1_43, hash_compose_tab_1_43}, +{778, 0, 2, compose_tab_1_44, hash_compose_tab_1_44}, +{787, 0, 14, compose_tab_1_45, hash_compose_tab_1_45}, +{788, 0, 15, compose_tab_1_46, hash_compose_tab_1_46}, +{795, 0, 4, compose_tab_1_47, hash_compose_tab_1_47}, +{807, 0, 2, compose_tab_1_48, hash_compose_tab_1_48}, +{837, 0, 3, compose_tab_1_49, hash_compose_tab_1_49}, +{913, 8123, 0, NULL, NULL}, +{917, 8137, 0, NULL, NULL}, +{919, 8139, 0, NULL, NULL}, +{921, 8155, 0, NULL, NULL}, +{927, 8185, 0, NULL, NULL}, +{933, 8171, 0, NULL, NULL}, +{937, 8187, 0, NULL, NULL}, +{945, 8049, 0, NULL, NULL}, +{949, 8051, 0, NULL, NULL}, +{951, 8053, 0, NULL, NULL}, +{953, 8055, 0, NULL, NULL}, +{959, 8057, 0, NULL, NULL}, +{965, 8059, 0, NULL, NULL}, +{969, 8061, 0, NULL, NULL}, +{1043, 1027, 0, NULL, NULL}, +{1050, 1036, 0, NULL, NULL}, +{1075, 1107, 0, NULL, NULL}, +{1082, 1116, 0, NULL, NULL}, +{8127, 8142, 0, NULL, NULL}, +{8190, 8158, 0, NULL, NULL} +}; /* compose_tab_1 */ +static int hash_compose_tab_2_26[12] = +{-1,3,-1,5,-1,0,4,2,-1,1,-1,-1}; /* hash_compose_tab_2_26 */ +static CompEntry compose_tab_2_26[] = { +{65, 7852, 0, NULL, NULL}, +{69, 7878, 0, NULL, NULL}, +{79, 7896, 0, NULL, NULL}, +{97, 7853, 0, NULL, NULL}, +{101, 7879, 0, NULL, NULL}, +{111, 7897, 0, NULL, NULL} +}; /* compose_tab_2_26 */ +static int hash_compose_tab_2[54] = +{-1,-1,-1,20,-1,-1,-1,21,-1,22,-1,0,23,1,24,2,25,3,4,5,6,-1,-1,-1,-1,7,-1,-1, + -1,8,-1,9,-1,10,-1,11,12,-1,-1,-1,-1,-1,-1,13,-1,14,-1,15,26,16,17,18,19,-1}; /* hash_compose_tab_2 */ +static CompEntry compose_tab_2[] = { +{65, 194, 0, NULL, NULL}, +{67, 264, 0, NULL, NULL}, +{69, 202, 0, NULL, NULL}, +{71, 284, 0, NULL, NULL}, +{72, 292, 0, NULL, NULL}, +{73, 206, 0, NULL, NULL}, +{74, 308, 0, NULL, NULL}, +{79, 212, 0, NULL, NULL}, +{83, 348, 0, NULL, NULL}, +{85, 219, 0, NULL, NULL}, +{87, 372, 0, NULL, NULL}, +{89, 374, 0, NULL, NULL}, +{90, 7824, 0, NULL, NULL}, +{97, 226, 0, NULL, NULL}, +{99, 265, 0, NULL, NULL}, +{101, 234, 0, NULL, NULL}, +{103, 285, 0, NULL, NULL}, +{104, 293, 0, NULL, NULL}, +{105, 238, 0, NULL, NULL}, +{106, 309, 0, NULL, NULL}, +{111, 244, 0, NULL, NULL}, +{115, 349, 0, NULL, NULL}, +{117, 251, 0, NULL, NULL}, +{119, 373, 0, NULL, NULL}, +{121, 375, 0, NULL, NULL}, +{122, 7825, 0, NULL, NULL}, +{803, 0, 6, compose_tab_2_26, hash_compose_tab_2_26} +}; /* compose_tab_2 */ +static int hash_compose_tab_3_16[12] = +{-1,3,-1,5,-1,0,4,2,-1,1,-1,-1}; /* hash_compose_tab_3_16 */ +static CompEntry compose_tab_3_16[] = { +{65, 7850, 0, NULL, NULL}, +{69, 7876, 0, NULL, NULL}, +{79, 7894, 0, NULL, NULL}, +{97, 7851, 0, NULL, NULL}, +{101, 7877, 0, NULL, NULL}, +{111, 7895, 0, NULL, NULL} +}; /* compose_tab_3_16 */ +static int hash_compose_tab_3_17[4] = +{-1,0,1,-1}; /* hash_compose_tab_3_17 */ +static CompEntry compose_tab_3_17[] = { +{65, 7860, 0, NULL, NULL}, +{97, 7861, 0, NULL, NULL} +}; /* compose_tab_3_17 */ +static int hash_compose_tab_3_18[8] = +{2,-1,-1,-1,-1,1,3,0}; /* hash_compose_tab_3_18 */ +static CompEntry compose_tab_3_18[] = { +{79, 7904, 0, NULL, NULL}, +{85, 7918, 0, NULL, NULL}, +{111, 7905, 0, NULL, NULL}, +{117, 7919, 0, NULL, NULL} +}; /* compose_tab_3_18 */ +static int hash_compose_tab_3[38] = +{-1,-1,3,4,13,14,-1,15,-1,5,6,16,-1,7,17,-1,-1,-1,-1,-1,-1,8,-1,-1,-1,9,-1,0, + -1,10,-1,1,-1,-1,11,2,12,18}; /* hash_compose_tab_3 */ +static CompEntry compose_tab_3[] = { +{65, 195, 0, NULL, NULL}, +{69, 7868, 0, NULL, NULL}, +{73, 296, 0, NULL, NULL}, +{78, 209, 0, NULL, NULL}, +{79, 213, 0, NULL, NULL}, +{85, 360, 0, NULL, NULL}, +{86, 7804, 0, NULL, NULL}, +{89, 7928, 0, NULL, NULL}, +{97, 227, 0, NULL, NULL}, +{101, 7869, 0, NULL, NULL}, +{105, 297, 0, NULL, NULL}, +{110, 241, 0, NULL, NULL}, +{111, 245, 0, NULL, NULL}, +{117, 361, 0, NULL, NULL}, +{118, 7805, 0, NULL, NULL}, +{121, 7929, 0, NULL, NULL}, +{770, 0, 6, compose_tab_3_16, hash_compose_tab_3_16}, +{774, 0, 2, compose_tab_3_17, hash_compose_tab_3_17}, +{795, 0, 4, compose_tab_3_18, hash_compose_tab_3_18} +}; /* compose_tab_3 */ +static int hash_compose_tab_4_14[4] = +{-1,0,1,-1}; /* hash_compose_tab_4_14 */ +static CompEntry compose_tab_4_14[] = { +{65, 480, 0, NULL, NULL}, +{97, 481, 0, NULL, NULL} +}; /* compose_tab_4_14 */ +static int hash_compose_tab_4_15[8] = +{-1,0,2,-1,-1,1,3,-1}; /* hash_compose_tab_4_15 */ +static CompEntry compose_tab_4_15[] = { +{65, 478, 0, NULL, NULL}, +{85, 469, 0, NULL, NULL}, +{97, 479, 0, NULL, NULL}, +{117, 470, 0, NULL, NULL} +}; /* compose_tab_4_15 */ +static int hash_compose_tab_4_16[8] = +{-1,-1,1,3,0,2,-1,-1}; /* hash_compose_tab_4_16 */ +static CompEntry compose_tab_4_16[] = { +{76, 7736, 0, NULL, NULL}, +{82, 7772, 0, NULL, NULL}, +{108, 7737, 0, NULL, NULL}, +{114, 7773, 0, NULL, NULL} +}; /* compose_tab_4_16 */ +static int hash_compose_tab_4_17[4] = +{1,-1,-1,0}; /* hash_compose_tab_4_17 */ +static CompEntry compose_tab_4_17[] = { +{79, 492, 0, NULL, NULL}, +{111, 493, 0, NULL, NULL} +}; /* compose_tab_4_17 */ +static int hash_compose_tab_4[56] = +{-1,22,-1,-1,-1,11,13,-1,-1,0,-1,-1,-1,1,23,2,26,3,18,16,-1,-1,-1,4,17,19,-1, + 27,-1,5,12,-1,-1,-1,-1,-1,-1,20,-1,-1,24,6,-1,-1,-1,7,-1,8,14,9,15,21,25,-1, + -1,10}; /* hash_compose_tab_4 */ +static CompEntry compose_tab_4[] = { +{65, 256, 0, NULL, NULL}, +{69, 274, 0, NULL, NULL}, +{71, 7712, 0, NULL, NULL}, +{73, 298, 0, NULL, NULL}, +{79, 332, 0, NULL, NULL}, +{85, 362, 0, NULL, NULL}, +{97, 257, 0, NULL, NULL}, +{101, 275, 0, NULL, NULL}, +{103, 7713, 0, NULL, NULL}, +{105, 299, 0, NULL, NULL}, +{111, 333, 0, NULL, NULL}, +{117, 363, 0, NULL, NULL}, +{198, 482, 0, NULL, NULL}, +{230, 483, 0, NULL, NULL}, +{775, 0, 2, compose_tab_4_14, hash_compose_tab_4_14}, +{776, 0, 4, compose_tab_4_15, hash_compose_tab_4_15}, +{803, 0, 4, compose_tab_4_16, hash_compose_tab_4_16}, +{808, 0, 2, compose_tab_4_17, hash_compose_tab_4_17}, +{913, 8121, 0, NULL, NULL}, +{921, 8153, 0, NULL, NULL}, +{933, 8169, 0, NULL, NULL}, +{945, 8113, 0, NULL, NULL}, +{953, 8145, 0, NULL, NULL}, +{965, 8161, 0, NULL, NULL}, +{1048, 1250, 0, NULL, NULL}, +{1059, 1262, 0, NULL, NULL}, +{1080, 1251, 0, NULL, NULL}, +{1091, 1263, 0, NULL, NULL} +}; /* compose_tab_4 */ +static int hash_compose_tab_5_12[4] = +{-1,0,1,-1}; /* hash_compose_tab_5_12 */ +static CompEntry compose_tab_5_12[] = { +{65, 7862, 0, NULL, NULL}, +{97, 7863, 0, NULL, NULL} +}; /* compose_tab_5_12 */ +static int hash_compose_tab_5_13[4] = +{-1,0,1,-1}; /* hash_compose_tab_5_13 */ +static CompEntry compose_tab_5_13[] = { +{69, 7708, 0, NULL, NULL}, +{101, 7709, 0, NULL, NULL} +}; /* compose_tab_5_13 */ +static int hash_compose_tab_5[60] = +{28,-1,-1,-1,-1,0,19,-1,-1,1,-1,2,29,3,14,-1,-1,-1,-1,4,20,15,-1,12,-1,5,21, + 13,22,23,-1,-1,-1,16,-1,-1,-1,6,-1,24,-1,7,-1,8,-1,9,17,-1,-1,-1,-1,10,25,18, + -1,-1,-1,11,26,27}; /* hash_compose_tab_5 */ +static CompEntry compose_tab_5[] = { +{65, 258, 0, NULL, NULL}, +{69, 276, 0, NULL, NULL}, +{71, 286, 0, NULL, NULL}, +{73, 300, 0, NULL, NULL}, +{79, 334, 0, NULL, NULL}, +{85, 364, 0, NULL, NULL}, +{97, 259, 0, NULL, NULL}, +{101, 277, 0, NULL, NULL}, +{103, 287, 0, NULL, NULL}, +{105, 301, 0, NULL, NULL}, +{111, 335, 0, NULL, NULL}, +{117, 365, 0, NULL, NULL}, +{803, 0, 2, compose_tab_5_12, hash_compose_tab_5_12}, +{807, 0, 2, compose_tab_5_13, hash_compose_tab_5_13}, +{913, 8120, 0, NULL, NULL}, +{921, 8152, 0, NULL, NULL}, +{933, 8168, 0, NULL, NULL}, +{945, 8112, 0, NULL, NULL}, +{953, 8144, 0, NULL, NULL}, +{965, 8160, 0, NULL, NULL}, +{1040, 1232, 0, NULL, NULL}, +{1045, 1238, 0, NULL, NULL}, +{1046, 1217, 0, NULL, NULL}, +{1048, 1049, 0, NULL, NULL}, +{1059, 1038, 0, NULL, NULL}, +{1072, 1233, 0, NULL, NULL}, +{1077, 1239, 0, NULL, NULL}, +{1078, 1218, 0, NULL, NULL}, +{1080, 1081, 0, NULL, NULL}, +{1091, 1118, 0, NULL, NULL} +}; /* compose_tab_5 */ +static int hash_compose_tab_6_36[4] = +{1,-1,-1,0}; /* hash_compose_tab_6_36 */ +static CompEntry compose_tab_6_36[] = { +{83, 7780, 0, NULL, NULL}, +{115, 7781, 0, NULL, NULL} +}; /* compose_tab_6_36 */ +static int hash_compose_tab_6_38[4] = +{1,-1,-1,0}; /* hash_compose_tab_6_38 */ +static CompEntry compose_tab_6_38[] = { +{83, 7782, 0, NULL, NULL}, +{115, 7783, 0, NULL, NULL} +}; /* compose_tab_6_38 */ +static int hash_compose_tab_6_39[4] = +{1,-1,-1,0}; /* hash_compose_tab_6_39 */ +static CompEntry compose_tab_6_39[] = { +{83, 7784, 0, NULL, NULL}, +{115, 7785, 0, NULL, NULL} +}; /* compose_tab_6_39 */ +static int hash_compose_tab_6[80] = +{10,-1,11,12,13,39,-1,14,15,16,17,-1,-1,-1,-1,-1,-1,-1,18,19,20,21,22,23,24, + -1,-1,-1,-1,25,26,-1,27,-1,28,29,30,-1,-1,31,32,33,34,-1,-1,-1,-1,-1,-1,36, + -1,-1,-1,-1,37,-1,-1,-1,-1,-1,38,-1,-1,35,-1,-1,0,1,2,3,4,5,6,7,-1,-1,-1,8,9, + -1}; /* hash_compose_tab_6 */ +static CompEntry compose_tab_6[] = { +{66, 7682, 0, NULL, NULL}, +{67, 266, 0, NULL, NULL}, +{68, 7690, 0, NULL, NULL}, +{69, 278, 0, NULL, NULL}, +{70, 7710, 0, NULL, NULL}, +{71, 288, 0, NULL, NULL}, +{72, 7714, 0, NULL, NULL}, +{73, 304, 0, NULL, NULL}, +{77, 7744, 0, NULL, NULL}, +{78, 7748, 0, NULL, NULL}, +{80, 7766, 0, NULL, NULL}, +{82, 7768, 0, NULL, NULL}, +{83, 7776, 0, NULL, NULL}, +{84, 7786, 0, NULL, NULL}, +{87, 7814, 0, NULL, NULL}, +{88, 7818, 0, NULL, NULL}, +{89, 7822, 0, NULL, NULL}, +{90, 379, 0, NULL, NULL}, +{98, 7683, 0, NULL, NULL}, +{99, 267, 0, NULL, NULL}, +{100, 7691, 0, NULL, NULL}, +{101, 279, 0, NULL, NULL}, +{102, 7711, 0, NULL, NULL}, +{103, 289, 0, NULL, NULL}, +{104, 7715, 0, NULL, NULL}, +{109, 7745, 0, NULL, NULL}, +{110, 7749, 0, NULL, NULL}, +{112, 7767, 0, NULL, NULL}, +{114, 7769, 0, NULL, NULL}, +{115, 7777, 0, NULL, NULL}, +{116, 7787, 0, NULL, NULL}, +{119, 7815, 0, NULL, NULL}, +{120, 7819, 0, NULL, NULL}, +{121, 7823, 0, NULL, NULL}, +{122, 380, 0, NULL, NULL}, +{383, 7835, 0, NULL, NULL}, +{769, 0, 2, compose_tab_6_36, hash_compose_tab_6_36}, +{774, 784, 0, NULL, NULL}, +{780, 0, 2, compose_tab_6_38, hash_compose_tab_6_38}, +{803, 0, 2, compose_tab_6_39, hash_compose_tab_6_39} +}; /* compose_tab_6 */ +static int hash_compose_tab_7_23[4] = +{1,-1,-1,0}; /* hash_compose_tab_7_23 */ +static CompEntry compose_tab_7_23[] = { +{79, 7758, 0, NULL, NULL}, +{111, 7759, 0, NULL, NULL} +}; /* compose_tab_7_23 */ +static int hash_compose_tab_7_24[4] = +{-1,0,1,-1}; /* hash_compose_tab_7_24 */ +static CompEntry compose_tab_7_24[] = { +{85, 7802, 0, NULL, NULL}, +{117, 7803, 0, NULL, NULL} +}; /* compose_tab_7_24 */ +static int hash_compose_tab_7[100] = +{48,10,21,-1,11,12,-1,-1,-1,-1,49,13,-1,-1,-1,20,14,15,-1,16,17,18,25,-1,-1, + -1,-1,-1,-1,22,30,-1,-1,26,-1,-1,-1,-1,-1,-1,31,-1,-1,-1,-1,32,33,34,35,-1, + -1,-1,-1,27,36,-1,-1,-1,-1,37,-1,-1,-1,38,-1,0,28,39,-1,1,-1,23,2,3,24,40,-1, + 41,29,4,42,43,44,-1,-1,5,45,6,7,8,-1,46,-1,-1,-1,47,-1,9,-1,19}; /* hash_compose_tab_7 */ +static CompEntry compose_tab_7[] = { +{65, 196, 0, NULL, NULL}, +{69, 203, 0, NULL, NULL}, +{72, 7718, 0, NULL, NULL}, +{73, 207, 0, NULL, NULL}, +{79, 214, 0, NULL, NULL}, +{85, 220, 0, NULL, NULL}, +{87, 7812, 0, NULL, NULL}, +{88, 7820, 0, NULL, NULL}, +{89, 376, 0, NULL, NULL}, +{97, 228, 0, NULL, NULL}, +{101, 235, 0, NULL, NULL}, +{104, 7719, 0, NULL, NULL}, +{105, 239, 0, NULL, NULL}, +{111, 246, 0, NULL, NULL}, +{116, 7831, 0, NULL, NULL}, +{117, 252, 0, NULL, NULL}, +{119, 7813, 0, NULL, NULL}, +{120, 7821, 0, NULL, NULL}, +{121, 255, 0, NULL, NULL}, +{399, 1242, 0, NULL, NULL}, +{415, 1258, 0, NULL, NULL}, +{601, 1243, 0, NULL, NULL}, +{629, 1259, 0, NULL, NULL}, +{771, 0, 2, compose_tab_7_23, hash_compose_tab_7_23}, +{772, 0, 2, compose_tab_7_24, hash_compose_tab_7_24}, +{921, 938, 0, NULL, NULL}, +{933, 939, 0, NULL, NULL}, +{953, 970, 0, NULL, NULL}, +{965, 971, 0, NULL, NULL}, +{978, 980, 0, NULL, NULL}, +{1030, 1031, 0, NULL, NULL}, +{1040, 1234, 0, NULL, NULL}, +{1045, 1025, 0, NULL, NULL}, +{1046, 1244, 0, NULL, NULL}, +{1047, 1246, 0, NULL, NULL}, +{1048, 1252, 0, NULL, NULL}, +{1054, 1254, 0, NULL, NULL}, +{1059, 1264, 0, NULL, NULL}, +{1063, 1268, 0, NULL, NULL}, +{1067, 1272, 0, NULL, NULL}, +{1072, 1235, 0, NULL, NULL}, +{1077, 1105, 0, NULL, NULL}, +{1078, 1245, 0, NULL, NULL}, +{1079, 1247, 0, NULL, NULL}, +{1080, 1253, 0, NULL, NULL}, +{1086, 1255, 0, NULL, NULL}, +{1091, 1265, 0, NULL, NULL}, +{1095, 1269, 0, NULL, NULL}, +{1099, 1273, 0, NULL, NULL}, +{1110, 1111, 0, NULL, NULL} +}; /* compose_tab_7 */ +static int hash_compose_tab_8_12[12] = +{-1,3,-1,5,-1,0,4,2,-1,1,-1,-1}; /* hash_compose_tab_8_12 */ +static CompEntry compose_tab_8_12[] = { +{65, 7848, 0, NULL, NULL}, +{69, 7874, 0, NULL, NULL}, +{79, 7892, 0, NULL, NULL}, +{97, 7849, 0, NULL, NULL}, +{101, 7875, 0, NULL, NULL}, +{111, 7893, 0, NULL, NULL} +}; /* compose_tab_8_12 */ +static int hash_compose_tab_8_13[4] = +{-1,0,1,-1}; /* hash_compose_tab_8_13 */ +static CompEntry compose_tab_8_13[] = { +{65, 7858, 0, NULL, NULL}, +{97, 7859, 0, NULL, NULL} +}; /* compose_tab_8_13 */ +static int hash_compose_tab_8_14[8] = +{2,-1,-1,-1,-1,1,3,0}; /* hash_compose_tab_8_14 */ +static CompEntry compose_tab_8_14[] = { +{79, 7902, 0, NULL, NULL}, +{85, 7916, 0, NULL, NULL}, +{111, 7903, 0, NULL, NULL}, +{117, 7917, 0, NULL, NULL} +}; /* compose_tab_8_14 */ +static int hash_compose_tab_8[30] = +{-1,11,-1,-1,-1,0,-1,6,-1,1,-1,7,-1,2,-1,8,14,-1,-1,3,12,9,-1,-1,13,4,-1,10, + -1,5}; /* hash_compose_tab_8 */ +static CompEntry compose_tab_8[] = { +{65, 7842, 0, NULL, NULL}, +{69, 7866, 0, NULL, NULL}, +{73, 7880, 0, NULL, NULL}, +{79, 7886, 0, NULL, NULL}, +{85, 7910, 0, NULL, NULL}, +{89, 7926, 0, NULL, NULL}, +{97, 7843, 0, NULL, NULL}, +{101, 7867, 0, NULL, NULL}, +{105, 7881, 0, NULL, NULL}, +{111, 7887, 0, NULL, NULL}, +{117, 7911, 0, NULL, NULL}, +{121, 7927, 0, NULL, NULL}, +{770, 0, 6, compose_tab_8_12, hash_compose_tab_8_12}, +{774, 0, 2, compose_tab_8_13, hash_compose_tab_8_13}, +{795, 0, 4, compose_tab_8_14, hash_compose_tab_8_14} +}; /* compose_tab_8 */ +static int hash_compose_tab_9[12] = +{-1,1,2,5,-1,0,-1,-1,-1,3,-1,4}; /* hash_compose_tab_9 */ +static CompEntry compose_tab_9[] = { +{65, 197, 0, NULL, NULL}, +{85, 366, 0, NULL, NULL}, +{97, 229, 0, NULL, NULL}, +{117, 367, 0, NULL, NULL}, +{119, 7832, 0, NULL, NULL}, +{121, 7833, 0, NULL, NULL} +}; /* compose_tab_9 */ +static int hash_compose_tab_10[12] = +{-1,1,-1,2,4,-1,-1,0,-1,3,-1,5}; /* hash_compose_tab_10 */ +static CompEntry compose_tab_10[] = { +{79, 336, 0, NULL, NULL}, +{85, 368, 0, NULL, NULL}, +{111, 337, 0, NULL, NULL}, +{117, 369, 0, NULL, NULL}, +{1059, 1266, 0, NULL, NULL}, +{1091, 1267, 0, NULL, NULL} +}; /* compose_tab_10 */ +static int hash_compose_tab_11_33[4] = +{-1,0,1,-1}; /* hash_compose_tab_11_33 */ +static CompEntry compose_tab_11_33[] = { +{85, 473, 0, NULL, NULL}, +{117, 474, 0, NULL, NULL} +}; /* compose_tab_11_33 */ +static int hash_compose_tab_11[68] = +{2,3,-1,4,-1,5,-1,6,7,-1,8,9,-1,-1,10,11,12,13,-1,-1,-1,-1,14,-1,-1,-1,-1,-1, + 33,15,-1,16,17,18,31,19,-1,20,21,22,23,-1,24,25,-1,-1,26,27,28,29,32,-1,-1, + -1,30,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,-1,1}; /* hash_compose_tab_11 */ +static CompEntry compose_tab_11[] = { +{65, 461, 0, NULL, NULL}, +{67, 268, 0, NULL, NULL}, +{68, 270, 0, NULL, NULL}, +{69, 282, 0, NULL, NULL}, +{71, 486, 0, NULL, NULL}, +{73, 463, 0, NULL, NULL}, +{75, 488, 0, NULL, NULL}, +{76, 317, 0, NULL, NULL}, +{78, 327, 0, NULL, NULL}, +{79, 465, 0, NULL, NULL}, +{82, 344, 0, NULL, NULL}, +{83, 352, 0, NULL, NULL}, +{84, 356, 0, NULL, NULL}, +{85, 467, 0, NULL, NULL}, +{90, 381, 0, NULL, NULL}, +{97, 462, 0, NULL, NULL}, +{99, 269, 0, NULL, NULL}, +{100, 271, 0, NULL, NULL}, +{101, 283, 0, NULL, NULL}, +{103, 487, 0, NULL, NULL}, +{105, 464, 0, NULL, NULL}, +{106, 496, 0, NULL, NULL}, +{107, 489, 0, NULL, NULL}, +{108, 318, 0, NULL, NULL}, +{110, 328, 0, NULL, NULL}, +{111, 466, 0, NULL, NULL}, +{114, 345, 0, NULL, NULL}, +{115, 353, 0, NULL, NULL}, +{116, 357, 0, NULL, NULL}, +{117, 468, 0, NULL, NULL}, +{122, 382, 0, NULL, NULL}, +{439, 494, 0, NULL, NULL}, +{658, 495, 0, NULL, NULL}, +{776, 0, 2, compose_tab_11_33, hash_compose_tab_11_33} +}; /* compose_tab_11 */ +static int hash_compose_tab_12_1[4] = +{-1,0,1,-1}; /* hash_compose_tab_12_1 */ +static CompEntry compose_tab_12_1[] = { +{953, 912, 0, NULL, NULL}, +{965, 944, 0, NULL, NULL} +}; /* compose_tab_12_1 */ +static int hash_compose_tab_12[34] = +{11,4,12,5,-1,-1,-1,13,-1,6,-1,-1,-1,14,-1,7,-1,15,-1,8,-1,-1,-1,-1,-1,-1,16, + 9,1,2,-1,10,0,3}; /* hash_compose_tab_12 */ +static CompEntry compose_tab_12[] = { +{168, 901, 0, NULL, NULL}, +{776, 0, 2, compose_tab_12_1, hash_compose_tab_12_1}, +{913, 902, 0, NULL, NULL}, +{917, 904, 0, NULL, NULL}, +{919, 905, 0, NULL, NULL}, +{921, 906, 0, NULL, NULL}, +{927, 908, 0, NULL, NULL}, +{933, 910, 0, NULL, NULL}, +{937, 911, 0, NULL, NULL}, +{945, 940, 0, NULL, NULL}, +{949, 941, 0, NULL, NULL}, +{951, 942, 0, NULL, NULL}, +{953, 943, 0, NULL, NULL}, +{959, 972, 0, NULL, NULL}, +{965, 973, 0, NULL, NULL}, +{969, 974, 0, NULL, NULL}, +{978, 979, 0, NULL, NULL} +}; /* compose_tab_12 */ +static int hash_compose_tab_13[28] = +{-1,5,10,-1,-1,11,-1,-1,-1,0,-1,-1,-1,1,6,-1,-1,2,7,-1,12,8,13,3,-1,-1,4,9}; /* hash_compose_tab_13 */ +static CompEntry compose_tab_13[] = { +{65, 512, 0, NULL, NULL}, +{69, 516, 0, NULL, NULL}, +{73, 520, 0, NULL, NULL}, +{79, 524, 0, NULL, NULL}, +{82, 528, 0, NULL, NULL}, +{85, 532, 0, NULL, NULL}, +{97, 513, 0, NULL, NULL}, +{101, 517, 0, NULL, NULL}, +{105, 521, 0, NULL, NULL}, +{111, 525, 0, NULL, NULL}, +{114, 529, 0, NULL, NULL}, +{117, 533, 0, NULL, NULL}, +{1140, 1142, 0, NULL, NULL}, +{1141, 1143, 0, NULL, NULL} +}; /* compose_tab_13 */ +static int hash_compose_tab_14[24] = +{-1,2,6,-1,-1,7,-1,3,-1,8,4,-1,-1,5,-1,9,-1,0,10,-1,-1,1,11,-1}; /* hash_compose_tab_14 */ +static CompEntry compose_tab_14[] = { +{65, 514, 0, NULL, NULL}, +{69, 518, 0, NULL, NULL}, +{73, 522, 0, NULL, NULL}, +{79, 526, 0, NULL, NULL}, +{82, 530, 0, NULL, NULL}, +{85, 534, 0, NULL, NULL}, +{97, 515, 0, NULL, NULL}, +{101, 519, 0, NULL, NULL}, +{105, 523, 0, NULL, NULL}, +{111, 527, 0, NULL, NULL}, +{114, 531, 0, NULL, NULL}, +{117, 535, 0, NULL, NULL} +}; /* compose_tab_14 */ +static int hash_compose_tab_15_0[12] = +{-1,0,2,4,-1,-1,-1,1,-1,3,5,-1}; /* hash_compose_tab_15_0 */ +static CompEntry compose_tab_15_0[] = { +{913, 8072, 0, NULL, NULL}, +{919, 8088, 0, NULL, NULL}, +{937, 8104, 0, NULL, NULL}, +{945, 8064, 0, NULL, NULL}, +{951, 8080, 0, NULL, NULL}, +{969, 8096, 0, NULL, NULL} +}; /* compose_tab_15_0 */ +static int hash_compose_tab_15[30] = +{-1,12,-1,-1,-1,13,-1,6,-1,14,-1,-1,-1,1,-1,7,-1,2,-1,3,8,4,9,10,-1,-1,-1,0,5, + 11}; /* hash_compose_tab_15 */ +static CompEntry compose_tab_15[] = { +{837, 0, 6, compose_tab_15_0, hash_compose_tab_15_0}, +{913, 7944, 0, NULL, NULL}, +{917, 7960, 0, NULL, NULL}, +{919, 7976, 0, NULL, NULL}, +{921, 7992, 0, NULL, NULL}, +{927, 8008, 0, NULL, NULL}, +{937, 8040, 0, NULL, NULL}, +{945, 7936, 0, NULL, NULL}, +{949, 7952, 0, NULL, NULL}, +{951, 7968, 0, NULL, NULL}, +{953, 7984, 0, NULL, NULL}, +{959, 8000, 0, NULL, NULL}, +{961, 8164, 0, NULL, NULL}, +{965, 8016, 0, NULL, NULL}, +{969, 8032, 0, NULL, NULL} +}; /* compose_tab_15 */ +static int hash_compose_tab_16_0[12] = +{-1,0,2,4,-1,-1,-1,1,-1,3,5,-1}; /* hash_compose_tab_16_0 */ +static CompEntry compose_tab_16_0[] = { +{913, 8073, 0, NULL, NULL}, +{919, 8089, 0, NULL, NULL}, +{937, 8105, 0, NULL, NULL}, +{945, 8065, 0, NULL, NULL}, +{951, 8081, 0, NULL, NULL}, +{969, 8097, 0, NULL, NULL} +}; /* compose_tab_16_0 */ +static int hash_compose_tab_16[34] = +{11,3,12,4,-1,-1,-1,13,-1,5,14,6,-1,15,-1,7,-1,16,-1,8,-1,0,-1,-1,-1,-1,-1,9, + -1,1,-1,10,-1,2}; /* hash_compose_tab_16 */ +static CompEntry compose_tab_16[] = { +{837, 0, 6, compose_tab_16_0, hash_compose_tab_16_0}, +{913, 7945, 0, NULL, NULL}, +{917, 7961, 0, NULL, NULL}, +{919, 7977, 0, NULL, NULL}, +{921, 7993, 0, NULL, NULL}, +{927, 8009, 0, NULL, NULL}, +{929, 8172, 0, NULL, NULL}, +{933, 8025, 0, NULL, NULL}, +{937, 8041, 0, NULL, NULL}, +{945, 7937, 0, NULL, NULL}, +{949, 7953, 0, NULL, NULL}, +{951, 7969, 0, NULL, NULL}, +{953, 7985, 0, NULL, NULL}, +{959, 8001, 0, NULL, NULL}, +{961, 8165, 0, NULL, NULL}, +{965, 8017, 0, NULL, NULL}, +{969, 8033, 0, NULL, NULL} +}; /* compose_tab_16 */ +static int hash_compose_tab_17[8] = +{2,-1,-1,-1,-1,1,3,0}; /* hash_compose_tab_17 */ +static CompEntry compose_tab_17[] = { +{79, 416, 0, NULL, NULL}, +{85, 431, 0, NULL, NULL}, +{111, 417, 0, NULL, NULL}, +{117, 432, 0, NULL, NULL} +}; /* compose_tab_17 */ +static int hash_compose_tab_18_38[8] = +{2,-1,-1,-1,-1,1,3,0}; /* hash_compose_tab_18_38 */ +static CompEntry compose_tab_18_38[] = { +{79, 7906, 0, NULL, NULL}, +{85, 7920, 0, NULL, NULL}, +{111, 7907, 0, NULL, NULL}, +{117, 7921, 0, NULL, NULL} +}; /* compose_tab_18_38 */ +static int hash_compose_tab_18[78] = +{9,10,-1,-1,11,12,13,14,15,16,-1,17,18,-1,-1,38,-1,-1,-1,19,20,-1,21,22,-1,-1, + 23,24,-1,25,26,27,28,29,-1,-1,30,31,32,33,34,35,-1,36,37,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,1,-1,2,3,-1,-1,4,5,-1,6,7,8}; /* hash_compose_tab_18 */ +static CompEntry compose_tab_18[] = { +{65, 7840, 0, NULL, NULL}, +{66, 7684, 0, NULL, NULL}, +{68, 7692, 0, NULL, NULL}, +{69, 7864, 0, NULL, NULL}, +{72, 7716, 0, NULL, NULL}, +{73, 7882, 0, NULL, NULL}, +{75, 7730, 0, NULL, NULL}, +{76, 7734, 0, NULL, NULL}, +{77, 7746, 0, NULL, NULL}, +{78, 7750, 0, NULL, NULL}, +{79, 7884, 0, NULL, NULL}, +{82, 7770, 0, NULL, NULL}, +{83, 7778, 0, NULL, NULL}, +{84, 7788, 0, NULL, NULL}, +{85, 7908, 0, NULL, NULL}, +{86, 7806, 0, NULL, NULL}, +{87, 7816, 0, NULL, NULL}, +{89, 7924, 0, NULL, NULL}, +{90, 7826, 0, NULL, NULL}, +{97, 7841, 0, NULL, NULL}, +{98, 7685, 0, NULL, NULL}, +{100, 7693, 0, NULL, NULL}, +{101, 7865, 0, NULL, NULL}, +{104, 7717, 0, NULL, NULL}, +{105, 7883, 0, NULL, NULL}, +{107, 7731, 0, NULL, NULL}, +{108, 7735, 0, NULL, NULL}, +{109, 7747, 0, NULL, NULL}, +{110, 7751, 0, NULL, NULL}, +{111, 7885, 0, NULL, NULL}, +{114, 7771, 0, NULL, NULL}, +{115, 7779, 0, NULL, NULL}, +{116, 7789, 0, NULL, NULL}, +{117, 7909, 0, NULL, NULL}, +{118, 7807, 0, NULL, NULL}, +{119, 7817, 0, NULL, NULL}, +{121, 7925, 0, NULL, NULL}, +{122, 7827, 0, NULL, NULL}, +{795, 0, 4, compose_tab_18_38, hash_compose_tab_18_38} +}; /* compose_tab_18 */ +static int hash_compose_tab_19[4] = +{-1,0,1,-1}; /* hash_compose_tab_19 */ +static CompEntry compose_tab_19[] = { +{85, 7794, 0, NULL, NULL}, +{117, 7795, 0, NULL, NULL} +}; /* compose_tab_19 */ +static int hash_compose_tab_20[4] = +{-1,0,1,-1}; /* hash_compose_tab_20 */ +static CompEntry compose_tab_20[] = { +{65, 7680, 0, NULL, NULL}, +{97, 7681, 0, NULL, NULL} +}; /* compose_tab_20 */ +static int hash_compose_tab_21[40] = +{-1,-1,7,8,9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,-1,-1,12,13,-1, + -1,0,1,14,15,2,3,16,17,4,5,18,6,19}; /* hash_compose_tab_21 */ +static CompEntry compose_tab_21[] = { +{67, 199, 0, NULL, NULL}, +{68, 7696, 0, NULL, NULL}, +{71, 290, 0, NULL, NULL}, +{72, 7720, 0, NULL, NULL}, +{75, 310, 0, NULL, NULL}, +{76, 315, 0, NULL, NULL}, +{78, 325, 0, NULL, NULL}, +{82, 342, 0, NULL, NULL}, +{83, 350, 0, NULL, NULL}, +{84, 354, 0, NULL, NULL}, +{99, 231, 0, NULL, NULL}, +{100, 7697, 0, NULL, NULL}, +{103, 291, 0, NULL, NULL}, +{104, 7721, 0, NULL, NULL}, +{107, 311, 0, NULL, NULL}, +{108, 316, 0, NULL, NULL}, +{110, 326, 0, NULL, NULL}, +{114, 343, 0, NULL, NULL}, +{115, 351, 0, NULL, NULL}, +{116, 355, 0, NULL, NULL} +}; /* compose_tab_21 */ +static int hash_compose_tab_22[20] = +{-1,6,-1,-1,-1,0,4,7,-1,1,-1,8,-1,2,-1,-1,-1,5,9,3}; /* hash_compose_tab_22 */ +static CompEntry compose_tab_22[] = { +{65, 260, 0, NULL, NULL}, +{69, 280, 0, NULL, NULL}, +{73, 302, 0, NULL, NULL}, +{79, 490, 0, NULL, NULL}, +{85, 370, 0, NULL, NULL}, +{97, 261, 0, NULL, NULL}, +{101, 281, 0, NULL, NULL}, +{105, 303, 0, NULL, NULL}, +{111, 491, 0, NULL, NULL}, +{117, 371, 0, NULL, NULL} +}; /* compose_tab_22 */ +static int hash_compose_tab_23[24] = +{-1,-1,-1,-1,2,6,3,7,-1,-1,-1,-1,4,5,8,9,-1,-1,-1,-1,0,1,10,11}; /* hash_compose_tab_23 */ +static CompEntry compose_tab_23[] = { +{68, 7698, 0, NULL, NULL}, +{69, 7704, 0, NULL, NULL}, +{76, 7740, 0, NULL, NULL}, +{78, 7754, 0, NULL, NULL}, +{84, 7792, 0, NULL, NULL}, +{85, 7798, 0, NULL, NULL}, +{100, 7699, 0, NULL, NULL}, +{101, 7705, 0, NULL, NULL}, +{108, 7741, 0, NULL, NULL}, +{110, 7755, 0, NULL, NULL}, +{116, 7793, 0, NULL, NULL}, +{117, 7799, 0, NULL, NULL} +}; /* compose_tab_23 */ +static int hash_compose_tab_24[4] = +{0,1,-1,-1}; /* hash_compose_tab_24 */ +static CompEntry compose_tab_24[] = { +{72, 7722, 0, NULL, NULL}, +{104, 7723, 0, NULL, NULL} +}; /* compose_tab_24 */ +static int hash_compose_tab_25[12] = +{-1,1,2,-1,-1,3,-1,-1,-1,0,4,5}; /* hash_compose_tab_25 */ +static CompEntry compose_tab_25[] = { +{69, 7706, 0, NULL, NULL}, +{73, 7724, 0, NULL, NULL}, +{85, 7796, 0, NULL, NULL}, +{101, 7707, 0, NULL, NULL}, +{105, 7725, 0, NULL, NULL}, +{117, 7797, 0, NULL, NULL} +}; /* compose_tab_25 */ +static int hash_compose_tab_26[34] = +{1,-1,10,-1,-1,11,12,2,3,13,4,-1,14,-1,5,15,6,-1,-1,-1,16,-1,7,-1,-1,-1,-1,-1, + -1,-1,8,-1,0,9}; /* hash_compose_tab_26 */ +static CompEntry compose_tab_26[] = { +{66, 7686, 0, NULL, NULL}, +{68, 7694, 0, NULL, NULL}, +{75, 7732, 0, NULL, NULL}, +{76, 7738, 0, NULL, NULL}, +{78, 7752, 0, NULL, NULL}, +{82, 7774, 0, NULL, NULL}, +{84, 7790, 0, NULL, NULL}, +{90, 7828, 0, NULL, NULL}, +{98, 7687, 0, NULL, NULL}, +{100, 7695, 0, NULL, NULL}, +{104, 7830, 0, NULL, NULL}, +{107, 7733, 0, NULL, NULL}, +{108, 7739, 0, NULL, NULL}, +{110, 7753, 0, NULL, NULL}, +{114, 7775, 0, NULL, NULL}, +{116, 7791, 0, NULL, NULL}, +{122, 7829, 0, NULL, NULL} +}; /* compose_tab_26 */ +static int hash_compose_tab_27_1[4] = +{-1,0,1,-1}; /* hash_compose_tab_27_1 */ +static CompEntry compose_tab_27_1[] = { +{953, 8151, 0, NULL, NULL}, +{965, 8167, 0, NULL, NULL} +}; /* compose_tab_27_1 */ +static int hash_compose_tab_27_2_0[12] = +{-1,0,2,4,-1,-1,-1,1,-1,3,5,-1}; /* hash_compose_tab_27_2_0 */ +static CompEntry compose_tab_27_2_0[] = { +{913, 8078, 0, NULL, NULL}, +{919, 8094, 0, NULL, NULL}, +{937, 8110, 0, NULL, NULL}, +{945, 8070, 0, NULL, NULL}, +{951, 8086, 0, NULL, NULL}, +{969, 8102, 0, NULL, NULL} +}; /* compose_tab_27_2_0 */ +static int hash_compose_tab_27_2[20] = +{-1,3,-1,-1,-1,5,8,-1,-1,9,-1,6,-1,1,7,-1,-1,0,4,2}; /* hash_compose_tab_27_2 */ +static CompEntry compose_tab_27_2[] = { +{837, 0, 6, compose_tab_27_2_0, hash_compose_tab_27_2_0}, +{913, 7950, 0, NULL, NULL}, +{919, 7982, 0, NULL, NULL}, +{921, 7998, 0, NULL, NULL}, +{937, 8046, 0, NULL, NULL}, +{945, 7942, 0, NULL, NULL}, +{951, 7974, 0, NULL, NULL}, +{953, 7990, 0, NULL, NULL}, +{965, 8022, 0, NULL, NULL}, +{969, 8038, 0, NULL, NULL} +}; /* compose_tab_27_2 */ +static int hash_compose_tab_27_3_0[12] = +{-1,0,2,4,-1,-1,-1,1,-1,3,5,-1}; /* hash_compose_tab_27_3_0 */ +static CompEntry compose_tab_27_3_0[] = { +{913, 8079, 0, NULL, NULL}, +{919, 8095, 0, NULL, NULL}, +{937, 8111, 0, NULL, NULL}, +{945, 8071, 0, NULL, NULL}, +{951, 8087, 0, NULL, NULL}, +{969, 8103, 0, NULL, NULL} +}; /* compose_tab_27_3_0 */ +static int hash_compose_tab_27_3[22] = +{-1,0,10,-1,-1,7,-1,8,-1,4,-1,1,-1,5,-1,-1,-1,2,-1,3,9,6}; /* hash_compose_tab_27_3 */ +static CompEntry compose_tab_27_3[] = { +{837, 0, 6, compose_tab_27_3_0, hash_compose_tab_27_3_0}, +{913, 7951, 0, NULL, NULL}, +{919, 7983, 0, NULL, NULL}, +{921, 7999, 0, NULL, NULL}, +{933, 8031, 0, NULL, NULL}, +{937, 8047, 0, NULL, NULL}, +{945, 7943, 0, NULL, NULL}, +{951, 7975, 0, NULL, NULL}, +{953, 7991, 0, NULL, NULL}, +{965, 8023, 0, NULL, NULL}, +{969, 8039, 0, NULL, NULL} +}; /* compose_tab_27_3 */ +static int hash_compose_tab_27_4[6] = +{-1,-1,-1,0,1,2}; /* hash_compose_tab_27_4 */ +static CompEntry compose_tab_27_4[] = { +{945, 8119, 0, NULL, NULL}, +{951, 8135, 0, NULL, NULL}, +{969, 8183, 0, NULL, NULL} +}; /* compose_tab_27_4 */ +static int hash_compose_tab_27[24] = +{0,-1,-1,-1,-1,8,11,-1,1,5,9,-1,-1,-1,-1,6,10,7,-1,2,3,4,-1,-1}; /* hash_compose_tab_27 */ +static CompEntry compose_tab_27[] = { +{168, 8129, 0, NULL, NULL}, +{776, 0, 2, compose_tab_27_1, hash_compose_tab_27_1}, +{787, 0, 10, compose_tab_27_2, hash_compose_tab_27_2}, +{788, 0, 11, compose_tab_27_3, hash_compose_tab_27_3}, +{837, 0, 3, compose_tab_27_4, hash_compose_tab_27_4}, +{945, 8118, 0, NULL, NULL}, +{951, 8134, 0, NULL, NULL}, +{953, 8150, 0, NULL, NULL}, +{965, 8166, 0, NULL, NULL}, +{969, 8182, 0, NULL, NULL}, +{8127, 8143, 0, NULL, NULL}, +{8190, 8159, 0, NULL, NULL} +}; /* compose_tab_27 */ +static int hash_compose_tab_28[12] = +{-1,0,2,4,-1,-1,-1,1,-1,3,5,-1}; /* hash_compose_tab_28 */ +static CompEntry compose_tab_28[] = { +{913, 8124, 0, NULL, NULL}, +{919, 8140, 0, NULL, NULL}, +{937, 8188, 0, NULL, NULL}, +{945, 8115, 0, NULL, NULL}, +{951, 8131, 0, NULL, NULL}, +{969, 8179, 0, NULL, NULL} +}; /* compose_tab_28 */ +static int hash_compose_tab_29[4] = +{0,-1,1,-1}; /* hash_compose_tab_29 */ +static CompEntry compose_tab_29[] = { +{1488, 64302, 0, NULL, NULL}, +{1522, 64287, 0, NULL, NULL} +}; /* compose_tab_29 */ +static int hash_compose_tab_30[2] = +{0,-1}; /* hash_compose_tab_30 */ +static CompEntry compose_tab_30[] = { +{1488, 64303, 0, NULL, NULL} +}; /* compose_tab_30 */ +static int hash_compose_tab_31[2] = +{-1,0}; /* hash_compose_tab_31 */ +static CompEntry compose_tab_31[] = { +{1493, 64331, 0, NULL, NULL} +}; /* compose_tab_31 */ +static int hash_compose_tab_32[44] = +{7,8,9,10,11,-1,12,-1,13,14,-1,15,16,-1,17,18,19,20,21,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,1,2,3,4,5,6,-1}; /* hash_compose_tab_32 */ +static CompEntry compose_tab_32[] = { +{1488, 64304, 0, NULL, NULL}, +{1489, 64305, 0, NULL, NULL}, +{1490, 64306, 0, NULL, NULL}, +{1491, 64307, 0, NULL, NULL}, +{1492, 64308, 0, NULL, NULL}, +{1493, 64309, 0, NULL, NULL}, +{1494, 64310, 0, NULL, NULL}, +{1496, 64312, 0, NULL, NULL}, +{1497, 64313, 0, NULL, NULL}, +{1498, 64314, 0, NULL, NULL}, +{1499, 64315, 0, NULL, NULL}, +{1500, 64316, 0, NULL, NULL}, +{1502, 64318, 0, NULL, NULL}, +{1504, 64320, 0, NULL, NULL}, +{1505, 64321, 0, NULL, NULL}, +{1507, 64323, 0, NULL, NULL}, +{1508, 64324, 0, NULL, NULL}, +{1510, 64326, 0, NULL, NULL}, +{1511, 64327, 0, NULL, NULL}, +{1512, 64328, 0, NULL, NULL}, +{1513, 64329, 0, NULL, NULL}, +{1514, 64330, 0, NULL, NULL} +}; /* compose_tab_32 */ +static int hash_compose_tab_33[6] = +{-1,0,2,-1,-1,1}; /* hash_compose_tab_33 */ +static CompEntry compose_tab_33[] = { +{1489, 64332, 0, NULL, NULL}, +{1499, 64333, 0, NULL, NULL}, +{1508, 64334, 0, NULL, NULL} +}; /* compose_tab_33 */ +static int hash_compose_tab_34_0[2] = +{-1,0}; /* hash_compose_tab_34_0 */ +static CompEntry compose_tab_34_0[] = { +{1513, 64300, 0, NULL, NULL} +}; /* compose_tab_34_0 */ +static int hash_compose_tab_34[4] = +{0,1,-1,-1}; /* hash_compose_tab_34 */ +static CompEntry compose_tab_34[] = { +{1468, 0, 1, compose_tab_34_0, hash_compose_tab_34_0}, +{1513, 64298, 0, NULL, NULL} +}; /* compose_tab_34 */ +static int hash_compose_tab_35_0[2] = +{-1,0}; /* hash_compose_tab_35_0 */ +static CompEntry compose_tab_35_0[] = { +{1513, 64301, 0, NULL, NULL} +}; /* compose_tab_35_0 */ +static int hash_compose_tab_35[4] = +{0,1,-1,-1}; /* hash_compose_tab_35 */ +static CompEntry compose_tab_35[] = { +{1468, 0, 1, compose_tab_35_0, hash_compose_tab_35_0}, +{1513, 64299, 0, NULL, NULL} +}; /* compose_tab_35 */ +static int hash_compose_tab_36[22] = +{3,10,-1,-1,-1,4,5,-1,-1,-1,-1,-1,6,-1,-1,0,1,2,7,8,9,-1}; /* hash_compose_tab_36 */ +static CompEntry compose_tab_36[] = { +{2325, 2392, 0, NULL, NULL}, +{2326, 2393, 0, NULL, NULL}, +{2327, 2394, 0, NULL, NULL}, +{2332, 2395, 0, NULL, NULL}, +{2337, 2396, 0, NULL, NULL}, +{2338, 2397, 0, NULL, NULL}, +{2344, 2345, 0, NULL, NULL}, +{2347, 2398, 0, NULL, NULL}, +{2351, 2399, 0, NULL, NULL}, +{2352, 2353, 0, NULL, NULL}, +{2355, 2356, 0, NULL, NULL} +}; /* compose_tab_36 */ +static int hash_compose_tab_37[8] = +{-1,0,1,-1,2,-1,-1,3}; /* hash_compose_tab_37 */ +static CompEntry compose_tab_37[] = { +{2465, 2524, 0, NULL, NULL}, +{2466, 2525, 0, NULL, NULL}, +{2476, 2480, 0, NULL, NULL}, +{2479, 2527, 0, NULL, NULL} +}; /* compose_tab_37 */ +static int hash_compose_tab_38[2] = +{-1,0}; /* hash_compose_tab_38 */ +static CompEntry compose_tab_38[] = { +{2503, 2507, 0, NULL, NULL} +}; /* compose_tab_38 */ +static int hash_compose_tab_39[2] = +{-1,0}; /* hash_compose_tab_39 */ +static CompEntry compose_tab_39[] = { +{2503, 2508, 0, NULL, NULL} +}; /* compose_tab_39 */ +static int hash_compose_tab_40[10] = +{-1,-1,0,1,3,4,-1,-1,2,-1}; /* hash_compose_tab_40 */ +static CompEntry compose_tab_40[] = { +{2582, 2649, 0, NULL, NULL}, +{2583, 2650, 0, NULL, NULL}, +{2588, 2651, 0, NULL, NULL}, +{2593, 2652, 0, NULL, NULL}, +{2603, 2654, 0, NULL, NULL} +}; /* compose_tab_40 */ +static int hash_compose_tab_41[6] = +{1,2,-1,-1,-1,0}; /* hash_compose_tab_41 */ +static CompEntry compose_tab_41[] = { +{2849, 2908, 0, NULL, NULL}, +{2850, 2909, 0, NULL, NULL}, +{2863, 2911, 0, NULL, NULL} +}; /* compose_tab_41 */ +static int hash_compose_tab_42[2] = +{-1,0}; /* hash_compose_tab_42 */ +static CompEntry compose_tab_42[] = { +{2887, 2891, 0, NULL, NULL} +}; /* compose_tab_42 */ +static int hash_compose_tab_43[2] = +{-1,0}; /* hash_compose_tab_43 */ +static CompEntry compose_tab_43[] = { +{2887, 2888, 0, NULL, NULL} +}; /* compose_tab_43 */ +static int hash_compose_tab_44[2] = +{-1,0}; /* hash_compose_tab_44 */ +static CompEntry compose_tab_44[] = { +{2887, 2892, 0, NULL, NULL} +}; /* compose_tab_44 */ +static int hash_compose_tab_45[4] = +{-1,-1,0,1}; /* hash_compose_tab_45 */ +static CompEntry compose_tab_45[] = { +{3014, 3018, 0, NULL, NULL}, +{3015, 3019, 0, NULL, NULL} +}; /* compose_tab_45 */ +static int hash_compose_tab_46[4] = +{-1,-1,0,1}; /* hash_compose_tab_46 */ +static CompEntry compose_tab_46[] = { +{2962, 2964, 0, NULL, NULL}, +{3014, 3020, 0, NULL, NULL} +}; /* compose_tab_46 */ +static int hash_compose_tab_47[2] = +{0,-1}; /* hash_compose_tab_47 */ +static CompEntry compose_tab_47[] = { +{3142, 3144, 0, NULL, NULL} +}; /* compose_tab_47 */ +static int hash_compose_tab_48[2] = +{0,-1}; /* hash_compose_tab_48 */ +static CompEntry compose_tab_48[] = { +{3270, 3274, 0, NULL, NULL} +}; /* compose_tab_48 */ +static int hash_compose_tab_49_1[2] = +{0,-1}; /* hash_compose_tab_49_1 */ +static CompEntry compose_tab_49_1[] = { +{3270, 3275, 0, NULL, NULL} +}; /* compose_tab_49_1 */ +static int hash_compose_tab_49[6] = +{2,-1,1,-1,-1,0}; /* hash_compose_tab_49 */ +static CompEntry compose_tab_49[] = { +{3263, 3264, 0, NULL, NULL}, +{3266, 0, 1, compose_tab_49_1, hash_compose_tab_49_1}, +{3270, 3271, 0, NULL, NULL} +}; /* compose_tab_49 */ +static int hash_compose_tab_50[2] = +{0,-1}; /* hash_compose_tab_50 */ +static CompEntry compose_tab_50[] = { +{3270, 3272, 0, NULL, NULL} +}; /* compose_tab_50 */ +static int hash_compose_tab_51[4] = +{-1,-1,0,1}; /* hash_compose_tab_51 */ +static CompEntry compose_tab_51[] = { +{3398, 3402, 0, NULL, NULL}, +{3399, 3403, 0, NULL, NULL} +}; /* compose_tab_51 */ +static int hash_compose_tab_52[2] = +{0,-1}; /* hash_compose_tab_52 */ +static CompEntry compose_tab_52[] = { +{3398, 3404, 0, NULL, NULL} +}; /* compose_tab_52 */ +static int hash_compose_tab_53[2] = +{-1,0}; /* hash_compose_tab_53 */ +static CompEntry compose_tab_53[] = { +{3661, 3635, 0, NULL, NULL} +}; /* compose_tab_53 */ +static int hash_compose_tab_54[2] = +{-1,0}; /* hash_compose_tab_54 */ +static CompEntry compose_tab_54[] = { +{3789, 3763, 0, NULL, NULL} +}; /* compose_tab_54 */ +static int hash_compose_tab_55_2[4] = +{-1,-1,0,1}; /* hash_compose_tab_55_2 */ +static CompEntry compose_tab_55_2[] = { +{4018, 3959, 0, NULL, NULL}, +{4019, 3961, 0, NULL, NULL} +}; /* compose_tab_55_2 */ +static int hash_compose_tab_55[6] = +{0,-1,1,2,-1,-1}; /* hash_compose_tab_55 */ +static CompEntry compose_tab_55[] = { +{3954, 3955, 0, NULL, NULL}, +{3956, 3957, 0, NULL, NULL}, +{3968, 0, 2, compose_tab_55_2, hash_compose_tab_55_2} +}; /* compose_tab_55 */ +static int hash_compose_tab_56[4] = +{-1,-1,0,1}; /* hash_compose_tab_56 */ +static CompEntry compose_tab_56[] = { +{4018, 3958, 0, NULL, NULL}, +{4019, 3960, 0, NULL, NULL} +}; /* compose_tab_56 */ +static int hash_compose_tab_57[4] = +{0,1,-1,-1}; /* hash_compose_tab_57 */ +static CompEntry compose_tab_57[] = { +{3904, 3945, 0, NULL, NULL}, +{3984, 4025, 0, NULL, NULL} +}; /* compose_tab_57 */ +static int hash_compose_tab_58[20] = +{-1,2,7,-1,-1,-1,0,3,5,8,-1,4,9,-1,-1,-1,1,6,-1,-1}; /* hash_compose_tab_58 */ +static CompEntry compose_tab_58[] = { +{3906, 3907, 0, NULL, NULL}, +{3916, 3917, 0, NULL, NULL}, +{3921, 3922, 0, NULL, NULL}, +{3926, 3927, 0, NULL, NULL}, +{3931, 3932, 0, NULL, NULL}, +{3986, 3987, 0, NULL, NULL}, +{3996, 3997, 0, NULL, NULL}, +{4001, 4002, 0, NULL, NULL}, +{4006, 4007, 0, NULL, NULL}, +{4011, 4012, 0, NULL, NULL} +}; /* compose_tab_58 */ +static int hash_compose_tab_59[96] = +{33,12,34,-1,13,35,14,36,15,37,-1,-1,-1,-1,-1,16,38,-1,17,39,-1,18,40,-1,19, + 41,-1,20,42,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,43,44,45, + 46,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,21,47,-1,-1,-1,-1,-1,-1,-1,0,22,-1,-1,-1,1, + 23,2,24,3,25,4,26,5,27,6,28,7,29,8,30,9,31,10,32,11}; /* hash_compose_tab_59 */ +static CompEntry compose_tab_59[] = { +{12358, 12436, 0, NULL, NULL}, +{12363, 12364, 0, NULL, NULL}, +{12365, 12366, 0, NULL, NULL}, +{12367, 12368, 0, NULL, NULL}, +{12369, 12370, 0, NULL, NULL}, +{12371, 12372, 0, NULL, NULL}, +{12373, 12374, 0, NULL, NULL}, +{12375, 12376, 0, NULL, NULL}, +{12377, 12378, 0, NULL, NULL}, +{12379, 12380, 0, NULL, NULL}, +{12381, 12382, 0, NULL, NULL}, +{12383, 12384, 0, NULL, NULL}, +{12385, 12386, 0, NULL, NULL}, +{12388, 12389, 0, NULL, NULL}, +{12390, 12391, 0, NULL, NULL}, +{12392, 12393, 0, NULL, NULL}, +{12399, 12400, 0, NULL, NULL}, +{12402, 12403, 0, NULL, NULL}, +{12405, 12406, 0, NULL, NULL}, +{12408, 12409, 0, NULL, NULL}, +{12411, 12412, 0, NULL, NULL}, +{12445, 12446, 0, NULL, NULL}, +{12454, 12532, 0, NULL, NULL}, +{12459, 12460, 0, NULL, NULL}, +{12461, 12462, 0, NULL, NULL}, +{12463, 12464, 0, NULL, NULL}, +{12465, 12466, 0, NULL, NULL}, +{12467, 12468, 0, NULL, NULL}, +{12469, 12470, 0, NULL, NULL}, +{12471, 12472, 0, NULL, NULL}, +{12473, 12474, 0, NULL, NULL}, +{12475, 12476, 0, NULL, NULL}, +{12477, 12478, 0, NULL, NULL}, +{12479, 12480, 0, NULL, NULL}, +{12481, 12482, 0, NULL, NULL}, +{12484, 12485, 0, NULL, NULL}, +{12486, 12487, 0, NULL, NULL}, +{12488, 12489, 0, NULL, NULL}, +{12495, 12496, 0, NULL, NULL}, +{12498, 12499, 0, NULL, NULL}, +{12501, 12502, 0, NULL, NULL}, +{12504, 12505, 0, NULL, NULL}, +{12507, 12508, 0, NULL, NULL}, +{12527, 12535, 0, NULL, NULL}, +{12528, 12536, 0, NULL, NULL}, +{12529, 12537, 0, NULL, NULL}, +{12530, 12538, 0, NULL, NULL}, +{12541, 12542, 0, NULL, NULL} +}; /* compose_tab_59 */ +static int hash_compose_tab_60[20] = +{-1,7,1,-1,8,2,-1,9,3,-1,-1,4,-1,-1,-1,5,-1,-1,6,0}; /* hash_compose_tab_60 */ +static CompEntry compose_tab_60[] = { +{12399, 12401, 0, NULL, NULL}, +{12402, 12404, 0, NULL, NULL}, +{12405, 12407, 0, NULL, NULL}, +{12408, 12410, 0, NULL, NULL}, +{12411, 12413, 0, NULL, NULL}, +{12495, 12497, 0, NULL, NULL}, +{12498, 12500, 0, NULL, NULL}, +{12501, 12503, 0, NULL, NULL}, +{12504, 12506, 0, NULL, NULL}, +{12507, 12509, 0, NULL, NULL} +}; /* compose_tab_60 */ +static int hash_compose_tab[122] = +{30,31,52,60,32,-1,-1,33,-1,34,35,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,1,2,3,4,-1,5,6,7,8,9,10,11,12,36,13,37,14, + 38,15,16,55,40,-1,-1,-1,-1,17,56,-1,-1,-1,-1,-1,41,18,19,20,42,21,22,-1,45, + 39,-1,23,24,-1,25,26,-1,-1,-1,-1,-1,-1,-1,-1,48,-1,43,44,51,53,-1,-1,27,46, + 54,28,-1,-1,47,-1,-1,-1,-1,49,50,-1,-1,57,-1,58,59,29}; /* hash_compose_tab */ +static CompEntry compose_tab[] = { +{768, 0, 39, compose_tab_0, hash_compose_tab_0}, +{769, 0, 70, compose_tab_1, hash_compose_tab_1}, +{770, 0, 27, compose_tab_2, hash_compose_tab_2}, +{771, 0, 19, compose_tab_3, hash_compose_tab_3}, +{772, 0, 28, compose_tab_4, hash_compose_tab_4}, +{774, 0, 30, compose_tab_5, hash_compose_tab_5}, +{775, 0, 40, compose_tab_6, hash_compose_tab_6}, +{776, 0, 50, compose_tab_7, hash_compose_tab_7}, +{777, 0, 15, compose_tab_8, hash_compose_tab_8}, +{778, 0, 6, compose_tab_9, hash_compose_tab_9}, +{779, 0, 6, compose_tab_10, hash_compose_tab_10}, +{780, 0, 34, compose_tab_11, hash_compose_tab_11}, +{781, 0, 17, compose_tab_12, hash_compose_tab_12}, +{783, 0, 14, compose_tab_13, hash_compose_tab_13}, +{785, 0, 12, compose_tab_14, hash_compose_tab_14}, +{787, 0, 15, compose_tab_15, hash_compose_tab_15}, +{788, 0, 17, compose_tab_16, hash_compose_tab_16}, +{795, 0, 4, compose_tab_17, hash_compose_tab_17}, +{803, 0, 39, compose_tab_18, hash_compose_tab_18}, +{804, 0, 2, compose_tab_19, hash_compose_tab_19}, +{805, 0, 2, compose_tab_20, hash_compose_tab_20}, +{807, 0, 20, compose_tab_21, hash_compose_tab_21}, +{808, 0, 10, compose_tab_22, hash_compose_tab_22}, +{813, 0, 12, compose_tab_23, hash_compose_tab_23}, +{814, 0, 2, compose_tab_24, hash_compose_tab_24}, +{816, 0, 6, compose_tab_25, hash_compose_tab_25}, +{817, 0, 17, compose_tab_26, hash_compose_tab_26}, +{834, 0, 12, compose_tab_27, hash_compose_tab_27}, +{837, 0, 6, compose_tab_28, hash_compose_tab_28}, +{1463, 0, 2, compose_tab_29, hash_compose_tab_29}, +{1464, 0, 1, compose_tab_30, hash_compose_tab_30}, +{1465, 0, 1, compose_tab_31, hash_compose_tab_31}, +{1468, 0, 22, compose_tab_32, hash_compose_tab_32}, +{1471, 0, 3, compose_tab_33, hash_compose_tab_33}, +{1473, 0, 2, compose_tab_34, hash_compose_tab_34}, +{1474, 0, 2, compose_tab_35, hash_compose_tab_35}, +{2364, 0, 11, compose_tab_36, hash_compose_tab_36}, +{2492, 0, 4, compose_tab_37, hash_compose_tab_37}, +{2494, 0, 1, compose_tab_38, hash_compose_tab_38}, +{2519, 0, 1, compose_tab_39, hash_compose_tab_39}, +{2620, 0, 5, compose_tab_40, hash_compose_tab_40}, +{2876, 0, 3, compose_tab_41, hash_compose_tab_41}, +{2878, 0, 1, compose_tab_42, hash_compose_tab_42}, +{2902, 0, 1, compose_tab_43, hash_compose_tab_43}, +{2903, 0, 1, compose_tab_44, hash_compose_tab_44}, +{3006, 0, 2, compose_tab_45, hash_compose_tab_45}, +{3031, 0, 2, compose_tab_46, hash_compose_tab_46}, +{3158, 0, 1, compose_tab_47, hash_compose_tab_47}, +{3266, 0, 1, compose_tab_48, hash_compose_tab_48}, +{3285, 0, 3, compose_tab_49, hash_compose_tab_49}, +{3286, 0, 1, compose_tab_50, hash_compose_tab_50}, +{3390, 0, 2, compose_tab_51, hash_compose_tab_51}, +{3415, 0, 1, compose_tab_52, hash_compose_tab_52}, +{3634, 0, 1, compose_tab_53, hash_compose_tab_53}, +{3762, 0, 1, compose_tab_54, hash_compose_tab_54}, +{3953, 0, 3, compose_tab_55, hash_compose_tab_55}, +{3968, 0, 2, compose_tab_56, hash_compose_tab_56}, +{4021, 0, 2, compose_tab_57, hash_compose_tab_57}, +{4023, 0, 10, compose_tab_58, hash_compose_tab_58}, +{12441, 0, 48, compose_tab_59, hash_compose_tab_59}, +{12442, 0, 10, compose_tab_60, hash_compose_tab_60} +}; /* compose_tab */ +#define COMP_CANDIDATE_MAP_OFFSET 24 +static Uint32 comp_candidate_map[] = { + 0x081ABFDFU, + 0x000361B8U, + 0x00000024U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x93800000U, + 0x00000006U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x10000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x50000000U, + 0x00800000U, + 0x00000000U, + 0x00000000U, + 0x10000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x50000000U, + 0x00C00000U, + 0x00000000U, + 0x00000000U, + 0x40000000U, + 0x00800000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00400000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00600004U, + 0x00000000U, + 0x00000000U, + 0x40000000U, + 0x00800000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00040000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00040000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00000000U, + 0x00020000U, + 0x00000001U, + 0x00A00000U +}; diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 89c6625550..6e86de3ebf 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -1596,6 +1596,17 @@ Sint erts_binary_set_loop_limit(Sint limit); /* erl_unicode.c */ void erts_init_unicode(void); Sint erts_unicode_set_loop_limit(Sint limit); + +void erts_native_filename_put(Eterm ioterm, int encoding, byte *p) ; +Sint erts_native_filename_need(Eterm ioterm, int encoding); +void erts_copy_utf8_to_utf16_little(byte *target, byte *bytes, int num_chars); +int erts_analyze_utf8(byte *source, Uint size, + byte **err_pos, Uint *num_chars, int *left); +#define ERTS_UTF8_OK 0 +#define ERTS_UTF8_INCOMPLETE 1 +#define ERTS_UTF8_ERROR 2 +#define ERTS_UTF8_ANALYZE_MORE 3 + /* erl_trace.c */ void erts_init_trace(void); void erts_trace_check_exiting(Eterm exiting); diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h index 57f2b2f16c..27c5f99320 100644 --- a/erts/emulator/beam/sys.h +++ b/erts/emulator/beam/sys.h @@ -1260,7 +1260,8 @@ char* win32_errorstr(int); #define ERL_FILENAME_UNKNOWN 0 #define ERL_FILENAME_LATIN1 1 #define ERL_FILENAME_UTF8 2 -#define ERL_FILENAME_WIN_WCHAR 3 +#define ERL_FILENAME_UTF8_MAC 3 +#define ERL_FILENAME_WIN_WCHAR 4 int erts_get_native_filename_encoding(void); /* The set function is only to be used by erl_init! */ diff --git a/erts/emulator/internal_doc/dec.dat b/erts/emulator/internal_doc/dec.dat new file mode 100644 index 0000000000..771ef51baa --- /dev/null +++ b/erts/emulator/internal_doc/dec.dat @@ -0,0 +1,942 @@ +{[59],894}. +{[96],8175}. +{[180],8189}. +{[183],903}. +{[198],1236}. +{[230],1237}. +{[399],1240}. +{[415],1256}. +{[439],1248}. +{[601],1241}. +{[629],1257}. +{[658],1249}. +{[697],884}. +{[768],832}. +{[768,65],192}. +{[768,69],200}. +{[768,73],204}. +{[768,79],210}. +{[768,85],217}. +{[768,87],7808}. +{[768,89],7922}. +{[768,97],224}. +{[768,101],232}. +{[768,105],236}. +{[768,111],242}. +{[768,117],249}. +{[768,119],7809}. +{[768,121],7923}. +{[768,168],8173}. +{[768,770,65],7846}. +{[768,770,69],7872}. +{[768,770,79],7890}. +{[768,770,97],7847}. +{[768,770,101],7873}. +{[768,770,111],7891}. +{[768,772,69],7700}. +{[768,772,79],7760}. +{[768,772,101],7701}. +{[768,772,111],7761}. +{[768,774,65],7856}. +{[768,774,97],7857}. +{[768,776,85],475}. +{[768,776,117],476}. +{[768,776,953],8146}. +{[768,776,965],8162}. +{[768,787,837,913],8074}. +{[768,787,837,919],8090}. +{[768,787,837,937],8106}. +{[768,787,837,945],8066}. +{[768,787,837,951],8082}. +{[768,787,837,969],8098}. +{[768,787,913],7946}. +{[768,787,917],7962}. +{[768,787,919],7978}. +{[768,787,921],7994}. +{[768,787,927],8010}. +{[768,787,937],8042}. +{[768,787,945],7938}. +{[768,787,949],7954}. +{[768,787,951],7970}. +{[768,787,953],7986}. +{[768,787,959],8002}. +{[768,787,965],8018}. +{[768,787,969],8034}. +{[768,788,837,913],8075}. +{[768,788,837,919],8091}. +{[768,788,837,937],8107}. +{[768,788,837,945],8067}. +{[768,788,837,951],8083}. +{[768,788,837,969],8099}. +{[768,788,913],7947}. +{[768,788,917],7963}. +{[768,788,919],7979}. +{[768,788,921],7995}. +{[768,788,927],8011}. +{[768,788,933],8027}. +{[768,788,937],8043}. +{[768,788,945],7939}. +{[768,788,949],7955}. +{[768,788,951],7971}. +{[768,788,953],7987}. +{[768,788,959],8003}. +{[768,788,965],8019}. +{[768,788,969],8035}. +{[768,795,79],7900}. +{[768,795,85],7914}. +{[768,795,111],7901}. +{[768,795,117],7915}. +{[768,837,945],8114}. +{[768,837,951],8130}. +{[768,837,969],8178}. +{[768,913],8122}. +{[768,917],8136}. +{[768,919],8138}. +{[768,921],8154}. +{[768,927],8184}. +{[768,933],8170}. +{[768,937],8186}. +{[768,945],8048}. +{[768,949],8050}. +{[768,951],8052}. +{[768,953],8054}. +{[768,959],8056}. +{[768,965],8058}. +{[768,969],8060}. +{[768,8127],8141}. +{[768,8190],8157}. +{[769],833}. +{[769,65],193}. +{[769,67],262}. +{[769,69],201}. +{[769,71],500}. +{[769,73],205}. +{[769,75],7728}. +{[769,76],313}. +{[769,77],7742}. +{[769,78],323}. +{[769,79],211}. +{[769,80],7764}. +{[769,82],340}. +{[769,83],346}. +{[769,85],218}. +{[769,87],7810}. +{[769,89],221}. +{[769,90],377}. +{[769,97],225}. +{[769,99],263}. +{[769,101],233}. +{[769,103],501}. +{[769,105],237}. +{[769,107],7729}. +{[769,108],314}. +{[769,109],7743}. +{[769,110],324}. +{[769,111],243}. +{[769,112],7765}. +{[769,114],341}. +{[769,115],347}. +{[769,117],250}. +{[769,119],7811}. +{[769,121],253}. +{[769,122],378}. +{[769,168],8174}. +{[769,198],508}. +{[769,216],510}. +{[769,230],509}. +{[769,248],511}. +{[769,770,65],7844}. +{[769,770,69],7870}. +{[769,770,79],7888}. +{[769,770,97],7845}. +{[769,770,101],7871}. +{[769,770,111],7889}. +{[769,771,79],7756}. +{[769,771,85],7800}. +{[769,771,111],7757}. +{[769,771,117],7801}. +{[769,772,69],7702}. +{[769,772,79],7762}. +{[769,772,101],7703}. +{[769,772,111],7763}. +{[769,774,65],7854}. +{[769,774,97],7855}. +{[769,776,73],7726}. +{[769,776,85],471}. +{[769,776,105],7727}. +{[769,776,117],472}. +{[769,776,953],8147}. +{[769,776,965],8163}. +{[769,778,65],506}. +{[769,778,97],507}. +{[769,787,837,913],8076}. +{[769,787,837,919],8092}. +{[769,787,837,937],8108}. +{[769,787,837,945],8068}. +{[769,787,837,951],8084}. +{[769,787,837,969],8100}. +{[769,787,913],7948}. +{[769,787,917],7964}. +{[769,787,919],7980}. +{[769,787,921],7996}. +{[769,787,927],8012}. +{[769,787,937],8044}. +{[769,787,945],7940}. +{[769,787,949],7956}. +{[769,787,951],7972}. +{[769,787,953],7988}. +{[769,787,959],8004}. +{[769,787,965],8020}. +{[769,787,969],8036}. +{[769,788,837,913],8077}. +{[769,788,837,919],8093}. +{[769,788,837,937],8109}. +{[769,788,837,945],8069}. +{[769,788,837,951],8085}. +{[769,788,837,969],8101}. +{[769,788,913],7949}. +{[769,788,917],7965}. +{[769,788,919],7981}. +{[769,788,921],7997}. +{[769,788,927],8013}. +{[769,788,933],8029}. +{[769,788,937],8045}. +{[769,788,945],7941}. +{[769,788,949],7957}. +{[769,788,951],7973}. +{[769,788,953],7989}. +{[769,788,959],8005}. +{[769,788,965],8021}. +{[769,788,969],8037}. +{[769,795,79],7898}. +{[769,795,85],7912}. +{[769,795,111],7899}. +{[769,795,117],7913}. +{[769,807,67],7688}. +{[769,807,99],7689}. +{[769,837,945],8116}. +{[769,837,951],8132}. +{[769,837,959],8180}. +{[769,913],8123}. +{[769,917],8137}. +{[769,919],8139}. +{[769,921],8155}. +{[769,927],8185}. +{[769,933],8171}. +{[769,937],8187}. +{[769,945],8049}. +{[769,949],8051}. +{[769,951],8053}. +{[769,953],8055}. +{[769,959],8057}. +{[769,965],8059}. +{[769,969],8061}. +{[769,1043],1027}. +{[769,1050],1036}. +{[769,1075],1107}. +{[769,1082],1116}. +{[769,8127],8142}. +{[769,8190],8158}. +{[770,65],194}. +{[770,67],264}. +{[770,69],202}. +{[770,71],284}. +{[770,72],292}. +{[770,73],206}. +{[770,74],308}. +{[770,79],212}. +{[770,83],348}. +{[770,85],219}. +{[770,87],372}. +{[770,89],374}. +{[770,90],7824}. +{[770,97],226}. +{[770,99],265}. +{[770,101],234}. +{[770,103],285}. +{[770,104],293}. +{[770,105],238}. +{[770,106],309}. +{[770,111],244}. +{[770,115],349}. +{[770,117],251}. +{[770,119],373}. +{[770,121],375}. +{[770,122],7825}. +{[770,803,65],7852}. +{[770,803,69],7878}. +{[770,803,79],7896}. +{[770,803,97],7853}. +{[770,803,101],7879}. +{[770,803,111],7897}. +{[771,65],195}. +{[771,69],7868}. +{[771,73],296}. +{[771,78],209}. +{[771,79],213}. +{[771,85],360}. +{[771,86],7804}. +{[771,89],7928}. +{[771,97],227}. +{[771,101],7869}. +{[771,105],297}. +{[771,110],241}. +{[771,111],245}. +{[771,117],361}. +{[771,118],7805}. +{[771,121],7929}. +{[771,770,65],7850}. +{[771,770,69],7876}. +{[771,770,79],7894}. +{[771,770,97],7851}. +{[771,770,101],7877}. +{[771,770,111],7895}. +{[771,774,65],7860}. +{[771,774,97],7861}. +{[771,795,79],7904}. +{[771,795,85],7918}. +{[771,795,111],7905}. +{[771,795,117],7919}. +{[772,65],256}. +{[772,69],274}. +{[772,71],7712}. +{[772,73],298}. +{[772,79],332}. +{[772,85],362}. +{[772,97],257}. +{[772,101],275}. +{[772,103],7713}. +{[772,105],299}. +{[772,111],333}. +{[772,117],363}. +{[772,198],482}. +{[772,230],483}. +{[772,775,65],480}. +{[772,775,97],481}. +{[772,776,65],478}. +{[772,776,85],469}. +{[772,776,97],479}. +{[772,776,117],470}. +{[772,803,76],7736}. +{[772,803,82],7772}. +{[772,803,108],7737}. +{[772,803,114],7773}. +{[772,808,79],492}. +{[772,808,111],493}. +{[772,913],8121}. +{[772,921],8153}. +{[772,933],8169}. +{[772,945],8113}. +{[772,953],8145}. +{[772,965],8161}. +{[772,1048],1250}. +{[772,1059],1262}. +{[772,1080],1251}. +{[772,1091],1263}. +{[774,65],258}. +{[774,69],276}. +{[774,71],286}. +{[774,73],300}. +{[774,79],334}. +{[774,85],364}. +{[774,97],259}. +{[774,101],277}. +{[774,103],287}. +{[774,105],301}. +{[774,111],335}. +{[774,117],365}. +{[774,803,65],7862}. +{[774,803,97],7863}. +{[774,807,69],7708}. +{[774,807,101],7709}. +{[774,913],8120}. +{[774,921],8152}. +{[774,933],8168}. +{[774,945],8112}. +{[774,953],8144}. +{[774,965],8160}. +{[774,1040],1232}. +{[774,1045],1238}. +{[774,1046],1217}. +{[774,1048],1049}. +{[774,1059],1038}. +{[774,1072],1233}. +{[774,1077],1239}. +{[774,1078],1218}. +{[774,1080],1081}. +{[774,1091],1118}. +{[775,66],7682}. +{[775,67],266}. +{[775,68],7690}. +{[775,69],278}. +{[775,70],7710}. +{[775,71],288}. +{[775,72],7714}. +{[775,73],304}. +{[775,77],7744}. +{[775,78],7748}. +{[775,80],7766}. +{[775,82],7768}. +{[775,83],7776}. +{[775,84],7786}. +{[775,87],7814}. +{[775,88],7818}. +{[775,89],7822}. +{[775,90],379}. +{[775,98],7683}. +{[775,99],267}. +{[775,100],7691}. +{[775,101],279}. +{[775,102],7711}. +{[775,103],289}. +{[775,104],7715}. +{[775,109],7745}. +{[775,110],7749}. +{[775,112],7767}. +{[775,114],7769}. +{[775,115],7777}. +{[775,116],7787}. +{[775,119],7815}. +{[775,120],7819}. +{[775,121],7823}. +{[775,122],380}. +{[775,383],7835}. +{[775,769,83],7780}. +{[775,769,115],7781}. +{[775,774],784}. +{[775,780,83],7782}. +{[775,780,115],7783}. +{[775,803,83],7784}. +{[775,803,115],7785}. +{[776,65],196}. +{[776,69],203}. +{[776,72],7718}. +{[776,73],207}. +{[776,79],214}. +{[776,85],220}. +{[776,87],7812}. +{[776,88],7820}. +{[776,89],376}. +{[776,97],228}. +{[776,101],235}. +{[776,104],7719}. +{[776,105],239}. +{[776,111],246}. +{[776,116],7831}. +{[776,117],252}. +{[776,119],7813}. +{[776,120],7821}. +{[776,121],255}. +{[776,399],1242}. +{[776,415],1258}. +{[776,601],1243}. +{[776,629],1259}. +{[776,771,79],7758}. +{[776,771,111],7759}. +{[776,772,85],7802}. +{[776,772,117],7803}. +{[776,921],938}. +{[776,933],939}. +{[776,953],970}. +{[776,965],971}. +{[776,978],980}. +{[776,1030],1031}. +{[776,1040],1234}. +{[776,1045],1025}. +{[776,1046],1244}. +{[776,1047],1246}. +{[776,1048],1252}. +{[776,1054],1254}. +{[776,1059],1264}. +{[776,1063],1268}. +{[776,1067],1272}. +{[776,1072],1235}. +{[776,1077],1105}. +{[776,1078],1245}. +{[776,1079],1247}. +{[776,1080],1253}. +{[776,1086],1255}. +{[776,1091],1265}. +{[776,1095],1269}. +{[776,1099],1273}. +{[776,1110],1111}. +{[777,65],7842}. +{[777,69],7866}. +{[777,73],7880}. +{[777,79],7886}. +{[777,85],7910}. +{[777,89],7926}. +{[777,97],7843}. +{[777,101],7867}. +{[777,105],7881}. +{[777,111],7887}. +{[777,117],7911}. +{[777,121],7927}. +{[777,770,65],7848}. +{[777,770,69],7874}. +{[777,770,79],7892}. +{[777,770,97],7849}. +{[777,770,101],7875}. +{[777,770,111],7893}. +{[777,774,65],7858}. +{[777,774,97],7859}. +{[777,795,79],7902}. +{[777,795,85],7916}. +{[777,795,111],7903}. +{[777,795,117],7917}. +{[778,65],197}. +{[778,85],366}. +{[778,97],229}. +{[778,117],367}. +{[778,119],7832}. +{[778,121],7833}. +{[779,79],336}. +{[779,85],368}. +{[779,111],337}. +{[779,117],369}. +{[779,1059],1266}. +{[779,1091],1267}. +{[780,65],461}. +{[780,67],268}. +{[780,68],270}. +{[780,69],282}. +{[780,71],486}. +{[780,73],463}. +{[780,75],488}. +{[780,76],317}. +{[780,78],327}. +{[780,79],465}. +{[780,82],344}. +{[780,83],352}. +{[780,84],356}. +{[780,85],467}. +{[780,90],381}. +{[780,97],462}. +{[780,99],269}. +{[780,100],271}. +{[780,101],283}. +{[780,103],487}. +{[780,105],464}. +{[780,106],496}. +{[780,107],489}. +{[780,108],318}. +{[780,110],328}. +{[780,111],466}. +{[780,114],345}. +{[780,115],353}. +{[780,116],357}. +{[780,117],468}. +{[780,122],382}. +{[780,439],494}. +{[780,658],495}. +{[780,776,85],473}. +{[780,776,117],474}. +{[781,168],901}. +{[781,776],836}. +{[781,776,953],912}. +{[781,776,965],944}. +{[781,913],902}. +{[781,917],904}. +{[781,919],905}. +{[781,921],906}. +{[781,927],908}. +{[781,933],910}. +{[781,937],911}. +{[781,945],940}. +{[781,949],941}. +{[781,951],942}. +{[781,953],943}. +{[781,959],972}. +{[781,965],973}. +{[781,969],974}. +{[781,978],979}. +{[783,65],512}. +{[783,69],516}. +{[783,73],520}. +{[783,79],524}. +{[783,82],528}. +{[783,85],532}. +{[783,97],513}. +{[783,101],517}. +{[783,105],521}. +{[783,111],525}. +{[783,114],529}. +{[783,117],533}. +{[783,1140],1142}. +{[783,1141],1143}. +{[785,65],514}. +{[785,69],518}. +{[785,73],522}. +{[785,79],526}. +{[785,82],530}. +{[785,85],534}. +{[785,97],515}. +{[785,101],519}. +{[785,105],523}. +{[785,111],527}. +{[785,114],531}. +{[785,117],535}. +{[787],835}. +{[787,837,913],8072}. +{[787,837,919],8088}. +{[787,837,937],8104}. +{[787,837,945],8064}. +{[787,837,951],8080}. +{[787,837,969],8096}. +{[787,913],7944}. +{[787,917],7960}. +{[787,919],7976}. +{[787,921],7992}. +{[787,927],8008}. +{[787,937],8040}. +{[787,945],7936}. +{[787,949],7952}. +{[787,951],7968}. +{[787,953],7984}. +{[787,959],8000}. +{[787,961],8164}. +{[787,965],8016}. +{[787,969],8032}. +{[788,837,913],8073}. +{[788,837,919],8089}. +{[788,837,937],8105}. +{[788,837,945],8065}. +{[788,837,951],8081}. +{[788,837,969],8097}. +{[788,913],7945}. +{[788,917],7961}. +{[788,919],7977}. +{[788,921],7993}. +{[788,927],8009}. +{[788,929],8172}. +{[788,933],8025}. +{[788,937],8041}. +{[788,945],7937}. +{[788,949],7953}. +{[788,951],7969}. +{[788,953],7985}. +{[788,959],8001}. +{[788,961],8165}. +{[788,965],8017}. +{[788,969],8033}. +{[795,79],416}. +{[795,85],431}. +{[795,111],417}. +{[795,117],432}. +{[803,65],7840}. +{[803,66],7684}. +{[803,68],7692}. +{[803,69],7864}. +{[803,72],7716}. +{[803,73],7882}. +{[803,75],7730}. +{[803,76],7734}. +{[803,77],7746}. +{[803,78],7750}. +{[803,79],7884}. +{[803,82],7770}. +{[803,83],7778}. +{[803,84],7788}. +{[803,85],7908}. +{[803,86],7806}. +{[803,87],7816}. +{[803,89],7924}. +{[803,90],7826}. +{[803,97],7841}. +{[803,98],7685}. +{[803,100],7693}. +{[803,101],7865}. +{[803,104],7717}. +{[803,105],7883}. +{[803,107],7731}. +{[803,108],7735}. +{[803,109],7747}. +{[803,110],7751}. +{[803,111],7885}. +{[803,114],7771}. +{[803,115],7779}. +{[803,116],7789}. +{[803,117],7909}. +{[803,118],7807}. +{[803,119],7817}. +{[803,121],7925}. +{[803,122],7827}. +{[803,795,79],7906}. +{[803,795,85],7920}. +{[803,795,111],7907}. +{[803,795,117],7921}. +{[804,85],7794}. +{[804,117],7795}. +{[805,65],7680}. +{[805,97],7681}. +{[807,67],199}. +{[807,68],7696}. +{[807,71],290}. +{[807,72],7720}. +{[807,75],310}. +{[807,76],315}. +{[807,78],325}. +{[807,82],342}. +{[807,83],350}. +{[807,84],354}. +{[807,99],231}. +{[807,100],7697}. +{[807,103],291}. +{[807,104],7721}. +{[807,107],311}. +{[807,108],316}. +{[807,110],326}. +{[807,114],343}. +{[807,115],351}. +{[807,116],355}. +{[808,65],260}. +{[808,69],280}. +{[808,73],302}. +{[808,79],490}. +{[808,85],370}. +{[808,97],261}. +{[808,101],281}. +{[808,105],303}. +{[808,111],491}. +{[808,117],371}. +{[813,68],7698}. +{[813,69],7704}. +{[813,76],7740}. +{[813,78],7754}. +{[813,84],7792}. +{[813,85],7798}. +{[813,100],7699}. +{[813,101],7705}. +{[813,108],7741}. +{[813,110],7755}. +{[813,116],7793}. +{[813,117],7799}. +{[814,72],7722}. +{[814,104],7723}. +{[816,69],7706}. +{[816,73],7724}. +{[816,85],7796}. +{[816,101],7707}. +{[816,105],7725}. +{[816,117],7797}. +{[817,66],7686}. +{[817,68],7694}. +{[817,75],7732}. +{[817,76],7738}. +{[817,78],7752}. +{[817,82],7774}. +{[817,84],7790}. +{[817,90],7828}. +{[817,98],7687}. +{[817,100],7695}. +{[817,104],7830}. +{[817,107],7733}. +{[817,108],7739}. +{[817,110],7753}. +{[817,114],7775}. +{[817,116],7791}. +{[817,122],7829}. +{[834,168],8129}. +{[834,776,953],8151}. +{[834,776,965],8167}. +{[834,787,837,913],8078}. +{[834,787,837,919],8094}. +{[834,787,837,937],8110}. +{[834,787,837,945],8070}. +{[834,787,837,951],8086}. +{[834,787,837,969],8102}. +{[834,787,913],7950}. +{[834,787,919],7982}. +{[834,787,921],7998}. +{[834,787,937],8046}. +{[834,787,945],7942}. +{[834,787,951],7974}. +{[834,787,953],7990}. +{[834,787,965],8022}. +{[834,787,969],8038}. +{[834,788,837,913],8079}. +{[834,788,837,919],8095}. +{[834,788,837,937],8111}. +{[834,788,837,945],8071}. +{[834,788,837,951],8087}. +{[834,788,837,969],8103}. +{[834,788,913],7951}. +{[834,788,919],7983}. +{[834,788,921],7999}. +{[834,788,933],8031}. +{[834,788,937],8047}. +{[834,788,945],7943}. +{[834,788,951],7975}. +{[834,788,953],7991}. +{[834,788,965],8023}. +{[834,788,969],8039}. +{[834,837,945],8119}. +{[834,837,951],8135}. +{[834,837,969],8183}. +{[834,945],8118}. +{[834,951],8134}. +{[834,953],8150}. +{[834,965],8166}. +{[834,969],8182}. +{[834,8127],8143}. +{[834,8190],8159}. +{[837,913],8124}. +{[837,919],8140}. +{[837,937],8188}. +{[837,945],8115}. +{[837,951],8131}. +{[837,969],8179}. +{[953],8126}. +{[1463,1488],64302}. +{[1463,1522],64287}. +{[1464,1488],64303}. +{[1465,1493],64331}. +{[1468,1488],64304}. +{[1468,1489],64305}. +{[1468,1490],64306}. +{[1468,1491],64307}. +{[1468,1492],64308}. +{[1468,1493],64309}. +{[1468,1494],64310}. +{[1468,1496],64312}. +{[1468,1497],64313}. +{[1468,1498],64314}. +{[1468,1499],64315}. +{[1468,1500],64316}. +{[1468,1502],64318}. +{[1468,1504],64320}. +{[1468,1505],64321}. +{[1468,1507],64323}. +{[1468,1508],64324}. +{[1468,1510],64326}. +{[1468,1511],64327}. +{[1468,1512],64328}. +{[1468,1513],64329}. +{[1468,1514],64330}. +{[1471,1489],64332}. +{[1471,1499],64333}. +{[1471,1508],64334}. +{[1473,1468,1513],64300}. +{[1473,1513],64298}. +{[1474,1468,1513],64301}. +{[1474,1513],64299}. +{[2364,2325],2392}. +{[2364,2326],2393}. +{[2364,2327],2394}. +{[2364,2332],2395}. +{[2364,2337],2396}. +{[2364,2338],2397}. +{[2364,2344],2345}. +{[2364,2347],2398}. +{[2364,2351],2399}. +{[2364,2352],2353}. +{[2364,2355],2356}. +{[2492,2465],2524}. +{[2492,2466],2525}. +{[2492,2476],2480}. +{[2492,2479],2527}. +{[2494,2503],2507}. +{[2519,2503],2508}. +{[2620,2582],2649}. +{[2620,2583],2650}. +{[2620,2588],2651}. +{[2620,2593],2652}. +{[2620,2603],2654}. +{[2876,2849],2908}. +{[2876,2850],2909}. +{[2876,2863],2911}. +{[2878,2887],2891}. +{[2902,2887],2888}. +{[2903,2887],2892}. +{[3006,3014],3018}. +{[3006,3015],3019}. +{[3031,2962],2964}. +{[3031,3014],3020}. +{[3158,3142],3144}. +{[3266,3270],3274}. +{[3285,3263],3264}. +{[3285,3266,3270],3275}. +{[3285,3270],3271}. +{[3286,3270],3272}. +{[3390,3398],3402}. +{[3390,3399],3403}. +{[3415,3398],3404}. +{[3634,3661],3635}. +{[3762,3789],3763}. +{[3953,3954],3955}. +{[3953,3956],3957}. +{[3953,3968],3969}. +{[3953,3968,4018],3959}. +{[3953,3968,4019],3961}. +{[3968,4018],3958}. +{[3968,4019],3960}. +{[4021,3904],3945}. +{[4021,3984],4025}. +{[4023,3906],3907}. +{[4023,3916],3917}. +{[4023,3921],3922}. +{[4023,3926],3927}. +{[4023,3931],3932}. +{[4023,3986],3987}. +{[4023,3996],3997}. +{[4023,4001],4002}. +{[4023,4006],4007}. +{[4023,4011],4012}. +{[12441,12358],12436}. +{[12441,12363],12364}. +{[12441,12365],12366}. +{[12441,12367],12368}. +{[12441,12369],12370}. +{[12441,12371],12372}. +{[12441,12373],12374}. +{[12441,12375],12376}. +{[12441,12377],12378}. +{[12441,12379],12380}. +{[12441,12381],12382}. +{[12441,12383],12384}. +{[12441,12385],12386}. +{[12441,12388],12389}. +{[12441,12390],12391}. +{[12441,12392],12393}. +{[12441,12399],12400}. +{[12441,12402],12403}. +{[12441,12405],12406}. +{[12441,12408],12409}. +{[12441,12411],12412}. +{[12441,12445],12446}. +{[12441,12454],12532}. +{[12441,12459],12460}. +{[12441,12461],12462}. +{[12441,12463],12464}. +{[12441,12465],12466}. +{[12441,12467],12468}. +{[12441,12469],12470}. +{[12441,12471],12472}. +{[12441,12473],12474}. +{[12441,12475],12476}. +{[12441,12477],12478}. +{[12441,12479],12480}. +{[12441,12481],12482}. +{[12441,12484],12485}. +{[12441,12486],12487}. +{[12441,12488],12489}. +{[12441,12495],12496}. +{[12441,12498],12499}. +{[12441,12501],12502}. +{[12441,12504],12505}. +{[12441,12507],12508}. +{[12441,12527],12535}. +{[12441,12528],12536}. +{[12441,12529],12537}. +{[12441,12530],12538}. +{[12441,12541],12542}. +{[12442,12399],12401}. +{[12442,12402],12404}. +{[12442,12405],12407}. +{[12442,12408],12410}. +{[12442,12411],12413}. +{[12442,12495],12497}. +{[12442,12498],12500}. +{[12442,12501],12503}. +{[12442,12504],12506}. +{[12442,12507],12509}. diff --git a/erts/emulator/internal_doc/dec.erl b/erts/emulator/internal_doc/dec.erl new file mode 100644 index 0000000000..0315f2a52d --- /dev/null +++ b/erts/emulator/internal_doc/dec.erl @@ -0,0 +1,237 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2000-2010. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% This program is used to generate a header file with data for +%% normalizing denormalized unicode. + +%% The C header is generated from a text file containing tuples in the +%% following format: +%% {RevList,Translation} +%% Where 'RevList' is a reversed list of the denormalized repressentation of +%% the character 'Translation'. An example would be the swedish character +%% 'ö', which would be represented in the file as: +%% {[776,111],246}, as the denormalized representation of codepoint 246 +%% is [111,776] (i.e an 'o' followed by the "double dot accent character 776), +%% while 'ä' instead is represented as {[776,97],228}, as the denormalized +%% form would be [97,776] (same accent but an 'a' instead). +%% The datafile is generated from the table on Apple's developer connection +%% http://developer.apple.com/library/mac/#technotes/tn/tn1150table.html +%% The generating is done whenever new data is present (i.e. dec.dat has +%% to be changed) and not for every build. The product (the C header) is copied +%% to $ERL_TOP/erts/beam after generation and checked in. +%% The program and the data file is included for reference. + +-module(dec). + +-compile(export_all). + +-define(HASH_SIZE_FACTOR,2). +-define(BIG_PREFIX_SIZE,392). + +-define(INPUT_FILE_NAME,"dec.dat"). +-define(OUTPUT_FILE_NAME,"erl_unicode_normalize.h"). + +read(FName) -> + {ok,L} = file:consult(FName), + [{A,B} || {A,B} <- L, + length(A) > 1% , hd(A) < 769 + ]. + +dec() -> + L = read(?INPUT_FILE_NAME), + G = group(L), + {ok,Out} = file:open(?OUTPUT_FILE_NAME,[write]), + io:format + (Out, + "/*~n" + "* %CopyrightBegin%~n" + "*~n" + "* Copyright Ericsson AB 1999-2010. All Rights Reserved.~n" + "*~n" + "* The contents of this file are subject to the Erlang Public License,~n" + "* Version 1.1, (the \"License\"); you may not use this file except in~n" + "* compliance with the License. You should have received a copy of the~n" + "* Erlang Public License along with this software. If not, it can be~n" + "* retrieved online at http://www.erlang.org/.~n" + "*~n" + "* Software distributed under the License is distributed on an " + "\"AS IS\"~n" + "* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See~n" + "* the License for the specific language governing rights and " + "limitations~n" + "* under the License.~n" + "*~n" + "* %CopyrightEnd%~n" + "*/~n" + "/*~n" + "* This file is automatically generated by ~p.erl, " + "do not edit manually~n" + "*/~n", + [?MODULE]), + + io:format(Out, + "#define HASH_SIZE_FACTOR ~w~n" + "typedef struct _compose_entry {~n" + " Uint16 c;~n" + " Uint16 res;~n" + " Uint16 num_subs;~n" + " struct _compose_entry *subs;~n" + " int *hash;~n" + "} CompEntry;~n~n" + "static int compose_tab_size = ~p;~n", + [?HASH_SIZE_FACTOR,length(G)]), + d(Out,G,[],0), + PreTab = tuple_to_list(make_prefix_table(G,erlang:make_tuple(102,0))), + dump_prefixes(Out,PreTab), +%% Using this cuts down on the searching in the +%% actual implementation, but wastes memory with little real gain.. +%% LL = lists:flatten([PartList || {PartList,_} <- L]), +%% BigPreTab = tuple_to_list( +%% make_big_prefixes(LL, +%% erlang:make_tuple(?BIG_PREFIX_SIZE,0))), +%% dump_big_prefixes(Out,BigPreTab), + file:close(Out), + ok. + + + +d(Out,List,D,C) -> + d_sub(Out,List,D,C), + d_top_hash(Out,List,D,C), + d_top(Out,List,D,C). +d_sub(_Out,[],_D,_C) -> + ok; +d_sub(Out,[{_CP,[],_Res}|T],D,C) -> + d_sub(Out,T,D,C+1); +d_sub(Out,[{_CP,Subs,_Res0}|T],D,C) -> + d(Out,Subs,[C|D],0), + d_sub(Out,T,D,C+1). +d_top(Out,L,D,C) -> + io:format(Out,"static CompEntry ~s[] = {~n",[format_depth(D)]), + d_top_1(Out,L,D,C), + io:format(Out,"}; /* ~s */ ~n",[format_depth(D)]). + +d_top_1(_Out,[],_D,_C) -> + ok; +d_top_1(Out,[{CP,[],Res}|T],D,C) -> + io:format(Out, + "{~w, ~w, 0, NULL, NULL}",[CP,Res]), + if + T =:= [] -> + io:format(Out,"~n",[]); + true -> + io:format(Out,",~n",[]) + end, + d_top_1(Out,T,D,C+1); +d_top_1(Out,[{CP,Subs,_Res}|T],D,C) -> + io:format(Out, + "{~w, 0, ~w, ~s, ~s}",[CP,length(Subs), + format_depth([C|D]), + "hash_"++format_depth([C|D])]), + if + T =:= [] -> + io:format(Out,"~n",[]); + true -> + io:format(Out,",~n",[]) + end, + d_top_1(Out,T,D,C+1). + + +d_top_hash(Out,List,D,_C) -> + HSize = length(List)*?HASH_SIZE_FACTOR, + io:format(Out,"static int ~s[~p] = ~n",["hash_"++format_depth(D),HSize]), + Tup = d_top_hash_1(List,0,erlang:make_tuple(HSize,-1),HSize), + io:format(Out,"~p; /* ~s */ ~n",[Tup,"hash_"++format_depth(D)]). + +d_top_hash_1([],_,Hash,_HSize) -> + Hash; +d_top_hash_1([{CP,_,_}|T],Index,Hash,HSize) -> + Bucket = hash_search(Hash,HSize,CP rem HSize), + d_top_hash_1(T,Index+1,erlang:setelement(Bucket+1,Hash,Index),HSize). + +hash_search(Hash,_HSize,Bucket) when element(Bucket+1,Hash) =:= -1 -> + Bucket; +hash_search(Hash,HSize,Bucket) -> + hash_search(Hash,HSize,(Bucket + 1) rem HSize). + +format_depth(D) -> + lists:reverse(tl(lists:reverse(lists:flatten(["compose_tab_",[ integer_to_list(X) ++ "_" || X <- lists:reverse(D) ]])))). + + + + +make_prefix_table([],Table) -> + Table; +make_prefix_table([{C,_,_}|T],Table) when C =< 4023 -> + Index = (C div 32) + 1 - 24, + Pos = C rem 32, + X = element(Index,Table), + Y = X bor (1 bsl Pos), + NewTab = setelement(Index,Table,Y), + make_prefix_table(T,NewTab); +make_prefix_table([_|T],Tab) -> + make_prefix_table(T,Tab). + +dump_prefixes(Out,L) -> + io:format(Out,"#define COMP_CANDIDATE_MAP_OFFSET 24~n",[]), + io:format(Out,"static Uint32 comp_candidate_map[] = {~n",[]), + dump_prefixes_1(Out,L). +dump_prefixes_1(Out,[H]) -> + io:format(Out," 0x~8.16.0BU~n",[H]), + io:format(Out,"};~n",[]); +dump_prefixes_1(Out,[H|T]) -> + io:format(Out," 0x~8.16.0BU,~n",[H]), + dump_prefixes_1(Out,T). + +%% make_big_prefixes([],Table) -> +%% Table; +%% make_big_prefixes([C|T],Table) -> +%% Index = (C div 32) + 1, +%% Pos = C rem 32, +%% X = element(Index,Table), +%% Y = X bor (1 bsl Pos), +%% NewTab = setelement(Index,Table,Y), +%% make_big_prefixes(T,NewTab). + +%% dump_big_prefixes(Out,L) -> +%% io:format(Out,"#define BIG_COMP_CANDIDATE_SIZE ~w~n", [?BIG_PREFIX_SIZE]), +%% io:format(Out,"static Uint32 big_comp_candidate_map[] = {~n",[]), +%% dump_prefixes_1(Out,L). + +pick([],_,Acc) -> + {lists:reverse(Acc),[]}; +pick([{[H|TT],N}|T],H,Acc) -> + pick(T,H,[{TT,N}|Acc]); +pick([{[H|_],_}|_]=L,M,Acc) when H =/= M -> + {lists:reverse(Acc),L}. + + +group([]) -> + []; +group([{[H],N}|T]) -> + {Part,Rest} = pick(T,H,[]), + [{H,group(Part),N}| group(Rest)]; +group([{[H|_],_}|_]=L) -> + {Part,Rest} = pick(L,H,[]), + [{H,group(Part),0}| group(Rest)]. + + + + + diff --git a/erts/emulator/sys/common/erl_sys_common_misc.c b/erts/emulator/sys/common/erl_sys_common_misc.c index 7338ffc963..461e763f03 100644 --- a/erts/emulator/sys/common/erl_sys_common_misc.c +++ b/erts/emulator/sys/common/erl_sys_common_misc.c @@ -17,10 +17,11 @@ * %CopyrightEnd% */ + + /* - * Description: Check I/O - * - * Author: Rickard Green + * Darwin needs conversion! + * http://developer.apple.com/library/mac/#qa/qa2001/qa1235.html */ #ifdef HAVE_CONFIG_H @@ -30,6 +31,10 @@ #include "sys.h" #include "global.h" +#if defined(__APPLE__) && defined(__MACH__) && !defined(__DARWIN__) +#define __DARWIN__ 1 +#endif + #if !defined(__WIN32__) #include #if !defined(HAVE_SETLOCALE) || !defined(HAVE_NL_LANGINFO) || !defined(HAVE_LANGINFO_H) @@ -42,7 +47,7 @@ /* Written once and only once */ static int filename_encoding = ERL_FILENAME_UNKNOWN; -#if defined(__WIN32__) +#if defined(__WIN32__) || defined(__DARWIN__) static int user_filename_encoding = ERL_FILENAME_UTF8; /* Default unicode on windows */ #else static int user_filename_encoding = ERL_FILENAME_LATIN1; @@ -88,6 +93,11 @@ void erts_init_sys_common_misc(void) } # endif } +# if defined(__DARWIN__) + if (filename_encoding == ERL_FILENAME_UTF8) { + filename_encoding = ERL_FILENAME_UTF8_MAC; + } +# endif #endif } diff --git a/lib/kernel/test/file_name_SUITE.erl b/lib/kernel/test/file_name_SUITE.erl index ba1a829149..57c4353f36 100644 --- a/lib/kernel/test/file_name_SUITE.erl +++ b/lib/kernel/test/file_name_SUITE.erl @@ -71,7 +71,7 @@ %% read_link -export([all/1,init_per_testcase/2, fin_per_testcase/2]). --export([normal/1,icky/1,very_icky/1]). +-export([normal/1,icky/1,very_icky/1,normalize/1]). init_per_testcase(_Func, Config) -> @@ -84,8 +84,39 @@ fin_per_testcase(_Func, Config) -> all(suite) -> - [normal,icky,very_icky]. + [normal,icky,very_icky,normalize]. +normalize(suite) -> + []; +normalize(doc) -> + ["Check that filename normalization works"]; +normalize(Config) when is_list(Config) -> + random:seed({1290,431421,830412}), + try + ?line UniMode = file:native_name_encoding() =/= latin1, + if + not UniMode -> + throw(need_unicode_mode); + true -> + ok + end, + ?line Pairs = [rand_comp_decomp(200) || _ <- lists:seq(1,1000)], + case os:type() of + {unix,darwin} -> + ?line [ true = (A =:= prim_file:internal_native2name(B)) || + {A,B} <- Pairs ]; + _ -> + ok + end, + ?line [ true = (A =:= prim_file:internal_normalize_utf8(B)) || + {A,B} <- Pairs ] + + catch + throw:need_unicode_mode -> + io:format("Sorry, can only run in unicode mode.~n"), + {skipped,"VM needs to be started in Unicode filename mode"} + end. + normal(suite) -> []; normal(doc) -> @@ -653,7 +684,7 @@ very_icky_dir() -> treat_icky(Bin) -> case os:type() of {unix,darwin} -> - procentify(Bin); + binary_to_list(procentify(Bin)); {win32,_} -> binary_to_list(Bin); _ -> @@ -708,6 +739,945 @@ conv(L) -> L end. - - +rand_comp_decomp(Max) -> + N = random:uniform(Max), + L = [ rand_decomp() || _ <- lists:seq(1,N) ], + LC = [ A || {A,_} <- L], + LD = lists:flatten([B || {_,B} <- L]), + LB = case os:type() of + {win32,_} -> + unicode:characters_to_binary(LD,unicode,{utf16,little}); + _ -> + unicode:characters_to_binary(LD,unicode,utf8) + end, + {LC,LB}. + +rand_decomp() -> + BT = bigtup(), + SZ = tuple_size(BT), + element(random:uniform(SZ),BT). +bigtup() -> + {{192,[65,768]}, + {200,[69,768]}, + {204,[73,768]}, + {210,[79,768]}, + {217,[85,768]}, + {7808,[87,768]}, + {7922,[89,768]}, + {224,[97,768]}, + {232,[101,768]}, + {236,[105,768]}, + {242,[111,768]}, + {249,[117,768]}, + {7809,[119,768]}, + {7923,[121,768]}, + {8173,[168,768]}, + {7846,[65,770,768]}, + {7872,[69,770,768]}, + {7890,[79,770,768]}, + {7847,[97,770,768]}, + {7873,[101,770,768]}, + {7891,[111,770,768]}, + {7700,[69,772,768]}, + {7760,[79,772,768]}, + {7701,[101,772,768]}, + {7761,[111,772,768]}, + {7856,[65,774,768]}, + {7857,[97,774,768]}, + {475,[85,776,768]}, + {476,[117,776,768]}, + {8146,[953,776,768]}, + {8162,[965,776,768]}, + {8074,[913,837,787,768]}, + {8090,[919,837,787,768]}, + {8106,[937,837,787,768]}, + {8066,[945,837,787,768]}, + {8082,[951,837,787,768]}, + {8098,[969,837,787,768]}, + {7946,[913,787,768]}, + {7962,[917,787,768]}, + {7978,[919,787,768]}, + {7994,[921,787,768]}, + {8010,[927,787,768]}, + {8042,[937,787,768]}, + {7938,[945,787,768]}, + {7954,[949,787,768]}, + {7970,[951,787,768]}, + {7986,[953,787,768]}, + {8002,[959,787,768]}, + {8018,[965,787,768]}, + {8034,[969,787,768]}, + {8075,[913,837,788,768]}, + {8091,[919,837,788,768]}, + {8107,[937,837,788,768]}, + {8067,[945,837,788,768]}, + {8083,[951,837,788,768]}, + {8099,[969,837,788,768]}, + {7947,[913,788,768]}, + {7963,[917,788,768]}, + {7979,[919,788,768]}, + {7995,[921,788,768]}, + {8011,[927,788,768]}, + {8027,[933,788,768]}, + {8043,[937,788,768]}, + {7939,[945,788,768]}, + {7955,[949,788,768]}, + {7971,[951,788,768]}, + {7987,[953,788,768]}, + {8003,[959,788,768]}, + {8019,[965,788,768]}, + {8035,[969,788,768]}, + {7900,[79,795,768]}, + {7914,[85,795,768]}, + {7901,[111,795,768]}, + {7915,[117,795,768]}, + {8114,[945,837,768]}, + {8130,[951,837,768]}, + {8178,[969,837,768]}, + {8122,[913,768]}, + {8136,[917,768]}, + {8138,[919,768]}, + {8154,[921,768]}, + {8184,[927,768]}, + {8170,[933,768]}, + {8186,[937,768]}, + {8048,[945,768]}, + {8050,[949,768]}, + {8052,[951,768]}, + {8054,[953,768]}, + {8056,[959,768]}, + {8058,[965,768]}, + {8060,[969,768]}, + {8141,[8127,768]}, + {8157,[8190,768]}, + {193,[65,769]}, + {262,[67,769]}, + {201,[69,769]}, + {500,[71,769]}, + {205,[73,769]}, + {7728,[75,769]}, + {313,[76,769]}, + {7742,[77,769]}, + {323,[78,769]}, + {211,[79,769]}, + {7764,[80,769]}, + {340,[82,769]}, + {346,[83,769]}, + {218,[85,769]}, + {7810,[87,769]}, + {221,[89,769]}, + {377,[90,769]}, + {225,[97,769]}, + {263,[99,769]}, + {233,[101,769]}, + {501,[103,769]}, + {237,[105,769]}, + {7729,[107,769]}, + {314,[108,769]}, + {7743,[109,769]}, + {324,[110,769]}, + {243,[111,769]}, + {7765,[112,769]}, + {341,[114,769]}, + {347,[115,769]}, + {250,[117,769]}, + {7811,[119,769]}, + {253,[121,769]}, + {378,[122,769]}, + {8174,[168,769]}, + {508,[198,769]}, + {510,[216,769]}, + {509,[230,769]}, + {511,[248,769]}, + {7844,[65,770,769]}, + {7870,[69,770,769]}, + {7888,[79,770,769]}, + {7845,[97,770,769]}, + {7871,[101,770,769]}, + {7889,[111,770,769]}, + {7756,[79,771,769]}, + {7800,[85,771,769]}, + {7757,[111,771,769]}, + {7801,[117,771,769]}, + {7702,[69,772,769]}, + {7762,[79,772,769]}, + {7703,[101,772,769]}, + {7763,[111,772,769]}, + {7854,[65,774,769]}, + {7855,[97,774,769]}, + {7726,[73,776,769]}, + {471,[85,776,769]}, + {7727,[105,776,769]}, + {472,[117,776,769]}, + {8147,[953,776,769]}, + {8163,[965,776,769]}, + {506,[65,778,769]}, + {507,[97,778,769]}, + {8076,[913,837,787,769]}, + {8092,[919,837,787,769]}, + {8108,[937,837,787,769]}, + {8068,[945,837,787,769]}, + {8084,[951,837,787,769]}, + {8100,[969,837,787,769]}, + {7948,[913,787,769]}, + {7964,[917,787,769]}, + {7980,[919,787,769]}, + {7996,[921,787,769]}, + {8012,[927,787,769]}, + {8044,[937,787,769]}, + {7940,[945,787,769]}, + {7956,[949,787,769]}, + {7972,[951,787,769]}, + {7988,[953,787,769]}, + {8004,[959,787,769]}, + {8020,[965,787,769]}, + {8036,[969,787,769]}, + {8077,[913,837,788,769]}, + {8093,[919,837,788,769]}, + {8109,[937,837,788,769]}, + {8069,[945,837,788,769]}, + {8085,[951,837,788,769]}, + {8101,[969,837,788,769]}, + {7949,[913,788,769]}, + {7965,[917,788,769]}, + {7981,[919,788,769]}, + {7997,[921,788,769]}, + {8013,[927,788,769]}, + {8029,[933,788,769]}, + {8045,[937,788,769]}, + {7941,[945,788,769]}, + {7957,[949,788,769]}, + {7973,[951,788,769]}, + {7989,[953,788,769]}, + {8005,[959,788,769]}, + {8021,[965,788,769]}, + {8037,[969,788,769]}, + {7898,[79,795,769]}, + {7912,[85,795,769]}, + {7899,[111,795,769]}, + {7913,[117,795,769]}, + {7688,[67,807,769]}, + {7689,[99,807,769]}, + {8116,[945,837,769]}, + {8132,[951,837,769]}, + {8180,[959,837,769]}, + {8123,[913,769]}, + {8137,[917,769]}, + {8139,[919,769]}, + {8155,[921,769]}, + {8185,[927,769]}, + {8171,[933,769]}, + {8187,[937,769]}, + {8049,[945,769]}, + {8051,[949,769]}, + {8053,[951,769]}, + {8055,[953,769]}, + {8057,[959,769]}, + {8059,[965,769]}, + {8061,[969,769]}, + {1027,[1043,769]}, + {1036,[1050,769]}, + {1107,[1075,769]}, + {1116,[1082,769]}, + {8142,[8127,769]}, + {8158,[8190,769]}, + {194,[65,770]}, + {264,[67,770]}, + {202,[69,770]}, + {284,[71,770]}, + {292,[72,770]}, + {206,[73,770]}, + {308,[74,770]}, + {212,[79,770]}, + {348,[83,770]}, + {219,[85,770]}, + {372,[87,770]}, + {374,[89,770]}, + {7824,[90,770]}, + {226,[97,770]}, + {265,[99,770]}, + {234,[101,770]}, + {285,[103,770]}, + {293,[104,770]}, + {238,[105,770]}, + {309,[106,770]}, + {244,[111,770]}, + {349,[115,770]}, + {251,[117,770]}, + {373,[119,770]}, + {375,[121,770]}, + {7825,[122,770]}, + {7852,[65,803,770]}, + {7878,[69,803,770]}, + {7896,[79,803,770]}, + {7853,[97,803,770]}, + {7879,[101,803,770]}, + {7897,[111,803,770]}, + {195,[65,771]}, + {7868,[69,771]}, + {296,[73,771]}, + {209,[78,771]}, + {213,[79,771]}, + {360,[85,771]}, + {7804,[86,771]}, + {7928,[89,771]}, + {227,[97,771]}, + {7869,[101,771]}, + {297,[105,771]}, + {241,[110,771]}, + {245,[111,771]}, + {361,[117,771]}, + {7805,[118,771]}, + {7929,[121,771]}, + {7850,[65,770,771]}, + {7876,[69,770,771]}, + {7894,[79,770,771]}, + {7851,[97,770,771]}, + {7877,[101,770,771]}, + {7895,[111,770,771]}, + {7860,[65,774,771]}, + {7861,[97,774,771]}, + {7904,[79,795,771]}, + {7918,[85,795,771]}, + {7905,[111,795,771]}, + {7919,[117,795,771]}, + {256,[65,772]}, + {274,[69,772]}, + {7712,[71,772]}, + {298,[73,772]}, + {332,[79,772]}, + {362,[85,772]}, + {257,[97,772]}, + {275,[101,772]}, + {7713,[103,772]}, + {299,[105,772]}, + {333,[111,772]}, + {363,[117,772]}, + {482,[198,772]}, + {483,[230,772]}, + {480,[65,775,772]}, + {481,[97,775,772]}, + {478,[65,776,772]}, + {469,[85,776,772]}, + {479,[97,776,772]}, + {470,[117,776,772]}, + {7736,[76,803,772]}, + {7772,[82,803,772]}, + {7737,[108,803,772]}, + {7773,[114,803,772]}, + {492,[79,808,772]}, + {493,[111,808,772]}, + {8121,[913,772]}, + {8153,[921,772]}, + {8169,[933,772]}, + {8113,[945,772]}, + {8145,[953,772]}, + {8161,[965,772]}, + {1250,[1048,772]}, + {1262,[1059,772]}, + {1251,[1080,772]}, + {1263,[1091,772]}, + {258,[65,774]}, + {276,[69,774]}, + {286,[71,774]}, + {300,[73,774]}, + {334,[79,774]}, + {364,[85,774]}, + {259,[97,774]}, + {277,[101,774]}, + {287,[103,774]}, + {301,[105,774]}, + {335,[111,774]}, + {365,[117,774]}, + {7862,[65,803,774]}, + {7863,[97,803,774]}, + {7708,[69,807,774]}, + {7709,[101,807,774]}, + {8120,[913,774]}, + {8152,[921,774]}, + {8168,[933,774]}, + {8112,[945,774]}, + {8144,[953,774]}, + {8160,[965,774]}, + {1232,[1040,774]}, + {1238,[1045,774]}, + {1217,[1046,774]}, + {1049,[1048,774]}, + {1038,[1059,774]}, + {1233,[1072,774]}, + {1239,[1077,774]}, + {1218,[1078,774]}, + {1081,[1080,774]}, + {1118,[1091,774]}, + {7682,[66,775]}, + {266,[67,775]}, + {7690,[68,775]}, + {278,[69,775]}, + {7710,[70,775]}, + {288,[71,775]}, + {7714,[72,775]}, + {304,[73,775]}, + {7744,[77,775]}, + {7748,[78,775]}, + {7766,[80,775]}, + {7768,[82,775]}, + {7776,[83,775]}, + {7786,[84,775]}, + {7814,[87,775]}, + {7818,[88,775]}, + {7822,[89,775]}, + {379,[90,775]}, + {7683,[98,775]}, + {267,[99,775]}, + {7691,[100,775]}, + {279,[101,775]}, + {7711,[102,775]}, + {289,[103,775]}, + {7715,[104,775]}, + {7745,[109,775]}, + {7749,[110,775]}, + {7767,[112,775]}, + {7769,[114,775]}, + {7777,[115,775]}, + {7787,[116,775]}, + {7815,[119,775]}, + {7819,[120,775]}, + {7823,[121,775]}, + {380,[122,775]}, + {7835,[383,775]}, + {7780,[83,769,775]}, + {7781,[115,769,775]}, + {784,[774,775]}, + {7782,[83,780,775]}, + {7783,[115,780,775]}, + {7784,[83,803,775]}, + {7785,[115,803,775]}, + {196,[65,776]}, + {203,[69,776]}, + {7718,[72,776]}, + {207,[73,776]}, + {214,[79,776]}, + {220,[85,776]}, + {7812,[87,776]}, + {7820,[88,776]}, + {376,[89,776]}, + {228,[97,776]}, + {235,[101,776]}, + {7719,[104,776]}, + {239,[105,776]}, + {246,[111,776]}, + {7831,[116,776]}, + {252,[117,776]}, + {7813,[119,776]}, + {7821,[120,776]}, + {255,[121,776]}, + {1242,[399,776]}, + {1258,[415,776]}, + {1243,[601,776]}, + {1259,[629,776]}, + {7758,[79,771,776]}, + {7759,[111,771,776]}, + {7802,[85,772,776]}, + {7803,[117,772,776]}, + {938,[921,776]}, + {939,[933,776]}, + {970,[953,776]}, + {971,[965,776]}, + {980,[978,776]}, + {1031,[1030,776]}, + {1234,[1040,776]}, + {1025,[1045,776]}, + {1244,[1046,776]}, + {1246,[1047,776]}, + {1252,[1048,776]}, + {1254,[1054,776]}, + {1264,[1059,776]}, + {1268,[1063,776]}, + {1272,[1067,776]}, + {1235,[1072,776]}, + {1105,[1077,776]}, + {1245,[1078,776]}, + {1247,[1079,776]}, + {1253,[1080,776]}, + {1255,[1086,776]}, + {1265,[1091,776]}, + {1269,[1095,776]}, + {1273,[1099,776]}, + {1111,[1110,776]}, + {7842,[65,777]}, + {7866,[69,777]}, + {7880,[73,777]}, + {7886,[79,777]}, + {7910,[85,777]}, + {7926,[89,777]}, + {7843,[97,777]}, + {7867,[101,777]}, + {7881,[105,777]}, + {7887,[111,777]}, + {7911,[117,777]}, + {7927,[121,777]}, + {7848,[65,770,777]}, + {7874,[69,770,777]}, + {7892,[79,770,777]}, + {7849,[97,770,777]}, + {7875,[101,770,777]}, + {7893,[111,770,777]}, + {7858,[65,774,777]}, + {7859,[97,774,777]}, + {7902,[79,795,777]}, + {7916,[85,795,777]}, + {7903,[111,795,777]}, + {7917,[117,795,777]}, + {197,[65,778]}, + {366,[85,778]}, + {229,[97,778]}, + {367,[117,778]}, + {7832,[119,778]}, + {7833,[121,778]}, + {336,[79,779]}, + {368,[85,779]}, + {337,[111,779]}, + {369,[117,779]}, + {1266,[1059,779]}, + {1267,[1091,779]}, + {461,[65,780]}, + {268,[67,780]}, + {270,[68,780]}, + {282,[69,780]}, + {486,[71,780]}, + {463,[73,780]}, + {488,[75,780]}, + {317,[76,780]}, + {327,[78,780]}, + {465,[79,780]}, + {344,[82,780]}, + {352,[83,780]}, + {356,[84,780]}, + {467,[85,780]}, + {381,[90,780]}, + {462,[97,780]}, + {269,[99,780]}, + {271,[100,780]}, + {283,[101,780]}, + {487,[103,780]}, + {464,[105,780]}, + {496,[106,780]}, + {489,[107,780]}, + {318,[108,780]}, + {328,[110,780]}, + {466,[111,780]}, + {345,[114,780]}, + {353,[115,780]}, + {357,[116,780]}, + {468,[117,780]}, + {382,[122,780]}, + {494,[439,780]}, + {495,[658,780]}, + {473,[85,776,780]}, + {474,[117,776,780]}, + {901,[168,781]}, + {912,[953,776,781]}, + {944,[965,776,781]}, + {902,[913,781]}, + {904,[917,781]}, + {905,[919,781]}, + {906,[921,781]}, + {908,[927,781]}, + {910,[933,781]}, + {911,[937,781]}, + {940,[945,781]}, + {941,[949,781]}, + {942,[951,781]}, + {943,[953,781]}, + {972,[959,781]}, + {973,[965,781]}, + {974,[969,781]}, + {979,[978,781]}, + {512,[65,783]}, + {516,[69,783]}, + {520,[73,783]}, + {524,[79,783]}, + {528,[82,783]}, + {532,[85,783]}, + {513,[97,783]}, + {517,[101,783]}, + {521,[105,783]}, + {525,[111,783]}, + {529,[114,783]}, + {533,[117,783]}, + {1142,[1140,783]}, + {1143,[1141,783]}, + {514,[65,785]}, + {518,[69,785]}, + {522,[73,785]}, + {526,[79,785]}, + {530,[82,785]}, + {534,[85,785]}, + {515,[97,785]}, + {519,[101,785]}, + {523,[105,785]}, + {527,[111,785]}, + {531,[114,785]}, + {535,[117,785]}, + {8072,[913,837,787]}, + {8088,[919,837,787]}, + {8104,[937,837,787]}, + {8064,[945,837,787]}, + {8080,[951,837,787]}, + {8096,[969,837,787]}, + {7944,[913,787]}, + {7960,[917,787]}, + {7976,[919,787]}, + {7992,[921,787]}, + {8008,[927,787]}, + {8040,[937,787]}, + {7936,[945,787]}, + {7952,[949,787]}, + {7968,[951,787]}, + {7984,[953,787]}, + {8000,[959,787]}, + {8164,[961,787]}, + {8016,[965,787]}, + {8032,[969,787]}, + {8073,[913,837,788]}, + {8089,[919,837,788]}, + {8105,[937,837,788]}, + {8065,[945,837,788]}, + {8081,[951,837,788]}, + {8097,[969,837,788]}, + {7945,[913,788]}, + {7961,[917,788]}, + {7977,[919,788]}, + {7993,[921,788]}, + {8009,[927,788]}, + {8172,[929,788]}, + {8025,[933,788]}, + {8041,[937,788]}, + {7937,[945,788]}, + {7953,[949,788]}, + {7969,[951,788]}, + {7985,[953,788]}, + {8001,[959,788]}, + {8165,[961,788]}, + {8017,[965,788]}, + {8033,[969,788]}, + {416,[79,795]}, + {431,[85,795]}, + {417,[111,795]}, + {432,[117,795]}, + {7840,[65,803]}, + {7684,[66,803]}, + {7692,[68,803]}, + {7864,[69,803]}, + {7716,[72,803]}, + {7882,[73,803]}, + {7730,[75,803]}, + {7734,[76,803]}, + {7746,[77,803]}, + {7750,[78,803]}, + {7884,[79,803]}, + {7770,[82,803]}, + {7778,[83,803]}, + {7788,[84,803]}, + {7908,[85,803]}, + {7806,[86,803]}, + {7816,[87,803]}, + {7924,[89,803]}, + {7826,[90,803]}, + {7841,[97,803]}, + {7685,[98,803]}, + {7693,[100,803]}, + {7865,[101,803]}, + {7717,[104,803]}, + {7883,[105,803]}, + {7731,[107,803]}, + {7735,[108,803]}, + {7747,[109,803]}, + {7751,[110,803]}, + {7885,[111,803]}, + {7771,[114,803]}, + {7779,[115,803]}, + {7789,[116,803]}, + {7909,[117,803]}, + {7807,[118,803]}, + {7817,[119,803]}, + {7925,[121,803]}, + {7827,[122,803]}, + {7906,[79,795,803]}, + {7920,[85,795,803]}, + {7907,[111,795,803]}, + {7921,[117,795,803]}, + {7794,[85,804]}, + {7795,[117,804]}, + {7680,[65,805]}, + {7681,[97,805]}, + {199,[67,807]}, + {7696,[68,807]}, + {290,[71,807]}, + {7720,[72,807]}, + {310,[75,807]}, + {315,[76,807]}, + {325,[78,807]}, + {342,[82,807]}, + {350,[83,807]}, + {354,[84,807]}, + {231,[99,807]}, + {7697,[100,807]}, + {291,[103,807]}, + {7721,[104,807]}, + {311,[107,807]}, + {316,[108,807]}, + {326,[110,807]}, + {343,[114,807]}, + {351,[115,807]}, + {355,[116,807]}, + {260,[65,808]}, + {280,[69,808]}, + {302,[73,808]}, + {490,[79,808]}, + {370,[85,808]}, + {261,[97,808]}, + {281,[101,808]}, + {303,[105,808]}, + {491,[111,808]}, + {371,[117,808]}, + {7698,[68,813]}, + {7704,[69,813]}, + {7740,[76,813]}, + {7754,[78,813]}, + {7792,[84,813]}, + {7798,[85,813]}, + {7699,[100,813]}, + {7705,[101,813]}, + {7741,[108,813]}, + {7755,[110,813]}, + {7793,[116,813]}, + {7799,[117,813]}, + {7722,[72,814]}, + {7723,[104,814]}, + {7706,[69,816]}, + {7724,[73,816]}, + {7796,[85,816]}, + {7707,[101,816]}, + {7725,[105,816]}, + {7797,[117,816]}, + {7686,[66,817]}, + {7694,[68,817]}, + {7732,[75,817]}, + {7738,[76,817]}, + {7752,[78,817]}, + {7774,[82,817]}, + {7790,[84,817]}, + {7828,[90,817]}, + {7687,[98,817]}, + {7695,[100,817]}, + {7830,[104,817]}, + {7733,[107,817]}, + {7739,[108,817]}, + {7753,[110,817]}, + {7775,[114,817]}, + {7791,[116,817]}, + {7829,[122,817]}, + {8129,[168,834]}, + {8151,[953,776,834]}, + {8167,[965,776,834]}, + {8078,[913,837,787,834]}, + {8094,[919,837,787,834]}, + {8110,[937,837,787,834]}, + {8070,[945,837,787,834]}, + {8086,[951,837,787,834]}, + {8102,[969,837,787,834]}, + {7950,[913,787,834]}, + {7982,[919,787,834]}, + {7998,[921,787,834]}, + {8046,[937,787,834]}, + {7942,[945,787,834]}, + {7974,[951,787,834]}, + {7990,[953,787,834]}, + {8022,[965,787,834]}, + {8038,[969,787,834]}, + {8079,[913,837,788,834]}, + {8095,[919,837,788,834]}, + {8111,[937,837,788,834]}, + {8071,[945,837,788,834]}, + {8087,[951,837,788,834]}, + {8103,[969,837,788,834]}, + {7951,[913,788,834]}, + {7983,[919,788,834]}, + {7999,[921,788,834]}, + {8031,[933,788,834]}, + {8047,[937,788,834]}, + {7943,[945,788,834]}, + {7975,[951,788,834]}, + {7991,[953,788,834]}, + {8023,[965,788,834]}, + {8039,[969,788,834]}, + {8119,[945,837,834]}, + {8135,[951,837,834]}, + {8183,[969,837,834]}, + {8118,[945,834]}, + {8134,[951,834]}, + {8150,[953,834]}, + {8166,[965,834]}, + {8182,[969,834]}, + {8143,[8127,834]}, + {8159,[8190,834]}, + {8124,[913,837]}, + {8140,[919,837]}, + {8188,[937,837]}, + {8115,[945,837]}, + {8131,[951,837]}, + {8179,[969,837]}, + {64302,[1488,1463]}, + {64287,[1522,1463]}, + {64303,[1488,1464]}, + {64331,[1493,1465]}, + {64304,[1488,1468]}, + {64305,[1489,1468]}, + {64306,[1490,1468]}, + {64307,[1491,1468]}, + {64308,[1492,1468]}, + {64309,[1493,1468]}, + {64310,[1494,1468]}, + {64312,[1496,1468]}, + {64313,[1497,1468]}, + {64314,[1498,1468]}, + {64315,[1499,1468]}, + {64316,[1500,1468]}, + {64318,[1502,1468]}, + {64320,[1504,1468]}, + {64321,[1505,1468]}, + {64323,[1507,1468]}, + {64324,[1508,1468]}, + {64326,[1510,1468]}, + {64327,[1511,1468]}, + {64328,[1512,1468]}, + {64329,[1513,1468]}, + {64330,[1514,1468]}, + {64332,[1489,1471]}, + {64333,[1499,1471]}, + {64334,[1508,1471]}, + {64300,[1513,1468,1473]}, + {64298,[1513,1473]}, + {64301,[1513,1468,1474]}, + {64299,[1513,1474]}, + {2392,[2325,2364]}, + {2393,[2326,2364]}, + {2394,[2327,2364]}, + {2395,[2332,2364]}, + {2396,[2337,2364]}, + {2397,[2338,2364]}, + {2345,[2344,2364]}, + {2398,[2347,2364]}, + {2399,[2351,2364]}, + {2353,[2352,2364]}, + {2356,[2355,2364]}, + {2524,[2465,2492]}, + {2525,[2466,2492]}, + {2480,[2476,2492]}, + {2527,[2479,2492]}, + {2507,[2503,2494]}, + {2508,[2503,2519]}, + {2649,[2582,2620]}, + {2650,[2583,2620]}, + {2651,[2588,2620]}, + {2652,[2593,2620]}, + {2654,[2603,2620]}, + {2908,[2849,2876]}, + {2909,[2850,2876]}, + {2911,[2863,2876]}, + {2891,[2887,2878]}, + {2888,[2887,2902]}, + {2892,[2887,2903]}, + {3018,[3014,3006]}, + {3019,[3015,3006]}, + {2964,[2962,3031]}, + {3020,[3014,3031]}, + {3144,[3142,3158]}, + {3274,[3270,3266]}, + {3264,[3263,3285]}, + {3275,[3270,3266,3285]}, + {3271,[3270,3285]}, + {3272,[3270,3286]}, + {3402,[3398,3390]}, + {3403,[3399,3390]}, + {3404,[3398,3415]}, + {3635,[3661,3634]}, + {3763,[3789,3762]}, + {3955,[3954,3953]}, + {3957,[3956,3953]}, + {3959,[4018,3968,3953]}, + {3961,[4019,3968,3953]}, + {3958,[4018,3968]}, + {3960,[4019,3968]}, + {3945,[3904,4021]}, + {4025,[3984,4021]}, + {3907,[3906,4023]}, + {3917,[3916,4023]}, + {3922,[3921,4023]}, + {3927,[3926,4023]}, + {3932,[3931,4023]}, + {3987,[3986,4023]}, + {3997,[3996,4023]}, + {4002,[4001,4023]}, + {4007,[4006,4023]}, + {4012,[4011,4023]}, + {12436,[12358,12441]}, + {12364,[12363,12441]}, + {12366,[12365,12441]}, + {12368,[12367,12441]}, + {12370,[12369,12441]}, + {12372,[12371,12441]}, + {12374,[12373,12441]}, + {12376,[12375,12441]}, + {12378,[12377,12441]}, + {12380,[12379,12441]}, + {12382,[12381,12441]}, + {12384,[12383,12441]}, + {12386,[12385,12441]}, + {12389,[12388,12441]}, + {12391,[12390,12441]}, + {12393,[12392,12441]}, + {12400,[12399,12441]}, + {12403,[12402,12441]}, + {12406,[12405,12441]}, + {12409,[12408,12441]}, + {12412,[12411,12441]}, + {12446,[12445,12441]}, + {12532,[12454,12441]}, + {12460,[12459,12441]}, + {12462,[12461,12441]}, + {12464,[12463,12441]}, + {12466,[12465,12441]}, + {12468,[12467,12441]}, + {12470,[12469,12441]}, + {12472,[12471,12441]}, + {12474,[12473,12441]}, + {12476,[12475,12441]}, + {12478,[12477,12441]}, + {12480,[12479,12441]}, + {12482,[12481,12441]}, + {12485,[12484,12441]}, + {12487,[12486,12441]}, + {12489,[12488,12441]}, + {12496,[12495,12441]}, + {12499,[12498,12441]}, + {12502,[12501,12441]}, + {12505,[12504,12441]}, + {12508,[12507,12441]}, + {12535,[12527,12441]}, + {12536,[12528,12441]}, + {12537,[12529,12441]}, + {12538,[12530,12441]}, + {12542,[12541,12441]}, + {12401,[12399,12442]}, + {12404,[12402,12442]}, + {12407,[12405,12442]}, + {12410,[12408,12442]}, + {12413,[12411,12442]}, + {12497,[12495,12442]}, + {12500,[12498,12442]}, + {12503,[12501,12442]}, + {12506,[12504,12442]}, + {12509,[12507,12442]}}. -- cgit v1.2.3 From fed20c731b91f1debb11a809cc5999d8b04dd293 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 22 Nov 2010 16:24:59 +0100 Subject: Teach spawn_executable about Unicode Also corrected compressed files on Windows --- erts/emulator/beam/erl_bif_port.c | 91 ++++----- erts/emulator/beam/erl_unicode.c | 73 +++++++ erts/emulator/beam/global.h | 2 + erts/emulator/drivers/common/efile_drv.c | 4 +- erts/emulator/drivers/common/gzio.c | 56 ++++- erts/emulator/sys/win32/sys.c | 338 +++++++++++++++++++++++-------- erts/emulator/zlib/zutil.h | 1 + lib/kernel/test/file_name_SUITE.erl | 7 +- 8 files changed, 424 insertions(+), 148 deletions(-) diff --git a/erts/emulator/beam/erl_bif_port.c b/erts/emulator/beam/erl_bif_port.c index 378c5e73fd..fbc92b9730 100644 --- a/erts/emulator/beam/erl_bif_port.c +++ b/erts/emulator/beam/erl_bif_port.c @@ -610,6 +610,7 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump) int binary_io; int soft_eof; Sint linebuf; + Eterm edir = NIL; byte dir[MAXPATHLEN]; /* These are the defaults */ @@ -686,19 +687,10 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump) } else if (option == am_arg0) { char *a0; - int n; - if (is_nil(*tp)) { - n = 0; - } else if( (n = is_string(*tp)) == 0) { + + if ((a0 = erts_convert_filename_to_native(*tp, ERTS_ALC_T_TMP, 1)) == NULL) { goto badarg; } - a0 = (char *) erts_alloc(ERTS_ALC_T_TMP, - (n + 1) * sizeof(byte)); - if (intlist_to_buf(*tp, a0, n) != n) { - erl_exit(1, "%s:%d: Internal error\n", - __FILE__, __LINE__); - } - a0[n] = '\0'; if (opts.argv == NULL) { opts.argv = erts_alloc(ERTS_ALC_T_TMP, 2 * sizeof(char **)); @@ -711,22 +703,7 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump) opts.argv[0] = a0; } } else if (option == am_cd) { - Eterm iolist; - DeclareTmpHeap(heap,4,p); - int r; - - UseTmpHeap(4,p); - heap[0] = *tp; - heap[1] = make_list(heap+2); - heap[2] = make_small(0); - heap[3] = NIL; - iolist = make_list(heap); - r = io_list_to_buf(iolist, (char*) dir, MAXPATHLEN); - UnUseTmpHeap(4,p); - if (r < 0) { - goto badarg; - } - opts.wd = (char *) dir; + edir = *tp; } else { goto badarg; } @@ -838,19 +815,7 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump) goto badarg; } name = tp[1]; - if (is_atom(name)) { - name_buf = (char *) erts_alloc(ERTS_ALC_T_TMP, - atom_tab(atom_val(name))->len+1); - sys_memcpy((void *) name_buf, - (void *) atom_tab(atom_val(name))->name, - atom_tab(atom_val(name))->len); - name_buf[atom_tab(atom_val(name))->len] = '\0'; - } else if ((i = is_string(name))) { - name_buf = (char *) erts_alloc(ERTS_ALC_T_TMP, i + 1); - if (intlist_to_buf(name, name_buf, i) != i) - erl_exit(1, "%s:%d: Internal error\n", __FILE__, __LINE__); - name_buf[i] = '\0'; - } else { + if ((name_buf = erts_convert_filename_to_native(name,ERTS_ALC_T_TMP,0)) == NULL) { goto badarg; } opts.spawn_type = ERTS_SPAWN_EXECUTABLE; @@ -892,7 +857,33 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump) /* Argument vector only if explicit spawn_executable */ goto badarg; } - + + if (edir != NIL) { + /* A working directory is expressed differently if spawn_executable, i.e. Unicode is handles + for spawn_executable... */ + if (opts.spawn_type != ERTS_SPAWN_EXECUTABLE) { + Eterm iolist; + DeclareTmpHeap(heap,4,p); + int r; + + UseTmpHeap(4,p); + heap[0] = edir; + heap[1] = make_list(heap+2); + heap[2] = make_small(0); + heap[3] = NIL; + iolist = make_list(heap); + r = io_list_to_buf(iolist, (char*) dir, MAXPATHLEN); + UnUseTmpHeap(4,p); + if (r < 0) { + goto badarg; + } + opts.wd = (char *) dir; + } else { + if ((opts.wd = erts_convert_filename_to_native(edir,ERTS_ALC_T_TMP,0)) == NULL) { + goto badarg; + } + } + } if (driver != &spawn_driver && opts.exit_status) { goto badarg; @@ -941,6 +932,9 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump) if (opts.argv) { free_args(opts.argv); } + if (opts.wd && opts.wd != ((char *)dir)) { + erts_free(ERTS_ALC_T_TMP, (void *) opts.wd); + } return port_num; badarg: @@ -950,6 +944,7 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump) #undef OPEN_PORT_ERROR } +/* Arguments can be given i unicode and as raw binaries, convert filename is used to convert */ static char **convert_args(Eterm l) { char **pp; @@ -966,22 +961,14 @@ static char **convert_args(Eterm l) pp[i++] = erts_default_arg0; while (is_list(l)) { str = CAR(list_val(l)); - - if (is_nil(str)) { - n = 0; - } else if( (n = is_string(str)) == 0) { - /* Not a string... */ + if ((b = erts_convert_filename_to_native(str,ERTS_ALC_T_TMP,1)) == NULL) { int j; for (j = 1; j < i; ++j) erts_free(ERTS_ALC_T_TMP, pp[j]); erts_free(ERTS_ALC_T_TMP, pp); return NULL; - } - b = (char *) erts_alloc(ERTS_ALC_T_TMP, (n + 1) * sizeof(byte)); - pp[i++] = (char *) b; - if (intlist_to_buf(str, b, n) != n) - erl_exit(1, "%s:%d: Internal error\n", __FILE__, __LINE__); - b[n] = '\0'; + } + pp[i++] = b; l = CDR(list_val(l)); } pp[i] = NULL; diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 1e729cfc2e..3434ce3979 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -2023,6 +2023,79 @@ BIF_RETTYPE binary_to_existing_atom_2(BIF_ALIST_2) * Simpler non-interruptable routines for UTF-8 and * Windowish UTF-16 (restricted) **********************************************************/ +/* + * This function is the heart of the Unicode support for + * open_port - spawn_executable. It converts both the name + * of the executable and the arguments according to the same rules + * as for filename conversion. That means as if your arguments are + * to be raw, you supply binaries, else unicode characters are allowed up to + * the encoding maximum (256 of the unicode max). + * Depending on the filename encoding standard, the vector is then + * converted to whatever is used, which might mean win_utf16 if on windows. + * Do not peek into the argument vector or filenam with ordinary + * string routines, that will certainly fail on some OS. + */ + +char *erts_convert_filename_to_native(Eterm name, ErtsAlcType_t alloc_type, int allow_empty) +{ + int encoding = erts_get_native_filename_encoding(); + char* name_buf = NULL; + + if (is_atom(name) || is_list(name) || (allow_empty && is_nil(name))) { + Sint need; + if ((need = erts_native_filename_need(name,encoding)) < 0) { + return NULL; + } + if (encoding == ERL_FILENAME_WIN_WCHAR) { + need += 2; + } else { + ++need; + } + name_buf = (char *) erts_alloc(alloc_type, need); + erts_native_filename_put(name,encoding,(byte *)name_buf); + name_buf[need-1] = 0; + if (encoding == ERL_FILENAME_WIN_WCHAR) { + name_buf[need-2] = 0; + } + } else if (is_binary(name)) { + byte *temp_alloc = NULL; + byte *bytes; + byte *err_pos; + Uint size,num_chars; + + size = binary_size(name); + bytes = erts_get_aligned_binary_bytes(name, &temp_alloc); + if (encoding != ERL_FILENAME_WIN_WCHAR) { + /*Add 0 termination only*/ + name_buf = (char *) erts_alloc(alloc_type, size+1); + memcpy(name_buf,bytes,size); + name_buf[size]=0; + } else if (erts_analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != ERTS_UTF8_OK || + erts_get_user_requested_filename_encoding() == ERL_FILENAME_LATIN1) { + byte *p; + /* What to do now? Maybe latin1, so just take byte for byte instead */ + name_buf = (char *) erts_alloc(alloc_type, (size+1)*2); + p = (byte *) name_buf; + while (size--) { + *p++ = *bytes++; + *p++ = 0; + } + *p++ = 0; + *p++ = 0; + } else { /* WIN_WCHAR and valid UTF8 */ + name_buf = (char *) erts_alloc(alloc_type, (num_chars+1)*2); + erts_copy_utf8_to_utf16_little((byte *) name_buf, bytes, num_chars); + name_buf[num_chars*2] = 0; + name_buf[num_chars*2+1] = 0; + } + erts_free_aligned_binary_bytes(temp_alloc); + } else { + return NULL; + } + return name_buf; +} + + Sint erts_native_filename_need(Eterm ioterm, int encoding) { Eterm *objp; diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 6e86de3ebf..524db2a2eb 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -1602,6 +1602,8 @@ Sint erts_native_filename_need(Eterm ioterm, int encoding); void erts_copy_utf8_to_utf16_little(byte *target, byte *bytes, int num_chars); int erts_analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left); +char *erts_convert_filename_to_native(Eterm name, ErtsAlcType_t alloc_type, int allow_empty); + #define ERTS_UTF8_OK 0 #define ERTS_UTF8_INCOMPLETE 1 #define ERTS_UTF8_ERROR 2 diff --git a/erts/emulator/drivers/common/efile_drv.c b/erts/emulator/drivers/common/efile_drv.c index ac73897cf2..786fa7da77 100644 --- a/erts/emulator/drivers/common/efile_drv.c +++ b/erts/emulator/drivers/common/efile_drv.c @@ -1401,7 +1401,7 @@ static void invoke_readlink(void *data) d->result_ok = efile_readlink(&d->errInfo, d->b, resbuf+1, RESBUFSIZE-1); if (d->result_ok != 0) - strcpy((char *) d->b + 1, resbuf+1); + FILENAME_COPY((char *) d->b + 1, resbuf+1); } static void invoke_altname(void *data) @@ -1413,7 +1413,7 @@ static void invoke_altname(void *data) d->result_ok = efile_altname(&d->errInfo, d->b, resbuf+1, RESBUFSIZE-1); if (d->result_ok != 0) - strcpy((char *) d->b + 1, resbuf+1); + FILENAME_COPY((char *) d->b + 1, resbuf+1); } static void invoke_pwritev(void *data) { diff --git a/erts/emulator/drivers/common/gzio.c b/erts/emulator/drivers/common/gzio.c index 801bc61d4d..5531a275ea 100644 --- a/erts/emulator/drivers/common/gzio.c +++ b/erts/emulator/drivers/common/gzio.c @@ -28,6 +28,7 @@ #ifdef __WIN32__ #define HAVE_CONFLICTING_FREAD_DECLARATION +#define FILENAMES_16BIT 1 #endif #ifdef STDC @@ -102,6 +103,40 @@ local uLong getLong OF((gz_stream *s)); # define ERTS_GZREAD(File, Buf, Count) fread((Buf), 1, (Count), (File)) #endif +/* + * Ripped from efile_drv.c + */ + +#ifdef FILENAMES_16BIT +# define FILENAME_BYTELEN(Str) filename_len_16bit(Str) +# define FILENAME_COPY(To,From) filename_cpy_16bit((To),(From)) +# define FILENAME_CHARSIZE 2 + + static int filename_len_16bit(const char *str) + { + const char *p = str; + while(*p != '\0' || p[1] != '\0') { + p += 2; + } + return (p - str); + } + + static void filename_cpy_16bit(char *to, const char *from) + { + while(*from != '\0' || from[1] != '\0') { + *to++ = *from++; + *to++ = *from++; + } + *to++ = *from++; + *to++ = *from++; + } + +#else +# define FILENAME_BYTELEN(Str) strlen(Str) +# define FILENAME_COPY(To,From) strcpy(To,From) +# define FILENAME_CHARSIZE 1 +#endif + /* =========================================================================== Opens a gzip (.gz) file for reading or writing. The mode parameter is as in fopen ("rb" or "wb"). The file is given either by file descriptor @@ -144,11 +179,11 @@ local gzFile gz_open (path, mode) s->position = 0; s->destroy = destroy; - s->path = (char*)ALLOC(strlen(path)+1); + s->path = (char*)ALLOC(FILENAME_BYTELEN(path)+FILENAME_CHARSIZE); if (s->path == NULL) { return s->destroy(s), (gzFile)Z_NULL; } - strcpy(s->path, path); /* do this early for debugging */ + FILENAME_COPY(s->path, path); /* do this early for debugging */ s->mode = '\0'; do { @@ -194,7 +229,22 @@ local gzFile gz_open (path, mode) s->stream.avail_out = Z_BUFSIZE; errno = 0; -#ifdef UNIX +#if defined(FILENAMES_16BIT) + { + char wfmode[160]; + int i=0,j; + for(j=0;fmode[j] != '\0';++j) { + wfmode[i++]=fmode[j]; + wfmode[i++]='\0'; + } + wfmode[i++] = '\0'; + wfmode[i++] = '\0'; + s->file = F_OPEN(path, wfmode); + if (s->file == NULL) { + return s->destroy(s), (gzFile)Z_NULL; + } + } +#elif defined(UNIX) if (s->mode == 'r') { s->file = open(path, O_RDONLY); } else { diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c index 39b04b26a9..37041ed987 100644 --- a/erts/emulator/sys/win32/sys.c +++ b/erts/emulator/sys/win32/sys.c @@ -67,14 +67,17 @@ static void async_read_file(struct async_io* aio, LPVOID buf, DWORD numToRead); static int async_write_file(struct async_io* aio, LPVOID buf, DWORD numToWrite); static int get_overlapped_result(struct async_io* aio, LPDWORD pBytesRead, BOOL wait); -static BOOL CreateChildProcess(char *, HANDLE, HANDLE, +static BOOL create_child_process(char *, HANDLE, HANDLE, HANDLE, LPHANDLE, BOOL, LPVOID, LPTSTR, unsigned, char **, int *); static int create_pipe(LPHANDLE, LPHANDLE, BOOL, BOOL); -static int ApplicationType(const char* originalName, char fullPath[MAX_PATH], +static int application_type(const char* originalName, char fullPath[MAX_PATH], BOOL search_in_path, BOOL handle_quotes, int *error_return); +static int application_type_w(const char* originalName, WCHAR fullPath[MAX_PATH], + BOOL search_in_path, BOOL handle_quotes, + int *error_return); HANDLE erts_service_event; @@ -87,7 +90,7 @@ static erts_smp_atomic_t pipe_creation_counter; static erts_smp_mtx_t sys_driver_data_lock; -/* Results from ApplicationType is one of */ +/* Results from application_type(_w) is one of */ #define APPL_NONE 0 #define APPL_DOS 1 #define APPL_WIN3X 2 @@ -1235,8 +1238,10 @@ spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* opts) */ DEBUGF(("Spawning \"%s\"\n", name)); - envir = win_build_environment(envir); - ok = CreateChildProcess(name, + envir = win_build_environment(envir); /* Still an ansi environment, could be + converted to unicode for spawn_executable, but + that is not done (yet) */ + ok = create_child_process(name, hChildStdin, hChildStdout, hChildStderr, @@ -1315,7 +1320,7 @@ create_file_thread(AsyncIo* aio, int mode) } /* - * A helper function used by CreateChildProcess(). + * A helper function used by create_child_process(). * Parses a command line with arguments and returns the length of the * first part containing the program name. * Example: input = "\"Program Files\"\\erl arg1 arg2" @@ -1356,24 +1361,25 @@ int parse_command(char* cmd){ return i; } -BOOL need_quotes(char *str) +static BOOL need_quotes(WCHAR *str) { int in_quote = 0; int backslashed = 0; int naked_space = 0; - while (*str != '\0') { + + while (*str != L'\0') { switch (*str) { - case '\\' : + case L'\\' : backslashed = !backslashed; break; - case '"': + case L'"': if (backslashed) { backslashed=0; } else { in_quote = !in_quote; } break; - case ' ': + case L' ': backslashed = 0; if (!(backslashed || in_quote)) { naked_space++; @@ -1392,7 +1398,7 @@ BOOL need_quotes(char *str) /* *---------------------------------------------------------------------- * - * CreateChildProcess -- + * create_child_process -- * * Create a child process that has pipes as its * standard input, output, and error. The child process runs @@ -1417,7 +1423,7 @@ BOOL need_quotes(char *str) */ static BOOL -CreateChildProcess +create_child_process ( char *origcmd, /* Command line for child process (including * name of executable). Or whole executable if st is @@ -1436,14 +1442,12 @@ CreateChildProcess ) { PROCESS_INFORMATION piProcInfo = {0}; - STARTUPINFO siStartInfo = {0}; BOOL ok = FALSE; int applType; /* Not to be changed for different types of executables */ int staticCreateFlags = GetPriorityClass(GetCurrentProcess()); int createFlags = DETACHED_PROCESS; char *newcmdline = NULL; - char execPath[MAX_PATH]; int cmdlength; char* thecommand; LPTSTR appname = NULL; @@ -1451,14 +1455,17 @@ CreateChildProcess *errno_return = -1; - siStartInfo.cb = sizeof(STARTUPINFO); - siStartInfo.dwFlags = STARTF_USESTDHANDLES; - siStartInfo.hStdInput = hStdin; - siStartInfo.hStdOutput = hStdout; - siStartInfo.hStdError = hStderr; - if (st != ERTS_SPAWN_EXECUTABLE) { + STARTUPINFO siStartInfo = {0}; + char execPath[MAX_PATH]; + + siStartInfo.cb = sizeof(STARTUPINFO); + siStartInfo.dwFlags = STARTF_USESTDHANDLES; + siStartInfo.hStdInput = hStdin; + siStartInfo.hStdOutput = hStdout; + siStartInfo.hStdError = hStderr; + /* * Parse out the program name from the command line (it can be quoted and * contain spaces). @@ -1470,9 +1477,9 @@ CreateChildProcess thecommand[cmdlength] = '\0'; DEBUGF(("spawn command: %s\n", thecommand)); - applType = ApplicationType(thecommand, execPath, TRUE, + applType = application_type(thecommand, execPath, TRUE, TRUE, errno_return); - DEBUGF(("ApplicationType returned for (%s) is %d\n", thecommand, applType)); + DEBUGF(("application_type returned for (%s) is %d\n", thecommand, applType)); erts_free(ERTS_ALC_T_TMP, (void *) thecommand); if (applType == APPL_NONE) { erts_free(ERTS_ALC_T_TMP,newcmdline); @@ -1501,126 +1508,147 @@ CreateChildProcess strcat(newcmdline, execPath); strcat(newcmdline, origcmd+cmdlength); - } else { /* ERTS_SPAWN_EXECUTABLE */ + DEBUGF(("Creating child process: %s, createFlags = %d\n", newcmdline, createFlags)); + ok = CreateProcessA(appname, + newcmdline, + NULL, + NULL, + TRUE, + createFlags | staticCreateFlags, + env, + wd, + &siStartInfo, + &piProcInfo); + + } else { /* ERTS_SPAWN_EXECUTABLE, filename and args are in unicode ({utf16,little}) */ int run_cmd = 0; - applType = ApplicationType(origcmd, execPath, FALSE, FALSE, - errno_return); + STARTUPINFOW siStartInfo = {0}; + WCHAR execPath[MAX_PATH]; + + + siStartInfo.cb = sizeof(STARTUPINFOW); + siStartInfo.dwFlags = STARTF_USESTDHANDLES; + siStartInfo.hStdInput = hStdin; + siStartInfo.hStdOutput = hStdout; + siStartInfo.hStdError = hStderr; + + applType = application_type_w(origcmd, (char *) execPath, FALSE, FALSE, + errno_return); if (applType == APPL_NONE) { return FALSE; } if (applType == APPL_DOS) { - /* - * See comment above - */ + /* + * See comment above + */ - siStartInfo.wShowWindow = SW_HIDE; - siStartInfo.dwFlags |= STARTF_USESHOWWINDOW; - createFlags = CREATE_NEW_CONSOLE; - run_cmd = 1; + siStartInfo.wShowWindow = SW_HIDE; + siStartInfo.dwFlags |= STARTF_USESHOWWINDOW; + createFlags = CREATE_NEW_CONSOLE; + run_cmd = 1; } else if (hide) { - DEBUGF(("hiding window\n")); - siStartInfo.wShowWindow = SW_HIDE; - siStartInfo.dwFlags |= STARTF_USESHOWWINDOW; - createFlags = 0; + DEBUGF(("hiding window\n")); + siStartInfo.wShowWindow = SW_HIDE; + siStartInfo.dwFlags |= STARTF_USESHOWWINDOW; + createFlags = 0; } if (run_cmd) { - char cmdPath[MAX_PATH]; + WCHAR cmdPath[MAX_PATH]; int cmdType; - cmdType = ApplicationType("cmd.exe", cmdPath, TRUE, FALSE, errno_return); + cmdType = application_type_w((char *) L"cmd.exe", (char *) cmdPath, TRUE, FALSE, errno_return); if (cmdType == APPL_NONE || cmdType == APPL_DOS) { return FALSE; } - appname = (char *) erts_alloc(ERTS_ALC_T_TMP, strlen(cmdPath)+1); - strcpy(appname,cmdPath); + appname = (char *) erts_alloc(ERTS_ALC_T_TMP, (wcslen(cmdPath)+1)*sizeof(WCHAR)); + wcscpy((WCHAR *) appname,cmdPath); } else { - appname = (char *) erts_alloc(ERTS_ALC_T_TMP, strlen(execPath)+1); - strcpy(appname,execPath); + appname = (char *) erts_alloc(ERTS_ALC_T_TMP, (wcslen(execPath)+1)*sizeof(WCHAR)); + wcscpy((WCHAR *) appname, execPath); } - if (argv == NULL) { + if (argv == NULL) { BOOL orig_need_q = need_quotes(execPath); - char *ptr; - int ocl = strlen(execPath); + WCHAR *ptr; + int ocl = wcslen(execPath); if (run_cmd) { newcmdline = (char *) erts_alloc(ERTS_ALC_T_TMP, - ocl + ((orig_need_q) ? 3 : 1) - + 11); - memcpy(newcmdline,"cmd.exe /c ",11); - ptr = newcmdline + 11; + (ocl + ((orig_need_q) ? 3 : 1) + + 11)*sizeof(WCHAR)); + memcpy(newcmdline,L"cmd.exe /c ",11*sizeof(WCHAR)); + ptr = (WCHAR *) (newcmdline + (11*sizeof(WCHAR))); } else { newcmdline = (char *) erts_alloc(ERTS_ALC_T_TMP, - ocl + ((orig_need_q) ? 3 : 1)); - ptr = newcmdline; + (ocl + ((orig_need_q) ? 3 : 1))*sizeof(WCHAR)); + ptr = (WCHAR *) newcmdline; } if (orig_need_q) { - *ptr++ = '"'; + *ptr++ = L'"'; } - memcpy(ptr,execPath,ocl); + memcpy(ptr,execPath,ocl*sizeof(WCHAR)); ptr += ocl; if (orig_need_q) { - *ptr++ = '"'; + *ptr++ = L'"'; } - *ptr = '\0'; + *ptr = L'\0'; } else { int sum = 1; /* '\0' */ - char **ar = argv; - char *n; + WCHAR **ar = (WCHAR **) argv; + WCHAR *n; char *save_arg0 = NULL; if (argv[0] == erts_default_arg0 || run_cmd) { save_arg0 = argv[0]; - argv[0] = execPath; + argv[0] = (char *) execPath; } if (run_cmd) { sum += 11; /* cmd.exe /c */ } while (*ar != NULL) { - sum += strlen(*ar); + sum += wcslen(*ar); if (need_quotes(*ar)) { sum += 2; /* quotes */ } sum++; /* space */ ++ar; } - ar = argv; - newcmdline = erts_alloc(ERTS_ALC_T_TMP, sum); - n = newcmdline; + ar = (WCHAR **) argv; + newcmdline = erts_alloc(ERTS_ALC_T_TMP, sum*sizeof(WCHAR)); + n = (WCHAR *) newcmdline; if (run_cmd) { - memcpy(n,"cmd.exe /c ",11); + memcpy(n,L"cmd.exe /c ",11*sizeof(WCHAR)); n += 11; } while (*ar != NULL) { int q = need_quotes(*ar); - sum = strlen(*ar); + sum = wcslen(*ar); if (q) { - *n++ = '"'; + *n++ = L'"'; } - memcpy(n,*ar,sum); + memcpy(n,*ar,sum*sizeof(WCHAR)); n += sum; if (q) { - *n++ = '"'; + *n++ = L'"'; } - *n++ = ' '; + *n++ = L' '; ++ar; } - ASSERT(n > newcmdline); - *(n-1) = '\0'; + *(n-1) = L'\0'; if (save_arg0 != NULL) { argv[0] = save_arg0; } } - } - DEBUGF(("Creating child process: %s, createFlags = %d\n", newcmdline, createFlags)); - ok = CreateProcess(appname, - newcmdline, - NULL, - NULL, - TRUE, - createFlags | staticCreateFlags, - env, - wd, - &siStartInfo, - &piProcInfo); - + DEBUGF(("Creating child process: %s, createFlags = %d\n", newcmdline, createFlags)); + ok = CreateProcessW((WCHAR *) appname, + (WCHAR *) newcmdline, + NULL, + NULL, + TRUE, + createFlags | staticCreateFlags, + env, + (WCHAR *) wd, + &siStartInfo, + &piProcInfo); + + } /* end SPAWN_EXECUTABLE */ if (newcmdline != NULL) { erts_free(ERTS_ALC_T_TMP,newcmdline); } @@ -1739,7 +1767,7 @@ static int create_pipe(HANDLE *phRead, HANDLE *phWrite, BOOL inheritRead, BOOL o -static int ApplicationType +static int application_type ( const char *originalName, /* Name of the application to find. */ char fullPath[MAX_PATH], /* Filled with complete path to @@ -1893,6 +1921,146 @@ static int ApplicationType return applType; } +static int application_type_w (const char *originalName, /* Name of the application to find. */ + WCHAR wfullpath[MAX_PATH],/* Filled with complete path to + * application. */ + BOOL search_in_path, /* If we should search the system wide path */ + BOOL handle_quotes, /* If we should handle quotes around executable */ + int *error_return) /* A place to put an error code */ +{ + int applType, i; + HANDLE hFile; + WCHAR *ext, *rest; + char buf[2]; + DWORD read; + IMAGE_DOS_HEADER header; + static WCHAR extensions[][5] = {L"", L".com", L".exe", L".bat"}; + int is_quoted; + int len; + WCHAR *wname = (WCHAR *) originalName; + WCHAR xfullpath[MAX_PATH]; + + len = wcslen(wname); + is_quoted = handle_quotes && len > 0 && wname[0] == L'"' && + wname[len-1] == L'"'; + + applType = APPL_NONE; + *error_return = ENOENT; + for (i = 0; i < (int) (sizeof(extensions) / sizeof(extensions[0])); i++) { + if(is_quoted) { + lstrcpynW(xfullpath, wname+1, MAX_PATH - 7); /* Cannot start using StringCchCopy yet, we support + older platforms */ + len = wcslen(xfullpath); + if(len > 0) { + xfullpath[len-1] = L'\0'; + } + } else { + lstrcpynW(xfullpath, wname, MAX_PATH - 5); + } + wcscat(xfullpath, extensions[i]); + /* It seems that the Unicode version does not allow in and out parameter to overlap. */ + SearchPathW((search_in_path) ? NULL : L".", xfullpath, NULL, MAX_PATH, wfullpath, &rest); + + /* + * Ignore matches on directories or data files, return if identified + * a known type. + */ + + if (GetFileAttributesW(wfullpath) & FILE_ATTRIBUTE_DIRECTORY) { + continue; + } + + ext = wcsrchr(wfullpath, L'.'); + if ((ext != NULL) && (_wcsicmp(ext, L".bat") == 0)) { + *error_return = EACCES; + applType = APPL_DOS; + break; + } + + hFile = CreateFileW(wfullpath, GENERIC_READ, FILE_SHARE_READ, NULL, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (hFile == INVALID_HANDLE_VALUE) { + continue; + } + + *error_return = EACCES; /* If considered an error, + it's an access error */ + header.e_magic = 0; + ReadFile(hFile, (void *) &header, sizeof(header), &read, NULL); + if (header.e_magic != IMAGE_DOS_SIGNATURE) { + /* + * Doesn't have the magic number for relocatable executables. If + * filename ends with .com, assume it's a DOS application anyhow. + * Note that we didn't make this assumption at first, because some + * supposed .com files are really 32-bit executables with all the + * magic numbers and everything. + */ + + CloseHandle(hFile); + if ((ext != NULL) && (_wcsicmp(ext, L".com") == 0)) { + applType = APPL_DOS; + break; + } + continue; + } + if (header.e_lfarlc != sizeof(header)) { + /* + * All Windows 3.X and Win32 and some DOS programs have this value + * set here. If it doesn't, assume that since it already had the + * other magic number it was a DOS application. + */ + + CloseHandle(hFile); + applType = APPL_DOS; + break; + } + + /* + * The DWORD at header.e_lfanew points to yet another magic number. + */ + + buf[0] = '\0'; + SetFilePointer(hFile, header.e_lfanew, NULL, FILE_BEGIN); + ReadFile(hFile, (void *) buf, 2, &read, NULL); + CloseHandle(hFile); + + if ((buf[0] == 'L') && (buf[1] == 'E')) { + applType = APPL_DOS; + } else if ((buf[0] == 'N') && (buf[1] == 'E')) { + applType = APPL_WIN3X; + } else if ((buf[0] == 'P') && (buf[1] == 'E')) { + applType = APPL_WIN32; + } else { + continue; + } + break; + } + + if (applType == APPL_NONE) { + return APPL_NONE; + } + + if ((applType == APPL_DOS) || (applType == APPL_WIN3X)) { + /* + * Replace long path name of executable with short path name for + * 16-bit applications. Otherwise the application may not be able + * to correctly parse its own command line to separate off the + * application name from the arguments. + */ + + GetShortPathNameW(wfullpath, wfullpath, MAX_PATH); + } + if (is_quoted) { + /* restore quotes on quoted program name */ + len = wcslen(wfullpath); + memmove(wfullpath+1,wfullpath,len*sizeof(WCHAR)); + wfullpath[0]=L'"'; + wfullpath[len+1]=L'"'; + wfullpath[len+2]=L'\0'; + } + return applType; +} + /* * Thread function used to emulate overlapped reading. */ diff --git a/erts/emulator/zlib/zutil.h b/erts/emulator/zlib/zutil.h index d560382691..a8872e1c88 100644 --- a/erts/emulator/zlib/zutil.h +++ b/erts/emulator/zlib/zutil.h @@ -142,6 +142,7 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ #ifdef WIN32 # ifndef __CYGWIN__ /* Cygwin is Unix, not Win32 */ # define OS_CODE 0x0b +# define F_OPEN(name, mode) _wfopen((WCHAR *)(name), (WCHAR *)(mode)) /* Unicode */ # endif #endif diff --git a/lib/kernel/test/file_name_SUITE.erl b/lib/kernel/test/file_name_SUITE.erl index 57c4353f36..c96621d036 100644 --- a/lib/kernel/test/file_name_SUITE.erl +++ b/lib/kernel/test/file_name_SUITE.erl @@ -745,12 +745,7 @@ rand_comp_decomp(Max) -> L = [ rand_decomp() || _ <- lists:seq(1,N) ], LC = [ A || {A,_} <- L], LD = lists:flatten([B || {_,B} <- L]), - LB = case os:type() of - {win32,_} -> - unicode:characters_to_binary(LD,unicode,{utf16,little}); - _ -> - unicode:characters_to_binary(LD,unicode,utf8) - end, + LB = unicode:characters_to_binary(LD,unicode,utf8), {LC,LB}. rand_decomp() -> -- cgit v1.2.3 From 561617f5ce8ac04e52ebb6cac2b131850a787869 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 26 Nov 2010 14:12:13 +0100 Subject: Adapt inet_drv to Visual Studio 2008 --- erts/configure.in | 2 +- erts/emulator/drivers/common/inet_drv.c | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/erts/configure.in b/erts/configure.in index 8d629c25ae..762a86ef6f 100644 --- a/erts/configure.in +++ b/erts/configure.in @@ -1479,7 +1479,7 @@ AC_CHECK_HEADERS(fcntl.h limits.h unistd.h syslog.h dlfcn.h ieeefp.h \ sys/ioctl.h sys/time.h sys/uio.h \ sys/socket.h sys/sockio.h sys/socketio.h \ net/errno.h malloc.h mach-o/dyld.h arpa/nameser.h \ - pty.h util.h utmp.h langinfo.h poll.h) + pty.h util.h utmp.h langinfo.h poll.h sdkddkver.h) AC_CHECK_HEADER(sys/resource.h, [AC_DEFINE(HAVE_SYS_RESOURCE_H, 1, diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index 18f7cdd15a..6f56cab575 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -88,9 +88,21 @@ #include #endif #include +#include /* NEED VC 6.0 or higher */ + +/* Visual studio 2008+: NTDDI_VERSION needs to be set for iphlpapi.h + to define the right structures. It needs to be set to WINXP (or LONGHORN) + for IPV6 to work and it's set lower by default, so we need to change it. */ +#ifdef HAVE_SDKDDKVER_H +# include +# ifdef NTDDI_VERSION +# undef NTDDI_VERSION +# endif +# define NTDDI_VERSION NTDDI_WINXP +#endif + #include -#include /* NEED VC 6.0 !!! */ #undef WANT_NONBLOCKING #include "sys.h" -- cgit v1.2.3 From 94eda4f777d79b159f0b6fd5ff5519649aa5affb Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 26 Nov 2010 14:14:22 +0100 Subject: Teach prim_file not to accept atoms and not to throw exceptions --- erts/emulator/beam/erl_unicode.c | 7 +++++++ erts/preloaded/src/prim_file.erl | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 3434ce3979..72207df621 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -2429,6 +2429,13 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1) Sint need; Eterm bin_term; byte* bin_p; + /* Prim file explicitly does not allow atoms, although we could + very well cope with it. Instead of letting 'file' handle them, + it would probably be more efficient to handle them here. Subject to + change in R15. */ + if (is_atom(BIF_ARG_1)) { + BIF_ERROR(BIF_P,BADARG); + } if (is_binary(BIF_ARG_1)) { byte *temp_alloc = NULL; byte *bytes; diff --git a/erts/preloaded/src/prim_file.erl b/erts/preloaded/src/prim_file.erl index 75b0faacd3..10be852e92 100644 --- a/erts/preloaded/src/prim_file.erl +++ b/erts/preloaded/src/prim_file.erl @@ -1228,5 +1228,7 @@ reverse(X) -> lists:reverse(X, []). reverse(L, T) -> lists:reverse(L, T). % Will add zero termination too +% The 'EXIT' tuple from a bad argument will eventually generate an error +% in list_to_binary, which is caught and generates the {error,badarg} return pathname(File) -> - prim_file:internal_name2native(File). + (catch prim_file:internal_name2native(File)). -- cgit v1.2.3 From 15f5143f0d7c2bde33e1d3e38e622943d555d415 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 26 Nov 2010 14:15:22 +0100 Subject: Update preloaded prim_file --- erts/preloaded/ebin/prim_file.beam | Bin 30536 -> 31624 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/erts/preloaded/ebin/prim_file.beam b/erts/preloaded/ebin/prim_file.beam index a3f300268f..8a1c22e5b5 100644 Binary files a/erts/preloaded/ebin/prim_file.beam and b/erts/preloaded/ebin/prim_file.beam differ -- cgit v1.2.3 From 0c18c4ba3f561ae6d9ff943863bfc62e5e9099e1 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 26 Nov 2010 14:13:25 +0100 Subject: Corrected testcases broken by unicode filenames Also corrected type-info for bifs --- lib/hipe/cerl/erl_bif_types.erl | 29 +++++++++++++++++++++++++++++ lib/kernel/src/code.erl | 2 ++ lib/kernel/test/code_SUITE.erl | 18 +++++++++++++++++- lib/kernel/test/os_SUITE.erl | 3 ++- lib/stdlib/test/binary_module_SUITE.erl | 2 +- 5 files changed, 51 insertions(+), 3 deletions(-) diff --git a/lib/hipe/cerl/erl_bif_types.erl b/lib/hipe/cerl/erl_bif_types.erl index 696414eb7a..6936e5c7b4 100644 --- a/lib/hipe/cerl/erl_bif_types.erl +++ b/lib/hipe/cerl/erl_bif_types.erl @@ -2014,6 +2014,18 @@ type(file, set_cwd, 1, Xs) -> end); type(file, write_file, 2, Xs) -> strict(arg_types(file, write_file, 2), Xs, fun (_) -> t_file_return() end); +type(file, native_name_encoding, 0, _) -> + t_file_encoding(); +%%-- prim_file ---------------------------------------------------------------- +type(prim_file, internal_name2native, 1, Xs) -> + strict(arg_types(prim_file, internal_name2native, 1), Xs, + fun (_) -> t_binary() end); +type(prim_file, internal_native2name, 1, Xs) -> + strict(arg_types(prim_file, internal_native2name, 1), Xs, + fun (_) -> t_prim_file_name() end); +type(prim_file, internal_normalize_utf8, 1, Xs) -> + strict(arg_types(prim_file, internal_normalize_utf8, 1), Xs, + fun (_) -> t_binary() end); %%-- gen_tcp ------------------------------------------------------------------ %% NOTE: All type information for this module added to avoid loss of precision type(gen_tcp, accept, 1, Xs) -> @@ -4216,6 +4228,15 @@ arg_types(file, write, 2) -> [t_file_io_device(), t_iodata()]; arg_types(file, write_file, 2) -> [t_file_name(), t_sup(t_binary(), t_list())]; +arg_types(file, native_name_encoding, 0) -> + []; +%%-- prim_file ---------------------------------------------------------------- +arg_types(prim_file, internal_name2native, 1) -> + [t_prim_file_name()]; +arg_types(prim_file, internal_native2name, 1) -> + [t_binary()]; +arg_types(prim_file, internal_normalize_utf8, 1) -> + [t_binary()]; %%------- gen_tcp ------------------------------------------------------------- arg_types(gen_tcp, accept, 1) -> [t_socket()]; @@ -4997,6 +5018,7 @@ t_file_io_device() -> t_file_name() -> t_sup([t_atom(), t_string(), + t_binary(), %% DeepList = [char() | atom() | DeepList] -- approximation below t_list(t_sup([t_atom(), t_string(), t_list()]))]). @@ -5051,6 +5073,10 @@ t_file_posix_error() -> t_file_return() -> t_sup(t_atom('ok'), t_tuple([t_atom('error'), t_file_posix_error()])). +t_prim_file_name() -> + t_sup([t_string(), + t_binary()]). + %% ===================================================================== %% These are used for the built-in functions of 'gen_tcp' %% ===================================================================== @@ -5253,6 +5279,9 @@ t_ML() -> % a binary or a possibly deep list of integers or binaries t_encoding() -> t_atoms(['latin1', 'unicode', 'utf8', 'utf16', 'utf32']). +t_file_encoding() -> + t_atoms(['latin1', 'utf8']). + t_encoding_a2b() -> % for the 2nd arg of atom_to_binary/2 and binary_to_atom/2 t_atoms(['latin1', 'unicode', 'utf8']). diff --git a/lib/kernel/src/code.erl b/lib/kernel/src/code.erl index ec256d5806..d37b74bc03 100644 --- a/lib/kernel/src/code.erl +++ b/lib/kernel/src/code.erl @@ -286,6 +286,8 @@ do_start(Flags) -> ets:module_info(module), os:module_info(module), + binary:module_info(module), + unicode:module_info(module), filename:module_info(module), lists:module_info(module), diff --git a/lib/kernel/test/code_SUITE.erl b/lib/kernel/test/code_SUITE.erl index 6f846ebc56..0c7440a13e 100644 --- a/lib/kernel/test/code_SUITE.erl +++ b/lib/kernel/test/code_SUITE.erl @@ -645,7 +645,7 @@ analyse([], [This={M,F,A}|Path], Visited, ErrCnt0) -> %% These modules should be loaded by code.erl before %% the code_server is started. OK = [erlang, os, prim_file, erl_prim_loader, init, ets, - code_server, lists, lists_sort, filename, packages, + code_server, lists, lists_sort, unicode, binary, filename, packages, gb_sets, gb_trees, hipe_unified_loader, hipe_bifs, prim_zip, zlib], ErrCnt1 = @@ -673,6 +673,22 @@ analyse2(MFA={_,_,_}, Path, Visited0) -> %%%% We need to check these manually... % fun's are ok as long as they are defined locally. +check_funs({'$M_EXPR','$F_EXPR',_}, + [{unicode,characters_to_binary_int,3}, + {unicode,characters_to_binary,3}, + {filename,filename_string_to_binary,1}|_]) -> 0; +check_funs({'$M_EXPR','$F_EXPR',_}, + [{unicode,ml_map,3}, + {unicode,characters_to_binary_int,3}, + {unicode,characters_to_binary,3}, + {filename,filename_string_to_binary,1}|_]) -> 0; +check_funs({'$M_EXPR','$F_EXPR',_}, + [{unicode,do_o_binary2,2}, + {unicode,do_o_binary,2}, + {unicode,o_trans,1}, + {unicode,characters_to_binary_int,3}, + {unicode,characters_to_binary,3}, + {filename,filename_string_to_binary,1}|_]) -> 0; check_funs({'$M_EXPR','$F_EXPR',_}, [{code_server,load_native_code,4}, {code_server,load_native_code_1,2}, diff --git a/lib/kernel/test/os_SUITE.erl b/lib/kernel/test/os_SUITE.erl index ace9501d18..eacf3c7584 100644 --- a/lib/kernel/test/os_SUITE.erl +++ b/lib/kernel/test/os_SUITE.erl @@ -204,8 +204,9 @@ evil(Config) when is_list(Config) -> evil_loop(Parent, ?EVIL_LOOPS,N) end) end, lists:seq(1, ?EVIL_PROCS)), - Devil = spawn(fun () -> devil(hd(Ps), hd(lists:reverse(Ps))) end), + Devil = spawn_link(fun () -> devil(hd(Ps), hd(lists:reverse(Ps))) end), lists:foreach(fun (P) -> receive {P, done} -> ok end end, Ps), + unlink(Devil), exit(Devil, kill), test_server:timetrap_cancel(Dog), ok. diff --git a/lib/stdlib/test/binary_module_SUITE.erl b/lib/stdlib/test/binary_module_SUITE.erl index 16ed9a2c26..e4cdcf6125 100644 --- a/lib/stdlib/test/binary_module_SUITE.erl +++ b/lib/stdlib/test/binary_module_SUITE.erl @@ -186,7 +186,7 @@ badargs(Config) when is_list(Config) -> binary:match(<<1,2,3>>, {ac,ets:match_spec_compile([{'_',[],['$_']}])}, [{scope,{0,1}}])), - ?line nomatch = + ?line [] = ?MASK_ERROR(binary:matches(<<1,2,3>>,<<1>>,[{scope,{0,0}}])), ?line badarg = ?MASK_ERROR(binary:matches(<<1,2,3>>,{bm,<<>>},[{scope,{0,1}}])), -- cgit v1.2.3 From e5bd984329db28dc0e34cf9dd7f6a1cc97c3192c Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 29 Nov 2010 14:05:28 +0100 Subject: Adapt new soft and hard link routines on Windos to Unicode Also close find-handles in altname and other minor corrections to patch --- erts/emulator/drivers/win32/win_efile.c | 60 +++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/erts/emulator/drivers/win32/win_efile.c b/erts/emulator/drivers/win32/win_efile.c index 9fb4a66750..d0f13437dc 100755 --- a/erts/emulator/drivers/win32/win_efile.c +++ b/erts/emulator/drivers/win32/win_efile.c @@ -877,26 +877,28 @@ efile_fileinfo(Efile_error* errInfo, Efile_info* pInfo, /* * given that we know this is a symlink, we should be able to find its target */ - char target_name[256]; - if (efile_readlink(errInfo, name, target_name,256) == 1) { + WCHAR target_name[_MAX_PATH]; + if (efile_readlink(errInfo, (char *) name, + (char *) target_name,256) == 1) { + FindClose(findhandle); return efile_fileinfo(errInfo, pInfo, - target_name, info_for_link); + (char *) target_name, info_for_link); } } -#if 0 /* number of links: */ { HANDLE handle; /* Handle returned by CreateFile() */ BY_HANDLE_FILE_INFORMATION fileInfo; /* from CreateFile() */ - if (handle = CreateFile(name, GENERIC_READ, 0,NULL, + if (handle = CreateFileW(name, GENERIC_READ, 0,NULL, OPEN_EXISTING, 0, NULL)) { GetFileInformationByHandle(handle, &fileInfo); pInfo->links = fileInfo.nNumberOfLinks; CloseHandle(handle); - } + } else { + pInfo->links = 1; + } } -#endif #define GET_TIME(dst, src) \ if (!FileTimeToLocalFileTime(&findbuf.src, &LocalFTime) || \ @@ -1384,28 +1386,40 @@ efile_readlink(Efile_error* errInfo, char* name, char* buffer, size_t size) * (Vista only) */ HINSTANCE hModule = NULL; + WCHAR *wname = (WCHAR *) name; + WCHAR *wbuffer = (WCHAR *) buffer; if ((hModule = LoadLibrary("kernel32.dll")) != NULL) { typedef DWORD (WINAPI * GETFINALPATHNAMEBYHANDLEPTR)( HANDLE hFile, - LPCSTR lpFilePath, + LPCWSTR lpFilePath, DWORD cchFilePath, DWORD dwFlags); GETFINALPATHNAMEBYHANDLEPTR pGetFinalPathNameByHandle = - (GETFINALPATHNAMEBYHANDLEPTR)GetProcAddress(hModule, "GetFinalPathNameByHandleA"); + (GETFINALPATHNAMEBYHANDLEPTR)GetProcAddress(hModule, "GetFinalPathNameByHandleW"); if (pGetFinalPathNameByHandle == NULL) { FreeLibrary(hModule); } else { /* first check if file is a symlink; {error, einval} otherwise */ - DWORD fileAttributes = GetFileAttributes(name); + DWORD fileAttributes = GetFileAttributesW(wname); if ((fileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)) { BOOLEAN success = 0; - HANDLE h = CreateFile(name, GENERIC_READ, 0,NULL, OPEN_EXISTING, 0, NULL); + HANDLE h = CreateFileW(wname, GENERIC_READ, 0,NULL, OPEN_EXISTING, 0, NULL); + int len; if(h != INVALID_HANDLE_VALUE) { - success = pGetFinalPathNameByHandle(h, buffer, size,0); + success = pGetFinalPathNameByHandle(h, wbuffer, size,0); /* GetFinalPathNameByHandle prepends path with "\\?\": */ - sprintf(buffer, buffer+4); + len = wcslen(wbuffer); + wmemmove(wbuffer,wbuffer+4,len-3); + if (len - 4 >= 2 && wbuffer[1] == L':' && wbuffer[0] >= L'A' && + wbuffer[0] <= L'Z') { + wbuffer[0] = wbuffer[0] + L'a' - L'A'; + } + + for ( ; *wbuffer; wbuffer++) + if (*wbuffer == L'\\') + *wbuffer = L'/'; CloseHandle(h); } FreeLibrary(hModule); @@ -1501,7 +1515,7 @@ efile_altname(Efile_error* errInfo, char* orig_name, char* buffer, size_t size) if (!*wbuffer) { wcscpy(wbuffer,wfd.cFileName); } - + FindClose(fh); return 1; } @@ -1512,7 +1526,9 @@ efile_altname(Efile_error* errInfo, char* orig_name, char* buffer, size_t size) int efile_link(Efile_error* errInfo, char* old, char* new) { - if(!CreateHardLink(new, old, NULL)) { + WCHAR *wold = (WCHAR *) old; + WCHAR *wnew = (WCHAR *) new; + if(!CreateHardLinkW(wnew, wold, NULL)) { return set_error(errInfo); } return 1; @@ -1526,22 +1542,24 @@ efile_symlink(Efile_error* errInfo, char* old, char* new) * (Vista only) */ HINSTANCE hModule = NULL; + WCHAR *wold = (WCHAR *) old; + WCHAR *wnew = (WCHAR *) new; if ((hModule = LoadLibrary("kernel32.dll")) != NULL) { typedef BOOLEAN (WINAPI * CREATESYMBOLICLINKFUNCPTR) ( - LPCSTR lpSymlinkFileName, - LPCSTR lpTargetFileName, + LPCWSTR lpSymlinkFileName, + LPCWSTR lpTargetFileName, DWORD dwFlags); CREATESYMBOLICLINKFUNCPTR pCreateSymbolicLink = (CREATESYMBOLICLINKFUNCPTR) GetProcAddress(hModule, - "CreateSymbolicLinkA"); - /* A for MBCS, W for UNICODE... char* above implies 'A'! */ + "CreateSymbolicLinkW"); + /* A for MBCS, W for UNICODE... char* above implies 'W'! */ if (pCreateSymbolicLink != NULL) { - DWORD attr = GetFileAttributes(old); + DWORD attr = GetFileAttributesW(wold); int flag = (attr != INVALID_FILE_ATTRIBUTES && attr & FILE_ATTRIBUTE_DIRECTORY) ? 1 : 0; /* SYMBOLIC_LINK_FLAG_DIRECTORY = 1 */ - BOOLEAN success = pCreateSymbolicLink(new, old, flag); + BOOLEAN success = pCreateSymbolicLink(wnew, wold, flag); FreeLibrary(hModule); if (success) { -- cgit v1.2.3 From c9d45aa9f32a76a4b08edd1d6010764d9ab02f9b Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Tue, 30 Nov 2010 15:24:21 +0100 Subject: Treat soft links on Windows correctly in file_name_SUITE --- lib/kernel/test/file_name_SUITE.erl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/kernel/test/file_name_SUITE.erl b/lib/kernel/test/file_name_SUITE.erl index c96621d036..3faae93bea 100644 --- a/lib/kernel/test/file_name_SUITE.erl +++ b/lib/kernel/test/file_name_SUITE.erl @@ -193,7 +193,7 @@ check_normal(Mod) -> ?line Syms = [ {S,Targ,list_to_binary(get_data(Targ,normal_dir()))} || {T,S,Targ} <- normal_dir(), T =:= symlink ], ?line [ {ok, Cont} = Mod:read_file(SymL) || {SymL,_,Cont} <- Syms ], - ?line [ {ok, Targ} = Mod:read_link(SymL) || {SymL,Targ,_} <- Syms ], + ?line [ {ok, Targ} = fixlink(Mod:read_link(SymL)) || {SymL,Targ,_} <- Syms ], ?line chk_cre_dir(Mod,[{directory,"temp_dir",normal_dir()}]), ?line {ok,BeginAt} = Mod:get_cwd(), ?line true = is_list(BeginAt), @@ -279,7 +279,7 @@ check_icky(Mod) -> ?line Syms = [ {S,conv(Targ),list_to_binary(get_data(Targ,icky_dir()))} || {T,S,Targ} <- icky_dir(), T =:= symlink ], ?line [ {ok, Cont} = Mod:read_file(SymL) || {SymL,_,Cont} <- Syms ], - ?line [ {ok, Targ} = Mod:read_link(SymL) || {SymL,Targ,_} <- Syms ], + ?line [ {ok, Targ} = fixlink(Mod:read_link(SymL)) || {SymL,Targ,_} <- Syms ], ?line chk_cre_dir(Mod,[{directory,"åäö_dir",icky_dir()}]), ?line {ok,BeginAt} = Mod:get_cwd(), ?line true = is_list(BeginAt), @@ -416,7 +416,7 @@ check_very_icky(Mod) -> ?line Syms = [ {S,conv(Targ),list_to_binary(get_data(Targ,very_icky_dir()))} || {T,S,Targ} <- very_icky_dir(), T =:= symlink ], ?line [ {ok, Cont} = Mod:read_file(SymL) || {SymL,_,Cont} <- Syms ], - ?line [ {ok, Targ} = Mod:read_link(SymL) || {SymL,Targ,_} <- Syms ], + ?line [ {ok, Targ} = fixlink(Mod:read_link(SymL)) || {SymL,Targ,_} <- Syms ], ?line chk_cre_dir(Mod,[{directory,[1088,1079,1091]++"_dir",very_icky_dir()}]), ?line {ok,BeginAt} = Mod:get_cwd(), ?line true = is_list(BeginAt), @@ -691,6 +691,11 @@ treat_icky(Bin) -> Bin end. +fixlink({ok,Link}) -> + {ok,filename:basename(Link)}; +fixlink(X) -> + X. + procentify(<<>>) -> <<>>; procentify(<>) when X > 127 -> -- cgit v1.2.3 From b21d33041ef30182cbb8e74c0023dc282d069a26 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Tue, 30 Nov 2010 12:31:33 +0100 Subject: Teach filelib to use re in unicode mode when filenames are not raw --- erts/emulator/drivers/win32/win_efile.c | 3 -- erts/test/z_SUITE.erl | 2 + lib/kernel/test/file_name_SUITE.erl | 84 +++++++++++++++++++++++---------- lib/stdlib/src/filelib.erl | 29 +++++++----- 4 files changed, 79 insertions(+), 39 deletions(-) diff --git a/erts/emulator/drivers/win32/win_efile.c b/erts/emulator/drivers/win32/win_efile.c index d0f13437dc..4ec9579529 100755 --- a/erts/emulator/drivers/win32/win_efile.c +++ b/erts/emulator/drivers/win32/win_efile.c @@ -1519,9 +1519,6 @@ efile_altname(Efile_error* errInfo, char* orig_name, char* buffer, size_t size) return 1; } -/* - * XXX: link and symlink Implemented in pu (?), will need conversion - */ int efile_link(Efile_error* errInfo, char* old, char* new) diff --git a/erts/test/z_SUITE.erl b/erts/test/z_SUITE.erl index 8faddeb0d3..9f13a7083d 100644 --- a/erts/test/z_SUITE.erl +++ b/erts/test/z_SUITE.erl @@ -253,6 +253,8 @@ core_file_search(#core_search_conf{search_dir = Base, core_cand(Conf, Core, Cores); "core." ++ _ -> core_cand(Conf, Core, Cores); + Bin when is_binary(Bin) -> %Icky filename; ignore + Cores; BName -> case lists:suffix(".core", BName) of true -> core_cand(Conf, Core, Cores); diff --git a/lib/kernel/test/file_name_SUITE.erl b/lib/kernel/test/file_name_SUITE.erl index 3faae93bea..d91bc32d11 100644 --- a/lib/kernel/test/file_name_SUITE.erl +++ b/lib/kernel/test/file_name_SUITE.erl @@ -140,36 +140,46 @@ icky(suite) -> icky(doc) -> "Check file operations on normal file names regardless of unicode mode"; icky(Config) when is_list(Config) -> - {ok,Dir} = file:get_cwd(), - try - Priv = ?config(priv_dir, Config), - file:set_cwd(Priv), - put(file_module,prim_file), - ok = check_icky(prim_file), - put(file_module,file), - ok = check_icky(file) - after - file:set_cwd(Dir) + case hopeless_darwin() of + true -> + {skipped,"This version of darwin does not support icky names at all."}; + false -> + {ok,Dir} = file:get_cwd(), + try + Priv = ?config(priv_dir, Config), + file:set_cwd(Priv), + put(file_module,prim_file), + ok = check_icky(prim_file), + put(file_module,file), + ok = check_icky(file) + after + file:set_cwd(Dir) + end end. very_icky(suite) -> []; very_icky(doc) -> "Check file operations on normal file names regardless of unicode mode"; very_icky(Config) when is_list(Config) -> - {ok,Dir} = file:get_cwd(), - try - Priv = ?config(priv_dir, Config), - file:set_cwd(Priv), - put(file_module,prim_file), - case check_very_icky(prim_file) of - need_unicode_mode -> - {skipped,"VM needs to be started in Unicode filename mode"}; - ok -> - put(file_module,file), - ok = check_very_icky(file) - end - after - file:set_cwd(Dir) + case hopeless_darwin() of + true -> + {skipped,"This version of darwin does not support icky names at all."}; + false -> + {ok,Dir} = file:get_cwd(), + try + Priv = ?config(priv_dir, Config), + file:set_cwd(Priv), + put(file_module,prim_file), + case check_very_icky(prim_file) of + need_unicode_mode -> + {skipped,"VM needs to be started in Unicode filename mode"}; + ok -> + put(file_module,file), + ok = check_very_icky(file) + end + after + file:set_cwd(Dir) + end end. @@ -481,6 +491,21 @@ check_very_icky(Mod) -> FI#file_info{mode = NewMode2}), ?line {ok,#file_info{mode = NewMode2}} = Mod:read_file_info([956,965,963,954,959,49]), + ?line NumOK0 = case has_links() of + true -> 5; + false -> 3 + end, + ?line NumNOK0 = case has_links() of + true -> 4; + false -> 3 + end, + ?line {NumOK,NumNOK} = case is_binary(treat_icky(<<"foo">>)) of + false -> + {NumOK0+NumNOK0,0}; + true -> + {NumOK0,NumNOK0} + end, + ?line {NumOK,NumNOK} = filelib:fold_files(".",".*",true,fun(_F,{N,M}) when is_list(_F) -> io:format("~ts~n",[_F]),{N+1,M}; (_F,{N,M}) -> io:format("~p~n",[_F]),{N,M+1} end,{0,0}), ok catch throw:need_unicode_mode -> @@ -548,6 +573,7 @@ rm_r2(Mod,Dir) -> chk_cre_dir(_,[]) -> ok; chk_cre_dir(Mod,[{regular,Name,Content}|T]) -> + %io:format("~p~n",[Name]), ok = Mod:write_file(Name,Content), chk_cre_dir(Mod,T); chk_cre_dir(Mod,[{link,Name,Target}|T]) -> @@ -558,7 +584,9 @@ chk_cre_dir(Mod,[{symlink,Name,Target}|T]) -> chk_cre_dir(Mod,T); chk_cre_dir(Mod,[{directory,Name,Content}|T]) -> ok = Mod:make_dir(Name), + %io:format("Content = ~p~n",[Content]), Content2 = [{Ty,filename:join(Name,N),case Ty of link -> filename:join(Name,C); _ -> C end} || {Ty,N,C} <- Content ], + %io:format("Content2 = ~p~n",[Content2]), chk_cre_dir(Mod,Content2), chk_cre_dir(Mod,T). @@ -627,6 +655,14 @@ linkify(Passed,[{directory, Name, Content}|T]) -> linkify(Passed,[H|T]) -> [H|linkify([H|Passed],T)]. +hopeless_darwin() -> + case {os:type(),os:version()} of + {{unix,darwin},{Major,_,_}} when Major < 9 -> + true; + _ -> + false + end. + icky_dir() -> [{regular,"fil1","fil1"}, {regular,"åäö2","åäö2"}] ++ diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index d5ddf9ed7e..eaaca750ab 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -166,36 +166,41 @@ do_is_regular(File, Mod) -> %% If is true all sub-directories to are processed do_fold_files(Dir, RegExp, Recursive, Fun, Acc, Mod) -> - {ok, Re1} = re:compile(RegExp), - do_fold_files1(Dir, Re1, Recursive, Fun, Acc, Mod). + {ok, Re1} = re:compile(RegExp,[unicode]), + do_fold_files1(Dir, Re1, RegExp, Recursive, Fun, Acc, Mod). -do_fold_files1(Dir, RegExp, Recursive, Fun, Acc, Mod) -> +do_fold_files1(Dir, RegExp, OrigRE, Recursive, Fun, Acc, Mod) -> case eval_list_dir(Dir, Mod) of - {ok, Files} -> do_fold_files2(Files, Dir, RegExp, Recursive, Fun, Acc, Mod); + {ok, Files} -> do_fold_files2(Files, Dir, RegExp, OrigRE, + Recursive, Fun, Acc, Mod); {error, _} -> Acc end. -do_fold_files2([], _Dir, _RegExp, _Recursive, _Fun, Acc, _Mod) -> +%% OrigRE is not to be compiled as it's for non conforming filenames, +%% i.e. for filenames that does not comply to the current encoding, which should +%% be very rare. We use it only in those cases and do not want to precompile. +do_fold_files2([], _Dir, _RegExp, _OrigRE, _Recursive, _Fun, Acc, _Mod) -> Acc; -do_fold_files2([File|T], Dir, RegExp, Recursive, Fun, Acc0, Mod) -> +do_fold_files2([File|T], Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) -> FullName = filename:join(Dir, File), case do_is_regular(FullName, Mod) of true -> - case re:run(File, RegExp, [{capture,none}]) of + case re:run(File, if is_binary(File) -> OrigRE; true -> RegExp end, + [{capture,none}]) of match -> Acc = Fun(FullName, Acc0), - do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc, Mod); + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc, Mod); nomatch -> - do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc0, Mod) + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) end; false -> case Recursive andalso do_is_dir(FullName, Mod) of true -> - Acc1 = do_fold_files1(FullName, RegExp, Recursive, + Acc1 = do_fold_files1(FullName, RegExp, OrigRE, Recursive, Fun, Acc0, Mod), - do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc1, Mod); + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc1, Mod); false -> - do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc0, Mod) + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) end end. -- cgit v1.2.3 From 9fbce40dcbda8d4e7888507d646bfee22bf7d311 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 1 Dec 2010 08:45:42 +0100 Subject: Correct testcase regarding windows versions supporting soft links. --- lib/kernel/test/file_name_SUITE.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/kernel/test/file_name_SUITE.erl b/lib/kernel/test/file_name_SUITE.erl index d91bc32d11..02b8d12478 100644 --- a/lib/kernel/test/file_name_SUITE.erl +++ b/lib/kernel/test/file_name_SUITE.erl @@ -594,7 +594,7 @@ has_links() -> case os:type() of {win32,_} -> case os:version() of - {N,NN,_} when (N > 5) orelse (NN > 1) -> + {N,NN,_} when (N > 5) andalso (NN >= 1) -> true; _ -> false -- cgit v1.2.3 From 30ad8cfadba47123920128023cb06af6e0922746 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 1 Dec 2010 15:11:07 +0100 Subject: Mend on_load_embedded testcase which did not handle windows links --- lib/kernel/test/code_SUITE.erl | 13 +++++++++++-- .../on_load_app-1.0/src/on_load_embedded.erl | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/lib/kernel/test/code_SUITE.erl b/lib/kernel/test/code_SUITE.erl index 0c7440a13e..e52f8a0e37 100644 --- a/lib/kernel/test/code_SUITE.erl +++ b/lib/kernel/test/code_SUITE.erl @@ -1268,7 +1268,8 @@ on_load_embedded_1(Config) -> ?line LibRoot = code:lib_dir(), ?line LinkName = filename:join(LibRoot, "on_load_app-1.0"), ?line OnLoadApp = filename:join(DataDir, "on_load_app-1.0"), - ?line file:delete(LinkName), + ?line del_link(LinkName), + io:format("LinkName :~p, OnLoadApp: ~p~n",[LinkName,OnLoadApp]), case file:make_symlink(OnLoadApp, LinkName) of {error,enotsup} -> throw({skip,"Support for symlinks required"}); @@ -1297,7 +1298,15 @@ on_load_embedded_1(Config) -> %% Clean up. ?line stop_node(Node), - ?line ok = file:delete(LinkName). + ?line ok = del_link(LinkName). + +del_link(LinkName) -> + case file:delete(LinkName) of + {error,eperm} -> + file:del_dir(LinkName); + Other -> + Other + end. create_boot(Config, Options) -> ?line {ok, OldDir} = file:get_cwd(), diff --git a/lib/kernel/test/code_SUITE_data/on_load_app-1.0/src/on_load_embedded.erl b/lib/kernel/test/code_SUITE_data/on_load_app-1.0/src/on_load_embedded.erl index b7fdd4d9ae..646921026d 100644 --- a/lib/kernel/test/code_SUITE_data/on_load_app-1.0/src/on_load_embedded.erl +++ b/lib/kernel/test/code_SUITE_data/on_load_app-1.0/src/on_load_embedded.erl @@ -8,7 +8,7 @@ run_me() -> LibDir = code:lib_dir(on_load_app), PrivDir = code:priv_dir(on_load_app), LibDir = filename:dirname(PrivDir), - ModPath = code:which(?MODULE), + ModPath = filename:join(filename:split(code:which(?MODULE))), LibDir = filename:dirname(filename:dirname(ModPath)), %% Start a process to remember that the on_load was called. -- cgit v1.2.3 From 1ab2f46d91bce11bdc5b7ec65d3b3df46eadb105 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 1 Dec 2010 17:34:49 +0100 Subject: Make filelib not crash on re codepoints beyond 255 in re when filename is raw --- lib/stdlib/src/filelib.erl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index eaaca750ab..04147d40d1 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -185,11 +185,14 @@ do_fold_files2([File|T], Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) -> FullName = filename:join(Dir, File), case do_is_regular(FullName, Mod) of true -> - case re:run(File, if is_binary(File) -> OrigRE; true -> RegExp end, - [{capture,none}]) of + case (catch re:run(File, if is_binary(File) -> OrigRE; + true -> RegExp end, + [{capture,none}])) of match -> Acc = Fun(FullName, Acc0), do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc, Mod); + {'EXIT',_} -> + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod); nomatch -> do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) end; -- cgit v1.2.3 From 3e6877b06ae395a9d4310ef664d0360867a47f62 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 1 Dec 2010 17:35:40 +0100 Subject: Add documentation about raw filenames and Unicode file name translation mode --- erts/doc/src/erl.xml | 13 ++++++++ lib/kernel/doc/src/file.xml | 72 ++++++++++++++++++++++++++++++++++++++++- lib/stdlib/doc/src/filelib.xml | 27 ++++++++++++++-- lib/stdlib/doc/src/filename.xml | 15 +++++++-- 4 files changed, 120 insertions(+), 7 deletions(-) diff --git a/erts/doc/src/erl.xml b/erts/doc/src/erl.xml index 9224d73b6f..77bd952d41 100644 --- a/erts/doc/src/erl.xml +++ b/erts/doc/src/erl.xml @@ -550,6 +550,19 @@

Force the compressed option on all ETS tables. Only intended for test and evaluation.

+ + +

The VM works with file names as if they are encoded using the ISO-latin-1 encoding, disallowing Unicode characters with codepoints beyond 255. This is default on operating systems that have transparent file naming, i.e. all Unixes except MacOSX.

+
+ + +

The VM works with file names as if they are encoded using UTF-8 (or some other system specific Unicode encoding). This is the default on operating systems that enforce Unicode encoding, i.e. Windows and MacOSX.

+

By enabling Unicode file name translation on systems where this is not default, you open up to the possibility that some file names can not be interpreted by the VM and therefore will be returned to the program as raw binaries. The option is therefore considered experimental.

+
+ + +

Selection between +fnl and +fnu is done based on the current locale settings in the OS, meaning that if you have set your terminal for UTF-8 encoding, the filesystem is expected to use the same encoding for filenames (use with care).

+

Sets the default heap size of processes to the size diff --git a/lib/kernel/doc/src/file.xml b/lib/kernel/doc/src/file.xml index 64cdd3a8ea..d3441d3623 100644 --- a/lib/kernel/doc/src/file.xml +++ b/lib/kernel/doc/src/file.xml @@ -36,6 +36,61 @@ other Erlang processes to continue executing in parallel with the file operations. See the command line flag +A in erl(1).

+ +

The Erlang VM supports file names in Unicode to a limited + extent. Depending on how the VM is started (with the parameter + +fnu or +fnl), file names given can contain + characters > 255 and the VM system will convert file names + back and forth to the native file name encoding.

+ +

The default behavior for Unicode character translation depends + on to what extent the underlying OS/filesystem enforces consistent + naming. On OSes where all file names are ensured to be in one or + another encoding, Unicode is the default (currently this holds for + Windows and MacOSX). On OSes with completely transparent file + naming (i.e. all Unixes except MacOSX), ISO-latin-1 file naming is + the default. The reason for the ISO-latin-1 default is that + file names are not guaranteed to be possible to interpret according to + the Unicode encoding expected (i.e. UTF-8), and file names that + cannot be decoded will only be accessible by using "raw + file names", in other word file names given as binaries.

+ +

As file names are traditionally not binaries in Erlang, + applications that need to handle raw file names need to be + converted, why the Unicode mode for file names is not default on + systems having completely transparent file naming.

+ + As of R14B01, the most basic file handling modules + (file, prim_file, filelib and + filename) accept raw file names, but the rest of OTP is not + guaranteed to handle them, why Unicode file naming on systems + where it is not default is still considered experimental. + +

Raw file names is a new feature in OTP R14B01, which allows the + user to supply completely uninterpreted file names to the + underlying OS/filesystem. They are supplied as binaries, where it + is up to the user to supply a correct encoding for the + environment. The function file:native_name_encoding() can + be used to check what encoding the VM is working in. If the + function returns latin1 file names are not in any way + converted to Unicode, if it is utf8, raw file names should + be encoded as UTF-8 if they are to follow the convention of the VM + (and usually the convention of the OS as well). Using raw + file names is useful if you have a filesystem with inconsistent + file naming, where some files are named in UTF-8 encoding while + others are not. A file:list_dir on such mixed file name systems + when the VM is in Unicode file name mode might return file names as + raw binaries as they cannot be interpreted as Unicode + file names. Raw file names can also be used to give UTF-8 encoded + file names even though the VM is not started in Unicode file name + translation mode.

+ +

Note that on Windows, file:native_name_encoding() + returns utf8 per default, which is the format for raw + file names even on Windows, although the underlying OS specific + code works in a limited version of little endian UTF16. As far as + the Erlang programmer is concerned, Windows native Unicode format + is UTF-8...

@@ -47,8 +102,14 @@ iodata() = iolist() | binary() io_device() as returned by file:open/2, a process handling IO protocols -name() = string() | atom() | DeepList +name() = string() | atom() | DeepList | RawFilename DeepList = [char() | atom() | DeepList] + RawFilename = binary() + If VM is in unicode filename mode, string() and char() are allowed to be > 255. + RawFilename is a filename not subject to Unicode translation, meaning that it + can contain characters not conforming to the Unicode encoding expected from the + filesystem (i.e. non-UTF-8 characters although the VM is started in Unicode + filename mode). posix() an atom which is named from the POSIX error codes used in @@ -597,6 +658,15 @@ f.txt: {person, "kalle", 25}. + + native_name_encoding() -> latin1 | utf8 + Retunr the VMs configure filename encoding. + +

This function returns the configured default file name encoding to use for raw file names. Generally an application supplying file names raw (as binaries), should obey the character encoding returned by this function.

+

By default, the VM uses ISO-latin-1 file name encoding on filesystems and/or OSes that use completely transparent file naming. This includes all Unix versions except for MacOSX, where the vfs layer enforces UTF-8 file naming. By giving the experimental option +fnu when starting Erlang, UTF-8 translation of file names can be turned on even for those systems. If Unicode file name translation is in effect, the system behaves as usual as long as file names conform to the encoding, but will return file names that are not properly encoded in UTF-8 as raw file names (i.e. binaries).

+

On Windows, this function also returns utf8 by default. The OS uses a pure Unicode naming scheme and file names are always possible to interpret as valid Unicode. The fact that the underlying Windows OS actually encodes file names using little endian UTF-16 can be ignored by the Erlang programmer. Windows and MacOSX are the only operating systems where the VM operates in Unicode file name mode by default.

+
+
open(Filename, Modes) -> {ok, IoDevice} | {error, Reason} Open a file diff --git a/lib/stdlib/doc/src/filelib.xml b/lib/stdlib/doc/src/filelib.xml index 4ff3b22f32..969aff4fcb 100644 --- a/lib/stdlib/doc/src/filelib.xml +++ b/lib/stdlib/doc/src/filelib.xml @@ -36,14 +36,23 @@

This module contains utilities on a higher level than the file module.

+

The module supports Unicode file names, so that it will match against regular expressions given in Unicode and that it will find and process raw file names (i.e. files named in a way that does not confirm to the expected encoding).

+

If the VM operates in Unicode file naming mode on a machine with transparent file naming, the fun() provided to fold_files/5 needs to be prepared to handle binary file names.

+

For more information about raw file names, see the file module.

DATA TYPES -filename() = string() | atom() | DeepList -dirname() = filename() -DeepList = [char() | atom() | DeepList] +filename() = = string() | atom() | DeepList | RawFilename + DeepList = [char() | atom() | DeepList] + RawFilename = binary() + If VM is in unicode filename mode, string() and char() are allowed to be > 255. + RawFilename is a filename not subject to Unicode translation, meaning that it + can contain characters not conforming to the Unicode encoding expected from the + filesystem (i.e. non-UTF-8 characters although the VM is started in Unicode + filename mode). +dirname() = filename()
@@ -90,6 +99,18 @@ DeepList = [char() | atom() | DeepList] If Recursive is true all sub-directories to Dir are processed. The regular expression matching is done on just the filename without the directory part.

+ +

If Unicode file name translation is in effect and the file + system is completely transparent, file names that cannot be + interpreted as Unicode may be encountered, in which case the + fun() must be prepared to handle raw file names + (i.e. binaries). If the regular expression contains + codepoints beyond 255, it will not match file names that does + not conform to the expected character encoding (i.e. are not + encoded in valid UTF-8).

+ +

For more information about raw file names, see the + file module.

diff --git a/lib/stdlib/doc/src/filename.xml b/lib/stdlib/doc/src/filename.xml index fe6c6f898e..cdee6e4a81 100644 --- a/lib/stdlib/doc/src/filename.xml +++ b/lib/stdlib/doc/src/filename.xml @@ -4,7 +4,7 @@
- 19972009 + 19972010 Ericsson AB. All Rights Reserved. @@ -43,13 +43,22 @@ only, even if the arguments contain back slashes. Use join/1 to normalize a file name by removing redundant directory separators.

+

The module supports raw file names in the way that if a binary is present, or the file name cannot be interpreted according to the return value of + file:native_name_encoding/0, a raw file name will also be returned. For example filename:join/1 provided with a path component being a binary (and also not being possible to interpret under the current native file name encoding) will result in a raw file name being returned (the join operation will have been performed of course). For more information about raw file names, see the file module.

DATA TYPES -name() = string() | atom() | DeepList -DeepList = [char() | atom() | DeepList] +name() = string() | atom() | DeepList | RawFilename + DeepList = [char() | atom() | DeepList] + RawFilename = binary() + If VM is in unicode filename mode, string() and char() are allowed to be > 255. + RawFilename is a filename not subject to Unicode translation, meaning that it + can contain characters not conforming to the Unicode encoding expected from the + filesystem (i.e. non-UTF-8 characters although the VM is started in Unicode + filename mode). +
-- cgit v1.2.3 From 47c0ced5a05874c3cbfdad532abbd8900c40e966 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 1 Dec 2010 17:46:45 +0100 Subject: Correct bug in file_name_SUITE making it fail on Unix instead of Windows7 --- lib/kernel/test/file_name_SUITE.erl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/kernel/test/file_name_SUITE.erl b/lib/kernel/test/file_name_SUITE.erl index 02b8d12478..8829e93473 100644 --- a/lib/kernel/test/file_name_SUITE.erl +++ b/lib/kernel/test/file_name_SUITE.erl @@ -506,6 +506,7 @@ check_very_icky(Mod) -> {NumOK0,NumNOK0} end, ?line {NumOK,NumNOK} = filelib:fold_files(".",".*",true,fun(_F,{N,M}) when is_list(_F) -> io:format("~ts~n",[_F]),{N+1,M}; (_F,{N,M}) -> io:format("~p~n",[_F]),{N,M+1} end,{0,0}), + ?line ok = filelib:fold_files(".",[1076,1089,1072,124,46,42],true,fun(_F,_) -> ok end,false), ok catch throw:need_unicode_mode -> @@ -727,8 +728,14 @@ treat_icky(Bin) -> Bin end. +% Handle windows having absolute soft link targets. fixlink({ok,Link}) -> - {ok,filename:basename(Link)}; + case os:type() of + {win32,_} -> + {ok,filename:basename(Link)}; + _ -> + {ok,Link} + end; fixlink(X) -> X. -- cgit v1.2.3 From 6ba7c90dd6ccd7a6a6c661c51f17c44a124d2182 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Thu, 2 Dec 2010 14:55:03 +0100 Subject: Add section about Unicode file names to stdlib users guide --- lib/stdlib/doc/src/unicode_usage.xml | 44 +++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/lib/stdlib/doc/src/unicode_usage.xml b/lib/stdlib/doc/src/unicode_usage.xml index f1b0659ea2..df8e6c6b47 100644 --- a/lib/stdlib/doc/src/unicode_usage.xml +++ b/lib/stdlib/doc/src/unicode_usage.xml @@ -5,7 +5,7 @@
1999 - 2009 + 2010 Ericsson AB. All Rights Reserved. @@ -168,6 +168,48 @@ Eshell V5.7 (abort with ^G) Unicode characters in allowed and disallowed context
+Unicode file names +

Most modern operating systems support Unicode file names in some way or another. There are several different ways to do this and Erlang by default treats the different approaches differently:

+ +Mandatory Unicode file naming + +

Windows and, for most common uses, MacOSX enforces Unicode support for file names. All files created in the filesystem has names that can consistently be interpreted. In MacOSX, all file names are retrieved in UTF-8 encoding, while Windows have selected an approach where each system call handling file names has a special Unicode aware variant, giving much the same effect. There are no file names on these systems that are not Unicode file names, why the default behavior of the Erlang VM is to work in "Unicode file name translation mode", meaning that a file name can be given as a Unicode list and that will be automatically translated to the proper name encoding for the underlying operating and file system.

+

Doing i.e. a file:list_dir/1 on one of these systems may return Unicode lists with codepoints beyond 255, depending on the content of the actual filesystem.

+

As the feature is fairly new, you may still stumble upon non core applications that cannot handle being provided with file names containing characters with codepoints larger than 255, but the core Erlang system should have no problems with Unicode file names.

+
+Transparent file naming + +

Most Unix operating systems have adopted a simpler approach, namely that Unicode file naming is not enforced, but by convention. Those systems usually use UTF-8 encoding for Unicode file names, but do not enforce it. On such a system, a file name containing characters having codepoints between 128 and 255 may be named either as plain ISO-latin-1 or using UTF-8 encoding. As no consistency is enforced, the Erlang VM can do no consistent translation of all file names. If the VM would automatically select encoding based on heuristics, one could get unexpected behavior on these systems, therefore file names not being encoded in UTF-8 are returned as "raw file names" if Unicode file naming support is turned on.

+

A raw file name is not a list, but a binary. Many non core applications still does not handle file names given as binaries, why such raw names are avoided by default. This means that systems having implemented Unicode file naming through transparent file systems and an UTF-8 convention, does not by default have Unicode file naming turned on. Explicitly turning Unicode file name handling on for these types of systems is considered experimental.

+
+
+

The Unicode file naming support was introduced with OTP release R14B01. A VM operating in Unicode file mode can work with files having names in any language or character set (as long as it's supported by the underlying OS and file system). The Unicode character list is used to denote file or directory names and if the file system content is listed, you will also be able to get Unicode lists as return value. The support lies in the kernel and stdlib modules, why most applications (that does not explicitly require the file names to be in the ISO-latin-1 range) will benefit from the Unicode support without change.

+ +

On Operating systems with mandatory Unicode file names, this means that you more easily conform to the file names of other (non Erlang) applications, and you can also process file names that, at least on Windows, where completely inaccessible (due to having names that could not be represented in ISO-latin-1). Also you will avoid creating incomprehensible file names on MacOSX as the vfs layer of the OS will accept all your file names as UTF-8 and will not rewrite them.

+ +

For most systems, turning on Unicode file name translation is no problem even if it uses transparent file naming. Very few systems have mixed file name encodings. A consistent UTF-8 named system will work perfectly in Unicode file name mode. It is still however considered experimental in R14B01. Unicode file name translation is turned on with the +fnu switch to the erl program. If the VM is started in Unicode file name translation mode, file:native_name_encoding/0 will return the atom utf8.

+ +

In Unicode file name mode, file names given to the BIF open_port/2 with the option {spawn_executable,...} are also interpreted as Unicode. So is the parameter list given in the argv option available when using spawn_executable. The UTF-8 translation of arguments can be avoided using binaries, see the discussion about raw file names below.

+ +

It is worth noting that the file encoding options given when opening a file has nothing to do with the file name encoding convention. You can very well open files containing UTF-8 but having file names in ISO-latin-1 or vice versa.

+ +Erlang drivers and NIF shared objects still can not be named with names containing codepoints beyond 127. This is a known limitation to be removed in a future release. Erlang modules however can, but it is definitely not a good idea and is still considered experimental. + +
+Notes about raw file names and automatic file name conversion +

Raw file names is introduced together with Unicode file name support in erts-5.8.2 (OTP R14B01). The reason the "raw file names" is introduced in the system is to be able to consistently represent file names given in different encodings on the same system. Having the VM automatically translate a file name that is not in UTF-8 when to a list of Unicode characters might seem practical, but this would open up for both duplicate file names and other inconsistent behavior. Consider a directory containing a file named "björn" in ISO-latin-1, while the Erlang VM is operating in Unicode file name mode (and therefore expecting UTF-8 file naming). The ISO-latin-1 name is not valid UTF-8 and one could be tempted to think that automatic conversion in for example file:list_dir/1 is a good idea. But what would happen if we later tried to open the file and has the name as a Unicode list (magically converted from the ISO-latin-1 file name)? The VM will convert the file name given to UTF-8, as this is the encoding expected. Effectively this means trying to open the file named <<"björn"/utf8>>. This file does not exist, and even if it existed it would not be the same file as the one that was listed. We could even create two files named "björn", one named in the UTF-8 encoding and one not. If file:list_dir/1 would automatically convert the ISO-latin-1 file name to a list, we would get two identical file names as the result. To avoid this, we need to differentiate between file names being properly encoded according to the Unicode file naming convention (i.e. UTF-8) and file names being invalid under the encoding. This is done by representing invalid encoding as "raw" file names, i.e. as binaries.

+

The core system of Erlang (kernel and stdlib) accept raw file names except for loadable drivers and executables invoked using open_port({spawn, ...} ...). open_port({spawn_executable, ...} ...) however does accept them. As mentioned earlier, the arguments given in the option list to open_port({spawn_executable, ...} ...) undergo the same conversion as the file names, meaning that the executable will be provided with arguments in UTF-8 as well. This translation is avoided consistently with how the file names are treated, by giving the argument as a binary.

+

To force Unicode file name translation mode on systems where this is not the default is considered experimental in OTP R14B01 due to the raw file names possibly being a new experience to the programmer and that the non core applications of OTP are not tested for compliance with raw file names yet. Unicode file name translation is expected to be default in future releases.

+

If working with raw file names, one can still conform to the encoding convention of the Erlang VM by using the file:native_name_encoding/0 function, which returns either the atom latin1 or the atom utf8 depending on the file name translation mode. On Linux, an VM started without explicitly stating the file name translation mode will default to latin1 as the native file name encoding, why file names on the disk encoded as UTF-8 will be returned as a list of the names interpreted as ISO-latin-1. The "UTF-8 list" is not a practical type for displaying or operating on in Erlang, but it is backward compatible and usable in all functions requiring a file name. On Windows and MacOSX, the default behavior is that of file name translation, why the file:native_name_encoding/0 by default returns utf8 on those systems (the fact that Windows actually does not use UTF-8 on the file system level can safely be ignored by the Erlang programmer). The default behavior can be changed using the +fnu or +fnl options to the VM, see the erl command manual page.

+

Even if you are operating without Unicode file naming translation automatically done by the VM, you can access and create files with names in UTF-8 encoding by using raw file names encoded as UTF-8. Enforcing the UTF-8 encoding regardless of the mode the Erlang VM is started in might, in some circumstances be a good idea, as the convention of using UTF-8 file names is spreading.

+
+
+Notes about MacOSX +

MacOSXs vfs layer enforces UTF-8 file names in a quite aggressive way. Older versions did this by simply refusing to create non UTF-8 conforming file names, while newer versions replace offending bytes with the sequence "%HH", where HH is the original character in hexadecimal notation. As Unicode translation is enabled by default on MacOSX, the only way to come up against this is to either start the VM with the +fnl flag or to use a raw file name in latin1 encoding. In that case, the file can not be opened with the same name as the one used to create this. The problem is by design in newer versions of MacOSX.

+

MacOSX also reorganizes the names of files so that the representation of accents etc is denormalized, i.e. the character ö is represented as the codepoints [111,776], where 111 is the character o and 776 is a special accent character. This type of denormalized Unicode is otherwise very seldom used and Erlang normalizes those file names on retrieval, so that denormalized file names is not passed up to the Erlang application. In Erlang the file name "björn" is retrieved as [98,106,246,114,110], not as [98,106,117,776,114,110], even though the file system might think differently.

+
+
+
Unicode-aware modules

Most of the modules in Erlang/OTP are of course Unicode-unaware in the sense that they have no notion of Unicode and really shouldn't have. Typically they handle non-textual or byte-oriented data (like gen_tcp etc).

Modules that actually handle textual data (like io_lib, string etc) are sometimes subject to conversion or extension to be able to handle Unicode characters.

-- cgit v1.2.3 From 6ea8348174c62812057dd552d0890b2d9d4a3c16 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 3 Dec 2010 09:51:00 +0100 Subject: Add documentation to erlang.xml and slight correction to unicode_usage.xml --- erts/doc/src/erlang.xml | 46 +++++++++++++++++++++++++++++++++--- lib/stdlib/doc/src/unicode_usage.xml | 2 +- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml index 638f7eef10..78d58a1e56 100644 --- a/erts/doc/src/erlang.xml +++ b/erts/doc/src/erlang.xml @@ -2781,14 +2781,17 @@ os_prompt% open_port(PortName, PortSettings) -> port() Open a port - PortName = {spawn, Command} | {spawn_driver, Command} | {spawn_executable, Command} | {fd, In, Out} + PortName = {spawn, Command} | {spawn_driver, Command} | {spawn_executable, FileName} | {fd, In, Out}  Command = string() +  FileName = [ FileNameChar ] | binary() +  FileNameChar = int() (1..255 or any Unicode codepoint, see description)  In = Out = int() PortSettings = [Opt] -  Opt = {packet, N} | stream | {line, L} | {cd, Dir} | {env, Env} | {args, [ string() ]} | {arg0, string()} | exit_status | use_stdio | nouse_stdio | stderr_to_stdout | in | out | binary | eof +  Opt = {packet, N} | stream | {line, L} | {cd, Dir} | {env, Env} | {args, [ ArgString ]} | {arg0, ArgString} | exit_status | use_stdio | nouse_stdio | stderr_to_stdout | in | out | binary | eof   N = 1 | 2 | 4   L = int()   Dir = string() +   ArgString = [ FileNameChar ] | binary()   Env = [{Name, Val}]    Name = string()    Val = string() | false @@ -2851,7 +2854,26 @@ os_prompt% executed, the appropriate command interpreter will implicitly be invoked, but there will still be no command argument expansion or implicit PATH search.

- + +

The name of the executable as well as the arguments + given in args and arg0 is subject to + Unicode file name translation if the system is running + in Unicode file name mode. To avoid + translation or force i.e. UTF-8, supply the executable + and/or arguments as a binary in the correct + encoding. See the file module, the + + file:native_name_encoding/0 function and the + stdlib users guide + for details.

+ + The characters in the name (if given as a list) + can only be > 255 if the Erlang VM is started in + Unicode file name translation mode, otherwise the name + of the executable is limited to the ISO-latin-1 + character set. +

If the Command cannot be run, an error exception, with the posix error code as the reason, is raised. The error reason may differ between operating @@ -2954,6 +2976,21 @@ os_prompt% should not be given in this list. The proper executable name will automatically be used as argv[0] where applicable.

+

When the Erlang VM is running in Unicode file name + mode, the arguments can contain any Unicode characters and + will be translated into whatever is appropriate on the + underlying OS, which means UTF-8 for all platforms except + Windows, which has other (more transparent) ways of + dealing with Unicode arguments to programs. To avoid + Unicode translation of arguments, they can be supplied as + binaries in whatever encoding is deemed appropriate.

+ + The characters in the arguments (if given as a + list of characters) can only be > 255 if the Erlang + VM is started in Unicode file name mode, + otherwise the arguments are limited to the + ISO-latin-1 character set. +

If one, for any reason, wants to explicitly set the program name in the argument vector, the arg0 option can be used.

@@ -2969,6 +3006,9 @@ os_prompt% responds to this is highly system dependent and no specific effect is guaranteed.

+

The unicode file name translation rules of the + args option apply to this option as well.

+ exit_status diff --git a/lib/stdlib/doc/src/unicode_usage.xml b/lib/stdlib/doc/src/unicode_usage.xml index df8e6c6b47..c02ea3cbcb 100644 --- a/lib/stdlib/doc/src/unicode_usage.xml +++ b/lib/stdlib/doc/src/unicode_usage.xml @@ -189,7 +189,7 @@ Eshell V5.7 (abort with ^G)

For most systems, turning on Unicode file name translation is no problem even if it uses transparent file naming. Very few systems have mixed file name encodings. A consistent UTF-8 named system will work perfectly in Unicode file name mode. It is still however considered experimental in R14B01. Unicode file name translation is turned on with the +fnu switch to the erl program. If the VM is started in Unicode file name translation mode, file:native_name_encoding/0 will return the atom utf8.

-

In Unicode file name mode, file names given to the BIF open_port/2 with the option {spawn_executable,...} are also interpreted as Unicode. So is the parameter list given in the argv option available when using spawn_executable. The UTF-8 translation of arguments can be avoided using binaries, see the discussion about raw file names below.

+

In Unicode file name mode, file names given to the BIF open_port/2 with the option {spawn_executable,...} are also interpreted as Unicode. So is the parameter list given in the args option available when using spawn_executable. The UTF-8 translation of arguments can be avoided using binaries, see the discussion about raw file names below.

It is worth noting that the file encoding options given when opening a file has nothing to do with the file name encoding convention. You can very well open files containing UTF-8 but having file names in ISO-latin-1 or vice versa.

-- cgit v1.2.3 From 159d79b6657e5bb4f3accb6a0d74b74f06da4ba2 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 3 Dec 2010 13:16:21 +0100 Subject: Test and correct filelib and filename --- lib/kernel/test/file_name_SUITE.erl | 3 + lib/stdlib/src/filelib.erl | 28 +++- lib/stdlib/src/filename.erl | 28 ++-- lib/stdlib/test/filelib_SUITE.erl | 7 +- lib/stdlib/test/filename_SUITE.erl | 315 +++++++++++++++++++++++++++++++++++- 5 files changed, 363 insertions(+), 18 deletions(-) diff --git a/lib/kernel/test/file_name_SUITE.erl b/lib/kernel/test/file_name_SUITE.erl index 8829e93473..fea4df8539 100644 --- a/lib/kernel/test/file_name_SUITE.erl +++ b/lib/kernel/test/file_name_SUITE.erl @@ -507,6 +507,9 @@ check_very_icky(Mod) -> end, ?line {NumOK,NumNOK} = filelib:fold_files(".",".*",true,fun(_F,{N,M}) when is_list(_F) -> io:format("~ts~n",[_F]),{N+1,M}; (_F,{N,M}) -> io:format("~p~n",[_F]),{N,M+1} end,{0,0}), ?line ok = filelib:fold_files(".",[1076,1089,1072,124,46,42],true,fun(_F,_) -> ok end,false), + ?line SF3 = unicode:characters_to_binary("åäösubfil3",file:native_name_encoding()), + ?line Sorted = lists:sort([SF3,<<"åäösubfil2">>]), + ?line Sorted = lists:sort(filelib:wildcard("*",<<"åäösubdir2">>)), ok catch throw:need_unicode_mode -> diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index 04147d40d1..c845b61204 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -47,14 +47,14 @@ wildcard(Pattern) when is_list(Pattern) -> ?HANDLE_ERROR(do_wildcard(Pattern, file)). -spec wildcard(file:name(), file:name() | atom()) -> [file:filename()]. -wildcard(Pattern, Cwd) when is_list(Pattern), is_list(Cwd) -> +wildcard(Pattern, Cwd) when is_list(Pattern), (is_list(Cwd) or is_binary(Cwd)) -> ?HANDLE_ERROR(do_wildcard(Pattern, Cwd, file)); wildcard(Pattern, Mod) when is_list(Pattern), is_atom(Mod) -> ?HANDLE_ERROR(do_wildcard(Pattern, Mod)). -spec wildcard(file:name(), file:name(), atom()) -> [file:filename()]. wildcard(Pattern, Cwd, Mod) - when is_list(Pattern), is_list(Cwd), is_atom(Mod) -> + when is_list(Pattern), (is_list(Cwd) or is_binary(Cwd)), is_atom(Mod) -> ?HANDLE_ERROR(do_wildcard(Pattern, Cwd, Mod)). -spec is_dir(file:name()) -> boolean(). @@ -118,7 +118,7 @@ do_wildcard_comp({compiled_wildcard,{exists,File}}, Mod) -> do_wildcard_comp({compiled_wildcard,[Base|Rest]}, Mod) -> do_wildcard_1([Base], Rest, Mod). -do_wildcard(Pattern, Cwd, Mod) when is_list(Pattern), is_list(Cwd) -> +do_wildcard(Pattern, Cwd, Mod) when is_list(Pattern), (is_list(Cwd) or is_binary(Cwd)) -> do_wildcard_comp(do_compile_wildcard(Pattern), Cwd, Mod). do_wildcard_comp({compiled_wildcard,{exists,File}}, Cwd, Mod) -> @@ -127,9 +127,18 @@ do_wildcard_comp({compiled_wildcard,{exists,File}}, Cwd, Mod) -> _ -> [] end; do_wildcard_comp({compiled_wildcard,[current|Rest]}, Cwd0, Mod) -> - Cwd = filename:join([Cwd0]), %Slash away redundant slashes. - PrefixLen = length(Cwd)+1, - [lists:nthtail(PrefixLen, N) || N <- do_wildcard_1([Cwd], Rest, Mod)]; + {Cwd,PrefixLen} = case filename:join([Cwd0]) of + Bin when is_binary(Bin) -> {Bin,byte_size(Bin)+1}; + Other -> {Other,length(Other)+1} + end, %Slash away redundant slashes. + [ + if + is_binary(N) -> + <<_:PrefixLen/binary,Res/binary>> = N, + Res; + true -> + lists:nthtail(PrefixLen, N) + end || N <- do_wildcard_1([Cwd], Rest, Mod)]; do_wildcard_comp({compiled_wildcard,[Base|Rest]}, _Cwd, Mod) -> do_wildcard_1([Base], Rest, Mod). @@ -276,6 +285,13 @@ do_wildcard_3(Base, [Pattern|Rest], Result, Mod) -> do_wildcard_3(Base, [], Result, _Mod) -> [Base|Result]. +wildcard_4(Pattern, [File|Rest], Base, Result) when is_binary(File) -> + case wildcard_5(Pattern, binary_to_list(File)) of + true -> + wildcard_4(Pattern, Rest, Base, [join(Base, File)|Result]); + false -> + wildcard_4(Pattern, Rest, Base, Result) + end; wildcard_4(Pattern, [File|Rest], Base, Result) -> case wildcard_5(Pattern, File) of true -> diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index 9ca4b808e1..e38b8957f2 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -151,10 +151,10 @@ win_basenameb(O) -> basenameb(O,[<<"/">>,<<"\\">>]). basenameb(Bin,Sep) -> Parts = [ X || X <- binary:split(Bin,Sep,[global]), - X =:= <<>> ], + X =/= <<>> ], if Parts =:= [] -> - []; + <<>>; true -> lists:last(Parts) end. @@ -201,17 +201,19 @@ basename(Name, Ext) when is_list(Name), is_binary(Ext) -> basename(filename_string_to_binary(Name),Ext); basename(Name, Ext) when is_binary(Name), is_binary(Ext) -> BName = basename(Name), + LAll = byte_size(Name), LN = byte_size(BName), LE = byte_size(Ext), case LN - LE of Neg when Neg < 0 -> BName; Pos -> - case BName of - <> -> + StartLen = LAll - Pos - LE, + case Name of + <<_:StartLen/binary,Part:Pos/binary,Ext/binary>> -> Part; - Other -> - Other + _Other -> + BName end end; @@ -447,7 +449,7 @@ join1b(<>, RelativeName, [], win32) when is_integer(UcLetter), UcLetter >= $A, UcLetter =< $Z -> join1b(Rest, RelativeName, [$:, UcLetter+$a-$A], win32); join1b(<<$\\,Rest/binary>>, RelativeName, Result, win32) -> - join1b(<<$/,Rest>>, RelativeName, Result, win32); + join1b(<<$/,Rest/binary>>, RelativeName, Result, win32); join1b(<<$/,Rest/binary>>, RelativeName, [$., $/|Result], OsType) -> join1b(Rest, RelativeName, [$/|Result], OsType); join1b(<<$/,Rest/binary>>, RelativeName, [$/|Result], OsType) -> @@ -546,6 +548,8 @@ win32_pathtype(_) -> relative. %% rootname("/jam.src/foo.erl") -> "/jam.src/foo" -spec rootname(file:name()) -> file:filename(). +rootname(Name) when is_binary(Name) -> + list_to_binary(rootname(binary_to_list(Name))); % No need to handle unicode, . is < 128 rootname(Name0) -> Name = flatten(Name0), rootname(Name, [], [], major_os_type()). @@ -573,6 +577,12 @@ rootname([], Root, _Ext, _OsType) -> %% rootname("/jam.src/foo.erl", ".erl") -> "/jam.src/foo" -spec rootname(file:name(), file:name()) -> file:filename(). +rootname(Name, Ext) when is_binary(Name), is_binary(Ext) -> + list_to_binary(rootname(binary_to_list(Name),binary_to_list(Ext))); +rootname(Name, Ext) when is_binary(Name) -> + rootname(Name,filename_string_to_binary(Ext)); +rootname(Name, Ext) when is_binary(Ext) -> + rootname(filename_string_to_binary(Name),Ext); rootname(Name0, Ext0) -> Name = flatten(Name0), Ext = flatten(Ext0), @@ -639,7 +649,7 @@ win32_splitb(<>) when ((Slash =:= $\\) orelse (Slash =:= $/)) [<<$/>> | [ X || X <- L, X =/= <<>> ]]; win32_splitb(Name) -> L = binary:split(Name,[<<"/">>,<<"\\">>],[global]), - [<<$/>> | [ X || X <- L, X =/= <<>> ]]. + [ X || X <- L, X =/= <<>> ]. unix_split(Name) -> @@ -900,7 +910,7 @@ do_flatten(Atom, Tail) when is_atom(Atom) -> atom_to_list(Atom) ++ flatten(Tail). filename_string_to_binary(List) -> - case unicode:characters_to_binary(List,unicode,file:native_name_encoding()) of + case unicode:characters_to_binary(flatten(List),unicode,file:native_name_encoding()) of {error,_,_} -> erlang:error(badarg); Bin when is_binary(Bin) -> diff --git a/lib/stdlib/test/filelib_SUITE.erl b/lib/stdlib/test/filelib_SUITE.erl index d54741051f..5a279609c6 100644 --- a/lib/stdlib/test/filelib_SUITE.erl +++ b/lib/stdlib/test/filelib_SUITE.erl @@ -53,8 +53,11 @@ wildcard_one(Config) when is_list(Config) -> wildcard_two(Config) when is_list(Config) -> ?line Dir = filename:join(?config(priv_dir, Config), "wildcard_two"), + ?line DirB = unicode:characters_to_binary(Dir, file:native_name_encoding()), ?line ok = file:make_dir(Dir), - ?line do_wildcard_1(Dir, fun(Wc) -> filelib:wildcard(Wc, Dir) end), + ?line do_wildcard_1(Dir, fun(Wc) -> io:format("~p~n",[{Wc,Dir, X = filelib:wildcard(Wc, Dir)}]),X end), + ?line do_wildcard_1(Dir, fun(Wc) -> io:format("~p~n",[{Wc,DirB, X = filelib:wildcard(Wc, DirB)}]), + [unicode:characters_to_list(Y,file:native_name_encoding()) || Y <- X] end), ?line do_wildcard_1(Dir, fun(Wc) -> filelib:wildcard(Wc, Dir++"/") end), case os:type() of {win32,_} -> @@ -253,5 +256,7 @@ ensure_dir_eexist(Config) when is_list(Config) -> %% There already is a file with the name of the directory %% we want to create. ?line NeedFile = filename:join(Name, "file"), + ?line NeedFileB = filename:join(Name, <<"file">>), ?line {error, eexist} = filelib:ensure_dir(NeedFile), + ?line {error, eexist} = filelib:ensure_dir(NeedFileB), ok. diff --git a/lib/stdlib/test/filename_SUITE.erl b/lib/stdlib/test/filename_SUITE.erl index ab6521f37b..dbce93600b 100644 --- a/lib/stdlib/test/filename_SUITE.erl +++ b/lib/stdlib/test/filename_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1997-2009. All Rights Reserved. +%% Copyright Ericsson AB 1997-2010. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -22,12 +22,19 @@ basename_1/1, basename_2/1, dirname/1, extension/1, join/1, t_nativename/1]). -export([pathtype/1,rootname/1,split/1,find_src/1]). +-export([absname_bin/1, absname_bin_2/1, + basename_bin_1/1, basename_bin_2/1, + dirname_bin/1, extension_bin/1, join_bin/1]). +-export([pathtype_bin/1,rootname_bin/1,split_bin/1]). -include("test_server.hrl"). all(suite) -> [absname, absname_2, basename_1, basename_2, dirname, extension, - join, pathtype, rootname, split, t_nativename, find_src]. + join, pathtype, rootname, split, t_nativename, find_src, + absname_bin, absname_bin_2, basename_bin_1, basename_bin_2, dirname_bin, + extension_bin, + join_bin, pathtype_bin, rootname_bin, split_bin]. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -457,3 +464,307 @@ find_src(Config) when is_list(Config) -> %% Try to find the source for a preloaded module. ?line {error,{preloaded,init}} = filename:find_src(init), ok. + +%% +%% +%% With binaries +%% +%% + +absname_bin(Config) when is_list(Config) -> + case os:type() of + {win32, _} -> + ?line [Drive|_] = ?config(priv_dir, Config), + ?line Temp = filename:join([Drive|":/"], "temp"), + ?line case file:make_dir(Temp) of + ok -> ok; + {error,eexist} -> ok + end, + ?line {ok,Cwd} = file:get_cwd(), + ?line ok = file:set_cwd(Temp), + ?line <> = filename:absname(<<"foo">>), + ?line <> = filename:absname(<<"../ebin">>), + ?line <> = filename:absname(<<"/erlang">>), + ?line <> = filename:absname(<<"/erlang/src">>), + ?line <> = filename:absname(<<"\\erlang\\src">>), + ?line <> = filename:absname(<>), + ?line <> = + filename:absname(<>), + ?line <> = + filename:absname(<>), + ?line <<"a:/erlang">> = filename:absname(<<"a:erlang">>), + + ?line file:set_cwd(<>), + ?line <> = filename:absname(<<"foo">>), + ?line <> = filename:absname(<<"../ebin">>), + ?line <> = filename:absname(<<"/erlang">>), + ?line <> = filename:absname(<<"/erlang/src">>), + ?line <> = filename:absname(<<"\\erlang\\\\src">>), + ?line <> = filename:absname(<>), + ?line <> = filename:absname(<>), + ?line <<"a:/erlang">> = filename:absname(<<"a:erlang">>), + + ?line file:set_cwd(Cwd), + ok; + {unix, _} -> + ?line ok = file:set_cwd(<<"/usr">>), + ?line <<"/usr/foo">> = filename:absname(<<"foo">>), + ?line <<"/usr/../ebin">> = filename:absname(<<"../ebin">>), + + ?line file:set_cwd(<<"/">>), + ?line <<"/foo">> = filename:absname(<<"foo">>), + ?line <<"/../ebin">> = filename:absname(<<"../ebin">>), + ?line <<"/erlang">> = filename:absname(<<"/erlang">>), + ?line <<"/erlang/src">> = filename:absname(<<"/erlang/src">>), + ?line <<"/erlang/src">> = filename:absname(<<"/erlang///src">>), + ok + end. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +absname_bin_2(Config) when is_list(Config) -> + case os:type() of + {win32, _} -> + ?line [Drive|_] = ?config(priv_dir, Config), + ?line <> = filename:absname(<<"foo">>, <>), + ?line <> = filename:absname(<<"../ebin">>, + <>), + ?line <> = filename:absname(<<"/erlang">>, <>), + ?line <> = filename:absname(<<"/erlang/src">>, + <>), + ?line <> = filename:absname(<<"\\erlang\\src">>, + <>), + ?line <> = filename:absname(<>, + <>), + ?line <> = filename:absname(<>, + <>), + ?line <> = + filename:absname(<>, <>), + ?line <<"a:/erlang">> = filename:absname(<<"a:erlang">>, <>), + + ?line file:set_cwd(<>), + ?line <> = filename:absname(foo, <>), + ?line <> = filename:absname(<<"foo">>, <>), + ?line <> = filename:absname(<<"../ebin">>, <>), + ?line <> = filename:absname(<<"/erlang">>, <>), + ?line <> = filename:absname(<<"/erlang/src">>, + <>), + ?line <> = filename:absname(<<"\\erlang\\\\src">>, + <>), + ?line <> = filename:absname(<>, + <>), + ?line <> = filename:absname(<>, + <>), + ?line <<"a:/erlang">> = filename:absname(<<"a:erlang">>, <>), + + ok; + {unix, _} -> + ?line <<"/usr/foo">> = filename:absname(<<"foo">>, <<"/usr">>), + ?line <<"/usr/../ebin">> = filename:absname(<<"../ebin">>, <<"/usr">>), + + ?line <<"/foo">> = filename:absname(<<"foo">>, <<"/">>), + ?line <<"/../ebin">> = filename:absname(<<"../ebin">>, <<"/">>), + ?line <<"/erlang">> = filename:absname(<<"/erlang">>, <<"/">>), + ?line <<"/erlang/src">> = filename:absname(<<"/erlang/src">>, <<"/">>), + ?line <<"/erlang/src">> = filename:absname(<<"/erlang///src">>, <<"/">>), + ok + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +basename_bin_1(Config) when is_list(Config) -> + ?line Dog = test_server:timetrap(test_server:seconds(10)), + ?line <<".">> = filename:basename(<<".">>), + ?line <<"foo">> = filename:basename(<<"foo">>), + ?line <<"foo">> = filename:basename(<<"/usr/foo">>), + ?line <<"foo.erl">> = filename:basename(<<"A:usr/foo.erl">>), + ?line case os:type() of + {win32, _} -> + ?line <<"foo">> = filename:basename(<<"A:\\usr\\foo">>), + ?line <<"foo">> = filename:basename(<<"A:foo">>); + {unix, _} -> + ?line <<"strange\\but\\true">> = + filename:basename(<<"strange\\but\\true">>) + end, + ?line test_server:timetrap_cancel(Dog), + ok. + +basename_bin_2(Config) when is_list(Config) -> + ?line Dog = test_server:timetrap(test_server:seconds(10)), + ?line <<".">> = filename:basename(<<".">>, <<".erl">>), + ?line <<"foo">> = filename:basename(<<"foo.erl">>, <<".erl">>), + ?line <<"foo.erl">> = filename:basename(<<"/usr/foo.erl">>, <<".hrl">>), + ?line <<"foo.erl">> = filename:basename(<<"/usr.hrl/foo.erl">>, <<".hrl">>), + ?line <<"foo">> = filename:basename(<<"/usr.hrl/foo">>, <<".hrl">>), + ?line <<"foo">> = filename:basename(<<"usr/foo/">>, <<".erl">>), + ?line <<"foo.erl">> = filename:basename(<<"usr/foo.erl/">>, <<".erl">>), + ?line case os:type() of + {win32, _} -> + ?line <<"foo">> = filename:basename(<<"A:foo">>, <<".erl">>), + ?line <<"foo.erl">> = filename:basename(<<"a:\\usr\\foo.erl">>, + <<".hrl">>), + ?line <<"foo.erl">> = filename:basename(<<"c:\\usr.hrl\\foo.erl">>, + <<".hrl">>), + ?line <<"foo">> = filename:basename(<<"A:\\usr\\foo">>, <<".hrl">>); + {unix, _} -> + ?line <<"strange\\but\\true">> = + filename:basename(<<"strange\\but\\true.erl">>, <<".erl">>), + ?line <<"strange\\but\\true">> = + filename:basename(<<"strange\\but\\true">>, <<".erl">>) + end, + ?line test_server:timetrap_cancel(Dog), + ok. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +dirname_bin(Config) when is_list(Config) -> + case os:type() of + {win32,_} -> + ?line <<"A:/usr">> = filename:dirname(<<"A:/usr/foo.erl">>), + ?line <<"A:usr">> = filename:dirname(<<"A:usr/foo.erl">>), + ?line <<"/usr">> = filename:dirname(<<"\\usr\\foo.erl">>), + ?line <<"/">> = filename:dirname(<<"\\usr">>), + ?line <<"A:">> = filename:dirname(<<"A:">>); + vxworks -> + ?line <<"net:/usr">> = filename:dirname(<<"net:/usr/foo.erl">>), + ?line <<"/disk0:/usr">> = filename:dirname(<<"/disk0:/usr/foo.erl">>), + ?line <<"/usr">> = filename:dirname(<<"\\usr\\foo.erl">>), + ?line <<"/usr">> = filename:dirname(<<"\\usr">>), + ?line <<"net:">> = filename:dirname(<<"net:">>); + _ -> true + end, + ?line <<"usr">> = filename:dirname(<<"usr///foo.erl">>), + ?line <<".">> = filename:dirname(<<"foo.erl">>), + ?line <<".">> = filename:dirname(<<".">>), + case os:type() of + vxworks -> + ?line <<"/">> = filename:dirname(<<"/">>), + ?line <<"/usr">> = filename:dirname(<<"/usr">>); + _ -> + ?line <<"/">> = filename:dirname(<<"/">>), + ?line <<"/">> = filename:dirname(<<"/usr">>) + end, + ok. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +extension_bin(Config) when is_list(Config) -> + ?line <<".erl">> = filename:extension(<<"A:/usr/foo.erl">>), + ?line <<".erl">> = filename:extension(<<"A:/usr/foo.nisse.erl">>), + ?line <<".erl">> = filename:extension(<<"A:/usr.bar/foo.nisse.erl">>), + ?line <<"">> = filename:extension(<<"A:/usr.bar/foo">>), + ?line <<"">> = filename:extension(<<"A:/usr/foo">>), + ?line case os:type() of + {win32, _} -> + ?line <<"">> = filename:extension(<<"A:\\usr\\foo">>), + ?line <<".erl">> = + filename:extension(<<"A:/usr.bar/foo.nisse.erl">>), + ?line <<"">> = filename:extension(<<"A:/usr.bar/foo">>), + ok; + vxworks -> + ?line <<"">> = filename:extension(<<"/disk0:\\usr\\foo">>), + ?line <<".erl">> = + filename:extension(<<"net:/usr.bar/foo.nisse.erl">>), + ?line <<"">> = filename:extension(<<"net:/usr.bar/foo">>), + ok; + _ -> ok + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +join_bin(Config) when is_list(Config) -> + ?line <<"/">> = filename:join([<<"/">>]), + ?line <<"/">> = filename:join([<<"//">>]), + ?line <<"usr/foo.erl">> = filename:join(<<"usr">>,<<"foo.erl">>), + ?line <<"/src/foo.erl">> = filename:join(usr, <<"/src/foo.erl">>), + ?line <<"/src/foo.erl">> = filename:join([<<"/src/">>,'foo.erl']), + ?line <<"/src/foo.erl">> = filename:join(<<"usr">>, ["/sr", 'c/foo.erl']), + ?line <<"/src/foo.erl">> = filename:join(<<"usr">>, <<"/src/foo.erl">>), + + %% Make sure that redundant slashes work too. + ?line <<"a/b/c/d/e/f/g">> = filename:join([<<"a//b/c/////d//e/f/g">>]), + ?line <<"a/b/c/d/e/f/g">> = filename:join([<<"a//b/c/">>, <<"d//e/f/g">>]), + ?line <<"a/b/c/d/e/f/g">> = filename:join([<<"a//b/c">>, <<"d//e/f/g">>]), + ?line <<"/d/e/f/g">> = filename:join([<<"a//b/c">>, <<"/d//e/f/g">>]), + ?line <<"/d/e/f/g">> = filename:join([<<"a//b/c">>, <<"//d//e/f/g">>]), + + ?line <<"foo/bar">> = filename:join([$f,$o,$o,$/,[]], <<"bar">>), + + ?line case os:type() of + {win32, _} -> + ?line <<"d:/">> = filename:join([<<"D:/">>]), + ?line <<"d:/">> = filename:join([<<"D:\\">>]), + ?line <<"d:/abc">> = filename:join([<<"D:/">>, <<"abc">>]), + ?line <<"d:abc">> = filename:join([<<"D:">>, <<"abc">>]), + ?line <<"a/b/c/d/e/f/g">> = + filename:join([<<"a//b\\c//\\/\\d/\\e/f\\g">>]), + ?line <<"a:usr/foo.erl">> = + filename:join([<<"A:">>,<<"usr">>,<<"foo.erl">>]), + ?line <<"/usr/foo.erl">> = + filename:join([<<"A:">>,<<"/usr">>,<<"foo.erl">>]), + ?line <<"c:usr">> = filename:join(<<"A:">>,<<"C:usr">>), + ?line <<"a:usr">> = filename:join(<<"A:">>,<<"usr">>), + ?line <<"c:/usr">> = filename:join(<<"A:">>, <<"C:/usr">>), + ?line <<"c:/usr/foo.erl">> = + filename:join([<<"A:">>,<<"C:/usr">>,<<"foo.erl">>]), + ?line <<"c:usr/foo.erl">> = + filename:join([<<"A:">>,<<"C:usr">>,<<"foo.erl">>]), + ?line <<"d:/foo">> = filename:join([$D, $:, $/, []], <<"foo">>), + ok; + {unix, _} -> + ok + end. + +pathtype_bin(Config) when is_list(Config) -> + ?line relative = filename:pathtype(<<"..">>), + ?line relative = filename:pathtype(<<"foo">>), + ?line relative = filename:pathtype(<<"foo/bar">>), + ?line relative = filename:pathtype('foo/bar'), + case os:type() of + {win32, _} -> + ?line volumerelative = filename:pathtype(<<"/usr/local/bin">>), + ?line volumerelative = filename:pathtype(<<"A:usr/local/bin">>), + ok; + {unix, _} -> + ?line absolute = filename:pathtype(<<"/">>), + ?line absolute = filename:pathtype(<<"/usr/local/bin">>), + ok + end. + +rootname_bin(Config) when is_list(Config) -> + ?line <<"/jam.src/kalle">> = filename:rootname(<<"/jam.src/kalle">>), + ?line <<"/jam.src/foo">> = filename:rootname(<<"/jam.src/foo.erl">>), + ?line <<"/jam.src/foo">> = filename:rootname(<<"/jam.src/foo.erl">>, <<".erl">>), + ?line <<"/jam.src/foo.jam">> = filename:rootname(<<"/jam.src/foo.jam">>, <<".erl">>), + ?line <<"/jam.src/foo.jam">> = filename:rootname(["/jam.sr",'c/foo.j',"am"],<<".erl">>), + ?line <<"/jam.src/foo.jam">> = filename:rootname(["/jam.sr",'c/foo.j'|am],<<".erl">>), + ok. + +split_bin(Config) when is_list(Config) -> + case os:type() of + vxworks -> + ?line [<<"/usr">>,<<"local">>,<<"bin">>] = filename:split(<<"/usr/local/bin">>); + _ -> + ?line [<<"/">>,<<"usr">>,<<"local">>,<<"bin">>] = filename:split(<<"/usr/local/bin">>) + end, + ?line [<<"foo">>,<<"bar">>]= filename:split(<<"foo/bar">>), + ?line [<<"foo">>, <<"bar">>, <<"hello">>]= filename:split(<<"foo////bar//hello">>), + case os:type() of + {win32,_} -> + ?line [<<"a:/">>,<<"msdev">>,<<"include">>] = + filename:split(<<"a:/msdev/include">>), + ?line [<<"a:/">>,<<"msdev">>,<<"include">>] = + filename:split(<<"A:/msdev/include">>), + ?line [<<"msdev">>,<<"include">>] = + filename:split(<<"msdev\\include">>), + ?line [<<"a:/">>,<<"msdev">>,<<"include">>] = + filename:split(<<"a:\\msdev\\include">>), + ?line [<<"a:">>,<<"msdev">>,<<"include">>] = + filename:split(<<"a:msdev\\include">>), + ok; + _ -> + ok + end. + -- cgit v1.2.3