From aa15249fe5d8819e511ca0f09eae1d1207903e53 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Tue, 29 Jan 2013 17:28:20 +0100 Subject: Make prim_file skip invalid filenames in unicode mode The fix affects list_dir and read_link. Raw filenames are now never produced, just consumed even if +fnu or +fna is used on Linux etc. This also adds the options to get error return or error handler warning messages with +fn{u|a}{i|w|e} as an option to erl. This is still not documented and there needs to be other versions of read_dir and read_link to facilitate reading of all types of filenames and links. A check that we will not change to an invalid directory is also needed. --- erts/emulator/beam/atom.names | 1 + erts/emulator/beam/erl_init.c | 53 +++++++++++++++++++++++++++++++++++++--- erts/emulator/beam/erl_unicode.c | 17 ++++++++++--- erts/emulator/beam/sys.h | 14 ++++++++++- 4 files changed, 77 insertions(+), 8 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index f138324e1f..ce60bb9bbc 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -269,6 +269,7 @@ atom hipe_architecture atom http httph https http_response http_request http_header http_eoh http_error http_bin httph_bin atom id atom if_clause +atom ignore atom imports atom in atom in_exiting diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 223c9c4d7e..ec3e0d54cb 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -981,19 +981,64 @@ erl_start(int argc, char **argv) break; case 'f': if (!strncmp(argv[i],"-fn",3)) { + int warning_type = ERL_FILENAME_WARNING_WARNING; arg = get_arg(argv[i]+3, argv[i+1], &i); switch (*arg) { case 'u': - erts_set_user_requested_filename_encoding(ERL_FILENAME_UTF8); + switch (*(argv[i]+4)) { + case 'w': + case 0: + break; + case 'i': + warning_type = ERL_FILENAME_WARNING_IGNORE; + break; + case 'e': + warning_type = ERL_FILENAME_WARNING_ERROR; + break; + default: + erts_fprintf(stderr, "bad type of warnings for " + "wrongly coded filename: %s\n", argv[i]+4); + erts_usage(); + } + erts_set_user_requested_filename_encoding + ( + ERL_FILENAME_UTF8, + warning_type + ); break; case 'l': - erts_set_user_requested_filename_encoding(ERL_FILENAME_LATIN1); + erts_set_user_requested_filename_encoding + ( + ERL_FILENAME_LATIN1, + warning_type + ); break; case 'a': - erts_set_user_requested_filename_encoding(ERL_FILENAME_UNKNOWN); + switch (*(argv[i]+4)) { + case 'w': + case 0: + break; + case 'i': + warning_type = ERL_FILENAME_WARNING_IGNORE; + break; + case 'e': + warning_type = ERL_FILENAME_WARNING_ERROR; + break; + default: + erts_fprintf(stderr, "bad type of warnings for " + "wrongly coded filename: %s\n", argv[i]+4); + erts_usage(); + } + erts_set_user_requested_filename_encoding + ( + ERL_FILENAME_UNKNOWN, + warning_type + ); break; default: - erts_fprintf(stderr, "bad filename encoding %s, can be (l,u or a)\n", arg); + erts_fprintf(stderr, "bad filename encoding %s, can be " + "(l,u or a, optionally followed by w, " + "i or e)\n", arg); erts_usage(); } break; diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 99108af937..8d7930e5fa 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -2573,8 +2573,20 @@ BIF_RETTYPE prim_file_internal_native2name_1(BIF_ALIST_1) case ERL_FILENAME_UTF8: bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); if (erts_analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != ERTS_UTF8_OK) { + Eterm *hp = HAlloc(BIF_P,3); + Eterm warn_type = NIL; erts_free_aligned_binary_bytes(temp_alloc); - goto noconvert; + switch (erts_get_filename_warning_type()) { + case ERL_FILENAME_WARNING_IGNORE: + warn_type = am_ignore; + break; + case ERL_FILENAME_WARNING_ERROR: + warn_type = am_error; + break; + default: + warn_type = am_warning; + } + BIF_RET(TUPLE2(hp,am_error,warn_type)); } num_built = 0; num_eaten = 0; @@ -2607,9 +2619,8 @@ BIF_RETTYPE prim_file_internal_native2name_1(BIF_ALIST_1) erts_free_aligned_binary_bytes(temp_alloc); BIF_RET(ret); default: - goto noconvert; + break; } - noconvert: BIF_RET(BIF_ARG_1); } diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h index 249a9c05c2..9416a91480 100644 --- a/erts/emulator/beam/sys.h +++ b/erts/emulator/beam/sys.h @@ -1029,10 +1029,22 @@ char* win32_errorstr(int); #define ERL_FILENAME_UTF8_MAC (3) #define ERL_FILENAME_WIN_WCHAR (4) +/************************************************************************ + * If a filename in for example list_dir is not in the right encoding, it + * will be skipped in the resulting list, but depending on a startup setting + * we will inform the user in different ways. These macros define the + * different reactions to wrongly coded filenames. In the error case an + * exception will be thrown by prim_file. + ************************************************************************/ +#define ERL_FILENAME_WARNING_WARNING (0) +#define ERL_FILENAME_WARNING_IGNORE (1) +#define ERL_FILENAME_WARNING_ERROR (2) + int erts_get_native_filename_encoding(void); /* The set function is only to be used by erl_init! */ -void erts_set_user_requested_filename_encoding(int encoding); +void erts_set_user_requested_filename_encoding(int encoding, int warning); int erts_get_user_requested_filename_encoding(void); +int erts_get_filename_warning_type(void); void erts_init_sys_common_misc(void); -- cgit v1.2.3 From a8a8d27a461e82af3f88774e07906a920f95e63e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 11 Feb 2013 12:48:08 +0100 Subject: Teach prim_file:set_cwd() to avoid entering non-translatable directories We have decided that we don't want to deal with the compilations of prim_file:get_cwd() returning a binary when the current directory name cannot be translated losslessly to a list (i.e. when the run-time system was started with +fnu and the current directory name contains bytes that are not part of a valid UTF-8 sequence). Therefore, if prim_file:set_cwd() is given a binary as the pathname, we will need to check the binary to make sure it can be translated to a list. We will introduce a new BIF, called prim_file:is_translatable/1, which will check both filename encoding mode, and if it is one of Unicode modes, the binary as well. We don't need to do anything special if prim_file:set_cwd() is passed a list. --- erts/emulator/beam/bif.tab | 1 + erts/emulator/beam/erl_unicode.c | 46 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 4aaf466008..e313188901 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -522,6 +522,7 @@ bif erlang:nif_error/2 bif prim_file:internal_name2native/1 bif prim_file:internal_native2name/1 bif prim_file:internal_normalize_utf8/1 +bif prim_file:is_translatable/1 bif file:native_name_encoding/0 # diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 8d7930e5fa..80982f3760 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -2657,6 +2657,52 @@ BIF_RETTYPE prim_file_internal_normalize_utf8_1(BIF_ALIST_1) BIF_RET(ret); } +BIF_RETTYPE prim_file_is_translatable_1(BIF_ALIST_1) +{ + ERTS_DECLARE_DUMMY(Eterm real_bin); + ERTS_DECLARE_DUMMY(Uint offset); + Uint size; + Uint num_chars; + Uint bitsize; + ERTS_DECLARE_DUMMY(Uint bitoffs); + byte *temp_alloc = NULL; + byte *bytes; + byte *err_pos; + int status; + + if (is_not_binary(BIF_ARG_1)) { + BIF_ERROR(BIF_P,BADARG); + } + size = binary_size(BIF_ARG_1); + ERTS_GET_REAL_BIN(BIF_ARG_1, real_bin, offset, bitoffs, bitsize); + if (bitsize != 0) { + BIF_ERROR(BIF_P,BADARG); + } + if (size == 0) { + BIF_RET(am_true); + } + + /* + * If the encoding is latin1, the pathname is always translatable. + */ + switch (erts_get_native_filename_encoding()) { + case ERL_FILENAME_LATIN1: + BIF_RET(am_true); + case ERL_FILENAME_WIN_WCHAR: + if (erts_get_user_requested_filename_encoding() == ERL_FILENAME_LATIN1) { + BIF_RET(am_true); + } + } + + /* + * Check whether the binary contains legal UTF-8 sequences. + */ + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + status = erts_analyze_utf8(bytes, size, &err_pos, &num_chars, NULL); + erts_free_aligned_binary_bytes(temp_alloc); + BIF_RET(status == ERTS_UTF8_OK ? am_true : am_false); +} + BIF_RETTYPE file_native_name_encoding_0(BIF_ALIST_0) { switch (erts_get_native_filename_encoding()) { -- cgit v1.2.3