From a8a8d27a461e82af3f88774e07906a920f95e63e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 11 Feb 2013 12:48:08 +0100 Subject: Teach prim_file:set_cwd() to avoid entering non-translatable directories We have decided that we don't want to deal with the compilations of prim_file:get_cwd() returning a binary when the current directory name cannot be translated losslessly to a list (i.e. when the run-time system was started with +fnu and the current directory name contains bytes that are not part of a valid UTF-8 sequence). Therefore, if prim_file:set_cwd() is given a binary as the pathname, we will need to check the binary to make sure it can be translated to a list. We will introduce a new BIF, called prim_file:is_translatable/1, which will check both filename encoding mode, and if it is one of Unicode modes, the binary as well. We don't need to do anything special if prim_file:set_cwd() is passed a list. --- erts/emulator/beam/bif.tab | 1 + erts/emulator/beam/erl_unicode.c | 46 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) (limited to 'erts/emulator') diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 4aaf466008..e313188901 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -522,6 +522,7 @@ bif erlang:nif_error/2 bif prim_file:internal_name2native/1 bif prim_file:internal_native2name/1 bif prim_file:internal_normalize_utf8/1 +bif prim_file:is_translatable/1 bif file:native_name_encoding/0 # diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 8d7930e5fa..80982f3760 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -2657,6 +2657,52 @@ BIF_RETTYPE prim_file_internal_normalize_utf8_1(BIF_ALIST_1) BIF_RET(ret); } +BIF_RETTYPE prim_file_is_translatable_1(BIF_ALIST_1) +{ + ERTS_DECLARE_DUMMY(Eterm real_bin); + ERTS_DECLARE_DUMMY(Uint offset); + Uint size; + Uint num_chars; + Uint bitsize; + ERTS_DECLARE_DUMMY(Uint bitoffs); + byte *temp_alloc = NULL; + byte *bytes; + byte *err_pos; + int status; + + if (is_not_binary(BIF_ARG_1)) { + BIF_ERROR(BIF_P,BADARG); + } + size = binary_size(BIF_ARG_1); + ERTS_GET_REAL_BIN(BIF_ARG_1, real_bin, offset, bitoffs, bitsize); + if (bitsize != 0) { + BIF_ERROR(BIF_P,BADARG); + } + if (size == 0) { + BIF_RET(am_true); + } + + /* + * If the encoding is latin1, the pathname is always translatable. + */ + switch (erts_get_native_filename_encoding()) { + case ERL_FILENAME_LATIN1: + BIF_RET(am_true); + case ERL_FILENAME_WIN_WCHAR: + if (erts_get_user_requested_filename_encoding() == ERL_FILENAME_LATIN1) { + BIF_RET(am_true); + } + } + + /* + * Check whether the binary contains legal UTF-8 sequences. + */ + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + status = erts_analyze_utf8(bytes, size, &err_pos, &num_chars, NULL); + erts_free_aligned_binary_bytes(temp_alloc); + BIF_RET(status == ERTS_UTF8_OK ? am_true : am_false); +} + BIF_RETTYPE file_native_name_encoding_0(BIF_ALIST_0) { switch (erts_get_native_filename_encoding()) { -- cgit v1.2.3