diff options
author | Björn Gustavsson <[email protected]> | 2013-02-12 14:30:13 +0100 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2013-02-12 14:30:13 +0100 |
commit | cd08400f92ec7672025bf39a458effcf33a423dc (patch) | |
tree | b874f8c3fe97558d72dcaedc31dd941060ccea75 /erts | |
parent | bbf692965470a9e993e1afd6f1a9375cbe832fcb (diff) | |
parent | 08bc8a328275e751836fab0c562b50cf71000c2b (diff) | |
download | otp-cd08400f92ec7672025bf39a458effcf33a423dc.tar.gz otp-cd08400f92ec7672025bf39a458effcf33a423dc.tar.bz2 otp-cd08400f92ec7672025bf39a458effcf33a423dc.zip |
Merge branch 'pan/unicode_filename_warnings'
* pan/unicode_filename_warnings:
  - Add file:list_dir_all/1 and file:read_link_all/1
  - prim_file: Add list_dir_all() and read_link_all()
  - Teach prim_file:set_cwd() to avoid entering non-translatable directories
  - Make prim_file skip invalid filenames in unicode mode
  - prim_file: Refactor functions that return filenames
  - prim_file: Refactor handling of responses
  - prim_file: Always open non-file ports in binary mode
  - Test that list_dir("non-existing-dir") fails with the correct error
Diffstat (limited to 'erts')
-rw-r--r-- | erts/emulator/beam/atom.names | 1
-rw-r--r-- | erts/emulator/beam/bif.tab | 1
-rw-r--r-- | erts/emulator/beam/erl_init.c | 53
-rw-r--r-- | erts/emulator/beam/erl_unicode.c | 63
-rw-r--r-- | erts/emulator/beam/sys.h | 14
-rw-r--r-- | erts/emulator/sys/common/erl_sys_common_misc.c | 9
-rw-r--r-- | erts/preloaded/ebin/erl_prim_loader.beam | bin | 54120 -> 54360 bytes
-rw-r--r-- | erts/preloaded/ebin/prim_file.beam | bin | 41128 -> 44092 bytes
-rw-r--r-- | erts/preloaded/src/erl_prim_loader.erl | 16
-rw-r--r-- | erts/preloaded/src/prim_file.erl | 208
10 files changed, 289 insertions, 76 deletions
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index f138324e1f..ce60bb9bbc 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -269,6 +269,7 @@ atom hipe_architecture atom http httph https http_response http_request http_header http_eoh http_error http_bin httph_bin atom id atom if_clause +atom ignore atom imports atom in atom in_exiting diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 4aaf466008..e313188901 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -522,6 +522,7 @@ bif erlang:nif_error/2 bif prim_file:internal_name2native/1 bif prim_file:internal_native2name/1 bif prim_file:internal_normalize_utf8/1 +bif prim_file:is_translatable/1 bif file:native_name_encoding/0 # diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 223c9c4d7e..ec3e0d54cb 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -981,19 +981,64 @@ erl_start(int argc, char **argv) break; case 'f': if (!strncmp(argv[i],"-fn",3)) { + int warning_type = ERL_FILENAME_WARNING_WARNING; arg = get_arg(argv[i]+3, argv[i+1], &i); switch (*arg) { case 'u': - erts_set_user_requested_filename_encoding(ERL_FILENAME_UTF8); + switch (*(argv[i]+4)) { + case 'w': + case 0: + break; + case 'i': + warning_type = ERL_FILENAME_WARNING_IGNORE; + break; + case 'e': + warning_type = ERL_FILENAME_WARNING_ERROR; + break; + default: + erts_fprintf(stderr, "bad type of warnings for " + "wrongly coded filename: %s\n", argv[i]+4); + erts_usage(); + } + erts_set_user_requested_filename_encoding + ( + ERL_FILENAME_UTF8, + warning_type + ); break; case 'l': - erts_set_user_requested_filename_encoding(ERL_FILENAME_LATIN1); + erts_set_user_requested_filename_encoding + ( + ERL_FILENAME_LATIN1, + warning_type + ); break; case 'a': - erts_set_user_requested_filename_encoding(ERL_FILENAME_UNKNOWN); + switch (*(argv[i]+4)) { + case 'w': + case 0: + break; + case 'i': + 
warning_type = ERL_FILENAME_WARNING_IGNORE; + break; + case 'e': + warning_type = ERL_FILENAME_WARNING_ERROR; + break; + default: + erts_fprintf(stderr, "bad type of warnings for " + "wrongly coded filename: %s\n", argv[i]+4); + erts_usage(); + } + erts_set_user_requested_filename_encoding + ( + ERL_FILENAME_UNKNOWN, + warning_type + ); break; default: - erts_fprintf(stderr, "bad filename encoding %s, can be (l,u or a)\n", arg); + erts_fprintf(stderr, "bad filename encoding %s, can be " + "(l,u or a, optionally followed by w, " + "i or e)\n", arg); erts_usage(); } break; diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 99108af937..80982f3760 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -2573,8 +2573,20 @@ BIF_RETTYPE prim_file_internal_native2name_1(BIF_ALIST_1) case ERL_FILENAME_UTF8: bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); if (erts_analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != ERTS_UTF8_OK) { + Eterm *hp = HAlloc(BIF_P,3); + Eterm warn_type = NIL; erts_free_aligned_binary_bytes(temp_alloc); - goto noconvert; + switch (erts_get_filename_warning_type()) { + case ERL_FILENAME_WARNING_IGNORE: + warn_type = am_ignore; + break; + case ERL_FILENAME_WARNING_ERROR: + warn_type = am_error; + break; + default: + warn_type = am_warning; + } + BIF_RET(TUPLE2(hp,am_error,warn_type)); } num_built = 0; num_eaten = 0; @@ -2607,9 +2619,8 @@ BIF_RETTYPE prim_file_internal_native2name_1(BIF_ALIST_1) erts_free_aligned_binary_bytes(temp_alloc); BIF_RET(ret); default: - goto noconvert; + break; } - noconvert: BIF_RET(BIF_ARG_1); } @@ -2646,6 +2657,52 @@ BIF_RETTYPE prim_file_internal_normalize_utf8_1(BIF_ALIST_1) BIF_RET(ret); } +BIF_RETTYPE prim_file_is_translatable_1(BIF_ALIST_1) +{ + ERTS_DECLARE_DUMMY(Eterm real_bin); + ERTS_DECLARE_DUMMY(Uint offset); + Uint size; + Uint num_chars; + Uint bitsize; + ERTS_DECLARE_DUMMY(Uint bitoffs); + byte *temp_alloc = NULL; + byte 
*bytes; + byte *err_pos; + int status; + + if (is_not_binary(BIF_ARG_1)) { + BIF_ERROR(BIF_P,BADARG); + } + size = binary_size(BIF_ARG_1); + ERTS_GET_REAL_BIN(BIF_ARG_1, real_bin, offset, bitoffs, bitsize); + if (bitsize != 0) { + BIF_ERROR(BIF_P,BADARG); + } + if (size == 0) { + BIF_RET(am_true); + } + + /* + * If the encoding is latin1, the pathname is always translatable. + */ + switch (erts_get_native_filename_encoding()) { + case ERL_FILENAME_LATIN1: + BIF_RET(am_true); + case ERL_FILENAME_WIN_WCHAR: + if (erts_get_user_requested_filename_encoding() == ERL_FILENAME_LATIN1) { + BIF_RET(am_true); + } + } + + /* + * Check whether the binary contains legal UTF-8 sequences. + */ + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + status = erts_analyze_utf8(bytes, size, &err_pos, &num_chars, NULL); + erts_free_aligned_binary_bytes(temp_alloc); + BIF_RET(status == ERTS_UTF8_OK ? am_true : am_false); +} + BIF_RETTYPE file_native_name_encoding_0(BIF_ALIST_0) { switch (erts_get_native_filename_encoding()) { diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h index 249a9c05c2..9416a91480 100644 --- a/erts/emulator/beam/sys.h +++ b/erts/emulator/beam/sys.h @@ -1029,10 +1029,22 @@ char* win32_errorstr(int); #define ERL_FILENAME_UTF8_MAC (3) #define ERL_FILENAME_WIN_WCHAR (4) +/************************************************************************ + * If a filename in for example list_dir is not in the right encoding, it + * will be skipped in the resulting list, but depending on a startup setting + * we will inform the user in different ways. These macros define the + * different reactions to wrongly coded filenames. In the error case an + * exception will be thrown by prim_file. 
+ ************************************************************************/ +#define ERL_FILENAME_WARNING_WARNING (0) +#define ERL_FILENAME_WARNING_IGNORE (1) +#define ERL_FILENAME_WARNING_ERROR (2) + int erts_get_native_filename_encoding(void); /* The set function is only to be used by erl_init! */ -void erts_set_user_requested_filename_encoding(int encoding); +void erts_set_user_requested_filename_encoding(int encoding, int warning); int erts_get_user_requested_filename_encoding(void); +int erts_get_filename_warning_type(void); void erts_init_sys_common_misc(void); diff --git a/erts/emulator/sys/common/erl_sys_common_misc.c b/erts/emulator/sys/common/erl_sys_common_misc.c index 1bf5fa89f4..a0cd4cd10a 100644 --- a/erts/emulator/sys/common/erl_sys_common_misc.c +++ b/erts/emulator/sys/common/erl_sys_common_misc.c @@ -47,14 +47,16 @@ /* Written once and only once */ static int filename_encoding = ERL_FILENAME_UNKNOWN; +static int filename_warning = ERL_FILENAME_WARNING_WARNING; #if defined(__WIN32__) || defined(__DARWIN__) static int user_filename_encoding = ERL_FILENAME_UTF8; /* Default unicode on windows */ #else static int user_filename_encoding = ERL_FILENAME_LATIN1; #endif -void erts_set_user_requested_filename_encoding(int encoding) +void erts_set_user_requested_filename_encoding(int encoding, int warning) { user_filename_encoding = encoding; + filename_warning = warning; } int erts_get_user_requested_filename_encoding(void) @@ -62,6 +64,11 @@ int erts_get_user_requested_filename_encoding(void) return user_filename_encoding; } +int erts_get_filename_warning_type(void) +{ + return filename_warning; +} + void erts_init_sys_common_misc(void) { #if defined(__WIN32__) diff --git a/erts/preloaded/ebin/erl_prim_loader.beam b/erts/preloaded/ebin/erl_prim_loader.beam Binary files differindex f8c2df3a5a..4a3af265c1 100644 --- a/erts/preloaded/ebin/erl_prim_loader.beam +++ b/erts/preloaded/ebin/erl_prim_loader.beam diff --git a/erts/preloaded/ebin/prim_file.beam 
b/erts/preloaded/ebin/prim_file.beam Binary files differindex ca93edbe25..f7b3aac376 100644 --- a/erts/preloaded/ebin/prim_file.beam +++ b/erts/preloaded/ebin/prim_file.beam diff --git a/erts/preloaded/src/erl_prim_loader.erl b/erts/preloaded/src/erl_prim_loader.erl index d36fdeba3f..7490954f2d 100644 --- a/erts/preloaded/src/erl_prim_loader.erl +++ b/erts/preloaded/src/erl_prim_loader.erl @@ -149,8 +149,18 @@ start_it("inet", Id, Pid, Hosts) -> start_it("efile", Id, Pid, _Hosts) -> process_flag(trap_exit, true), - {ok, Port} = prim_file:open([binary]), - init_ack(Pid), + {ok, Port} = prim_file:start(), + %% Check that we started in a valid directory. + case prim_file:get_cwd(Port) of + {error, _} -> + %% At this point in the startup, we have no error_logger at all. + Report = "Invalid current directory or invalid filename " + "mode: loader cannot read current directory\n", + erlang:display(Report), + exit({error, invalid_current_directory}); + _ -> + init_ack(Pid) + end, MultiGet = case erlang:system_info(thread_pool_size) of 0 -> false; _ -> true @@ -434,7 +444,7 @@ efile_multi_get_file_from_port2(_MFs, 0, _Max, State, _Paths, _Fun, _Ref, Ret) - efile_par_get_file(Ref, State, {Mod,File} = MF, Paths, Pid, Fun) -> %% One port for each file read in "parallel": - case prim_file:open([binary]) of + case prim_file:start() of {ok, Port} -> Port0 = State#state.data, State1 = State#state{data = Port}, diff --git a/erts/preloaded/src/prim_file.erl b/erts/preloaded/src/prim_file.erl index 50adf9c89d..bf8879c2a0 100644 --- a/erts/preloaded/src/prim_file.erl +++ b/erts/preloaded/src/prim_file.erl @@ -50,9 +50,9 @@ write_file_info/2, write_file_info/3, write_file_info/4, make_link/2, make_link/3, make_symlink/2, make_symlink/3, - read_link/1, read_link/2, + read_link/1, read_link/2, read_link_all/1, read_link_all/2, read_link_info/1, read_link_info/2, read_link_info/3, - list_dir/1, list_dir/2]). + list_dir/1, list_dir/2, list_dir_all/1, list_dir_all/2]). 
%% How to start and stop the ?DRV port. -export([start/0, stop/1]). @@ -152,16 +152,19 @@ -export([internal_name2native/1, internal_native2name/1, - internal_normalize_utf8/1]). + internal_normalize_utf8/1, + is_translatable/1]). -type prim_file_name() :: string() | unicode:unicode_binary(). +-type prim_file_name_error() :: 'error' | 'ignore' | 'warning'. -spec internal_name2native(prim_file_name()) -> binary(). internal_name2native(_) -> erlang:nif_error(undefined). --spec internal_native2name(binary()) -> prim_file_name(). +-spec internal_native2name(binary()) -> + prim_file_name() | {'error',prim_file_name_error()}. internal_native2name(_) -> erlang:nif_error(undefined). @@ -171,6 +174,11 @@ internal_native2name(_) -> internal_normalize_utf8(_) -> erlang:nif_error(undefined). +-spec is_translatable(prim_file_name()) -> boolean(). + +is_translatable(_) -> + erlang:nif_error(undefined). + %%% End of BIFs %%%----------------------------------------------------------------- @@ -210,12 +218,7 @@ open(_, _) -> %% Opens a port that can be used for open/3 or read_file/2. %% Returns {ok, Port} | {error, Reason}. open(Portopts) when is_list(Portopts) -> - case drv_open(?FD_DRV, Portopts) of - {error, _} = Error -> - Error; - Other -> - Other - end; + drv_open(?FD_DRV, [binary|Portopts]); open(_) -> {error, badarg}. @@ -607,13 +610,7 @@ sendfile(#file_descriptor{module = ?MODULE, data = {Port, _}}, %% Returns {ok, Port}, the Port should be used as first argument in all %% the following functions. Returns {error, Reason} upon failure. start() -> - try erlang:open_port({spawn, ?DRV}, [binary]) of - Port -> - {ok, Port} - catch - error:Reason -> - {error, Reason} - end. + drv_open(?DRV, [binary]). stop(Port) when is_port(Port) -> try erlang:port_close(Port) of @@ -667,7 +664,8 @@ get_cwd_int(Drive) -> get_cwd_int({?DRV, [binary]}, Drive). get_cwd_int(Port, Drive) -> - drv_command(Port, <<?FILE_PWD, Drive>>). 
+ drv_command(Port, <<?FILE_PWD, Drive>>, + fun handle_fname_response/1). @@ -679,10 +677,17 @@ set_cwd(Dir) -> set_cwd(Port, Dir) when is_port(Port) -> set_cwd_int(Port, Dir). -set_cwd_int(Port, Dir) -> - %% Dir is now either a string or an EXIT tuple. - %% An EXIT tuple will fail in the following catch. - drv_command(Port, [?FILE_CHDIR, pathname(Dir)]). +set_cwd_int(Port, Dir) when is_binary(Dir) -> + case prim_file:is_translatable(Dir) of + false -> + {error, no_translation}; + true -> + drv_command(Port, [?FILE_CHDIR, pathname(Dir)]) + end; +set_cwd_int(Port, Dir) when is_list(Dir) -> + drv_command(Port, [?FILE_CHDIR, pathname(Dir)]); +set_cwd_int(_, _) -> + {error, badarg}. @@ -775,7 +780,8 @@ altname(Port, File) when is_port(Port) -> altname_int(Port, File). altname_int(Port, File) -> - drv_command(Port, [?FILE_ALTNAME, pathname(File)]). + drv_command(Port, [?FILE_ALTNAME, pathname(File)], + fun handle_fname_response/1). %% write_file_info/{2,3,4} @@ -868,7 +874,20 @@ read_link(Port, Link) when is_port(Port) -> read_link_int(Port, Link). read_link_int(Port, Link) -> - drv_command(Port, [?FILE_READLINK, pathname(Link)]). + drv_command(Port, [?FILE_READLINK, pathname(Link)], + fun handle_fname_response/1). + +%% read_link_all/{2,3} + +read_link_all(Link) -> + read_link_all_int({?DRV, [binary]}, Link). + +read_link_all(Port, Link) when is_port(Port) -> + read_link_all_int(Port, Link). + +read_link_all_int(Port, Link) -> + drv_command(Port, [?FILE_READLINK, pathname(Link)], + fun handle_fname_response_all/1). @@ -910,20 +929,112 @@ list_dir(Port, Dir) when is_port(Port) -> list_dir_int(Port, Dir). list_dir_int(Port, Dir) -> - drv_command(Port, [?FILE_READDIR, pathname(Dir)], []). - + drv_command(Port, [?FILE_READDIR, pathname(Dir)], + fun(P) -> + case list_dir_response(P, []) of + {ok, RawNames} -> + {ok, list_dir_convert(RawNames)}; + Error -> + Error + end + end). + +list_dir_all(Dir) -> + list_dir_all_int({?DRV, [binary]}, Dir). 
+ +list_dir_all(Port, Dir) when is_port(Port) -> + list_dir_all_int(Port, Dir). + +list_dir_all_int(Port, Dir) -> + drv_command(Port, [?FILE_READDIR, pathname(Dir)], + fun(P) -> + case list_dir_response(P, []) of + {ok, RawNames} -> + {ok, list_dir_convert_all(RawNames)}; + Error -> + Error + end + end). + +list_dir_response(Port, Acc0) -> + case drv_get_response(Port) of + {lfname, []} -> + {ok, Acc0}; + {lfname, Names} -> + Acc = [Name || <<L:16,Name:L/binary>> <= Names] ++ Acc0, + list_dir_response(Port, Acc); + Error -> + Error + end. +list_dir_convert([Name|Names]) -> + %% If the filename cannot be converted, return error or ignore + %% with optional error logger warning, depending on +fn{u|a}{i|e|w} + %% emulator switches. + case prim_file:internal_native2name(Name) of + {error, warning} -> + error_logger:warning_msg("Non-unicode filename ~p ignored\n", + [Name]), + list_dir_convert(Names); + {error, ignore} -> + list_dir_convert(Names); + {error, error} -> + {error, {no_translation, Name}}; + Converted when is_list(Converted) -> + [Converted|list_dir_convert(Names)] + end; +list_dir_convert([]) -> []. + +list_dir_convert_all([Name|Names]) -> + %% If the filename cannot be converted, retain the filename as + %% a binary. + case prim_file:internal_native2name(Name) of + {error, _} -> + [Name|list_dir_convert(Names)]; + Converted when is_list(Converted) -> + [Converted|list_dir_convert(Names)] + end; +list_dir_convert_all([]) -> []. %%%----------------------------------------------------------------- %%% Functions to communicate with the driver +handle_fname_response(Port) -> + case drv_get_response(Port) of + {fname, Name} -> + case prim_file:internal_native2name(Name) of + {error, warning} -> + error_logger:warning_msg("Non-unicode filename ~p " + "ignored when reading link\n", + [Name]), + {error, einval}; + {error, _} -> + {error, einval}; + Converted when is_list(Converted) -> + {ok, Converted} + end; + Error -> + Error + end. 
+handle_fname_response_all(Port) -> + case drv_get_response(Port) of + {fname, Name} -> + case prim_file:internal_native2name(Name) of + {error, _} -> + {ok, Name}; + Converted when is_list(Converted) -> + {ok, Converted} + end; + Error -> + Error + end. %% Opens a driver port and converts any problems into {error, emfile}. %% Returns {ok, Port} when successful. drv_open(Driver, Portopts) -> - try erlang:open_port({spawn, Driver}, Portopts) of + try erlang:open_port({spawn_driver, Driver}, Portopts) of Port -> {ok, Port} catch @@ -1028,19 +1139,10 @@ drv_command_nt(Port, Command, R) when is_port(Port) -> %% Receives the response from a driver port. %% Returns: {ok, ListOrBinary}|{error, Reason} -drv_get_response(Port, R) when is_list(R) -> - case drv_get_response(Port) of - ok -> - {ok, R}; - {ok, Name} -> - drv_get_response(Port, [Name|R]); - {append, Names} -> - drv_get_response(Port, append(Names, R)); - Error -> - Error - end; -drv_get_response(Port, _) -> - drv_get_response(Port). +drv_get_response(Port, undefined) -> + drv_get_response(Port); +drv_get_response(Port, Fun) when is_function(Fun, 1) -> + Fun(Port). drv_get_response(Port) -> erlang:bump_reductions(100), @@ -1060,10 +1162,6 @@ drv_get_response(Port) -> %%%----------------------------------------------------------------- %%% Utility functions. -append([I | Is], R) when is_list(R) -> append(Is, [I | R]); -append([], R) -> R. - - %% Converts a list of mode atoms into a mode word for the driver. 
%% Returns {Mode, Portopts, Setopts} where Portopts is a list of %% options for erlang:open_port/2 and Setopts is a list of @@ -1205,18 +1303,10 @@ translate_response(?FILE_RESP_N2DATA = X, L0) when is_list(L0) -> end; translate_response(?FILE_RESP_EOF, []) -> eof; -translate_response(?FILE_RESP_FNAME, []) -> - ok; -translate_response(?FILE_RESP_FNAME, Data) when is_binary(Data) -> - {ok, prim_file:internal_native2name(Data)}; translate_response(?FILE_RESP_FNAME, Data) -> - {ok, Data}; -translate_response(?FILE_RESP_LFNAME, []) -> - ok; -translate_response(?FILE_RESP_LFNAME, Data) when is_binary(Data) -> - {append, transform_lfname(Data)}; + {fname, Data}; translate_response(?FILE_RESP_LFNAME, Data) -> - {append, transform_lfname(Data)}; + {lfname, Data}; translate_response(?FILE_RESP_ALL_DATA, Data) -> {ok, Data}; translate_response(X, Data) -> @@ -1332,16 +1422,6 @@ transform_ldata(0, List, [Size | Sizes], R) -> {Front, Rear} = lists_split(List, Size), transform_ldata(0, Rear, Sizes, [Front | R]). -transform_lfname(<<>>) -> []; -transform_lfname(<<L:16, Name:L/binary, Names/binary>>) -> - [ prim_file:internal_native2name(Name) | transform_lfname(Names)]; -transform_lfname([]) -> []; -transform_lfname([L1,L2|Names]) -> - L = (L1 bsl 8) bor L2, - {Name, Rest} = lists_split(Names, L), - [Name | transform_lfname(Rest)]. - - lists_split(List, 0) when is_list(List) -> {[], List}; lists_split(List, N) when is_list(List), is_integer(N), N < 0 -> |