From 61138cb31e1f6ad44d3ca54e668de9d2d4adb2ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 9 Apr 2014 12:42:32 +0200 Subject: Support path names with characters outside the US ASCII range --- lib/stdlib/doc/src/erl_tar.xml | 14 +++++++++ lib/stdlib/src/erl_tar.erl | 24 ++++++++++++++-- lib/stdlib/test/tar_SUITE.erl | 65 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 99 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/stdlib/doc/src/erl_tar.xml b/lib/stdlib/doc/src/erl_tar.xml index f81e36f810..afa4196be1 100644 --- a/lib/stdlib/doc/src/erl_tar.xml +++ b/lib/stdlib/doc/src/erl_tar.xml @@ -64,6 +64,20 @@ format_error/1 function.

+
+ UNICODE SUPPORT +

If file:native_name_encoding/0 + returns utf8, path names will be encoded in UTF-8 when + creating tar files and path names will be assumed to be encoded in + UTF-8 when extracting tar files.

+ +

If file:native_name_encoding/0 + returns latin1, no translation of path names will be + done.

+
+
LIMITATIONS

For maximum compatibility, it is safe to archive files with names diff --git a/lib/stdlib/src/erl_tar.erl b/lib/stdlib/src/erl_tar.erl index 40b48d7999..89b4ea6c04 100644 --- a/lib/stdlib/src/erl_tar.erl +++ b/lib/stdlib/src/erl_tar.erl @@ -381,7 +381,12 @@ to_octal(Int, Count, Result) -> to_octal(Int div 8, Count-1, [Int rem 8 + $0|Result]). to_string(Str0, Count) -> - Str = list_to_binary(Str0), + Str = case file:native_name_encoding() of + utf8 -> + unicode:characters_to_binary(Str0); + latin1 -> + list_to_binary(Str0) + end, case byte_size(Str) of Size when Size < Count -> [Str|zeroes(Count-Size)]; @@ -608,7 +613,22 @@ typeflag(Bin) -> %% Get the name of the file from the prefix and name fields of the %% tar header. -get_name(Bin) -> +get_name(Bin0) -> + List0 = get_name_raw(Bin0), + case file:native_name_encoding() of + utf8 -> + Bin = list_to_binary(List0), + case unicode:characters_to_list(Bin) of + {error,_,_} -> + List0; + List when is_list(List) -> + List + end; + latin1 -> + List0 + end. + +get_name_raw(Bin) -> Name = from_string(Bin, ?th_name, ?th_name_len), case binary_to_list(Bin, ?th_prefix+1, ?th_prefix+1) of [0] -> diff --git a/lib/stdlib/test/tar_SUITE.erl b/lib/stdlib/test/tar_SUITE.erl index 5bc34e35af..297f5b161a 100644 --- a/lib/stdlib/test/tar_SUITE.erl +++ b/lib/stdlib/test/tar_SUITE.erl @@ -23,7 +23,7 @@ create_long_names/1, bad_tar/1, errors/1, extract_from_binary/1, extract_from_binary_compressed/1, extract_from_open_file/1, symlinks/1, open_add_close/1, cooked_compressed/1, - memory/1]). + memory/1,unicode/1]). -include_lib("test_server/include/test_server.hrl"). -include_lib("kernel/include/file.hrl"). @@ -34,7 +34,7 @@ all() -> [borderline, atomic, long_names, create_long_names, bad_tar, errors, extract_from_binary, extract_from_binary_compressed, extract_from_open_file, - symlinks, open_add_close, cooked_compressed, memory]. + symlinks, open_add_close, cooked_compressed, memory, unicode]. groups() -> []. @@ -726,6 +726,56 @@ memory(Config) when is_list(Config) -> ?line ok = delete_files([Name1,Name2]), ok. +%% Test filenames with characters outside the US ASCII range. +unicode(Config) when is_list(Config) -> + PrivDir = ?config(priv_dir, Config), + do_unicode(PrivDir), + case has_transparent_naming() of + true -> + Pa = filename:dirname(code:which(?MODULE)), + Node = start_node(unicode, "+fnl -pa "++Pa), + ok = rpc:call(Node, erlang, apply, + [fun() -> do_unicode(PrivDir) end,[]]), + true = test_server:stop_node(Node), + ok; + false -> + ok + end. + +has_transparent_naming() -> + case os:type() of + {unix,darwin} -> false; + {unix,_} -> true; + _ -> false + end. + +do_unicode(PrivDir) -> + ok = file:set_cwd(PrivDir), + ok = file:make_dir("unicöde"), + + Names = unicode_create_files(), + Tar = "unicöde.tar", + ok = erl_tar:create(Tar, ["unicöde"], []), + {ok,Names} = erl_tar:table(Tar, []), + _ = [ok = file:delete(Name) || Name <- Names], + ok = erl_tar:extract(Tar), + _ = [{ok,_} = file:read_file(Name) || Name <- Names], + _ = [ok = file:delete(Name) || Name <- Names], + ok = file:del_dir("unicöde"), + ok. + +unicode_create_files() -> + FileA = "unicöde/smörgåsbord", + ok = file:write_file(FileA, "yum!\n"), + [FileA|case file:native_name_encoding() of + utf8 -> + FileB = "unicöde/Хороший файл!", + ok = file:write_file(FileB, "But almost empty.\n"), + [FileB]; + latin1 -> + [] + end]. + %% Delete the given list of files. delete_files([]) -> ok; delete_files([Item|Rest]) -> @@ -791,3 +841,14 @@ make_temp_dir(Base, I) -> ok -> Name; {error,eexist} -> make_temp_dir(Base, I+1) end. + +start_node(Name, Args) -> + [_,Host] = string:tokens(atom_to_list(node()), "@"), + ct:log("Trying to start ~w@~s~n", [Name,Host]), + case test_server:start_node(Name, peer, [{args,Args}]) of + {error,Reason} -> + test_server:fail(Reason); + {ok,Node} -> + ct:log("Node ~p started~n", [Node]), + Node + end. -- cgit v1.2.3