diff options
author | Björn Gustavsson <[email protected]> | 2014-04-09 12:42:32 +0200 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2014-05-08 14:26:27 +0200 |
commit | 61138cb31e1f6ad44d3ca54e668de9d2d4adb2ec (patch) | |
tree | ded5ff5de1c9f386601a7e16ac922ab279650fe4 /lib/stdlib | |
parent | 07b8f441ca711f9812fad9e9115bab3c3aa92f79 (diff) | |
download | otp-61138cb31e1f6ad44d3ca54e668de9d2d4adb2ec.tar.gz otp-61138cb31e1f6ad44d3ca54e668de9d2d4adb2ec.tar.bz2 otp-61138cb31e1f6ad44d3ca54e668de9d2d4adb2ec.zip |
Support path names with characters outside the US ASCII range
Diffstat (limited to 'lib/stdlib')
-rw-r--r-- | lib/stdlib/doc/src/erl_tar.xml | 14 | ||||
-rw-r--r-- | lib/stdlib/src/erl_tar.erl | 24 | ||||
-rw-r--r-- | lib/stdlib/test/tar_SUITE.erl | 65 |
3 files changed, 99 insertions, 4 deletions
diff --git a/lib/stdlib/doc/src/erl_tar.xml b/lib/stdlib/doc/src/erl_tar.xml index f81e36f810..afa4196be1 100644 --- a/lib/stdlib/doc/src/erl_tar.xml +++ b/lib/stdlib/doc/src/erl_tar.xml @@ -65,6 +65,20 @@ </description> <section> + <title>UNICODE SUPPORT</title> + <p>If <seealso + marker="kernel:file#native_name_encoding/0">file:native_name_encoding/0</seealso> + returns <c>utf8</c>, path names will be encoded in UTF-8 when + creating tar files and path names will be assumed to be encoded in + UTF-8 when extracting tar files.</p> + + <p>If <seealso + marker="kernel:file#native_name_encoding/0">file:native_name_encoding/0</seealso> + returns <c>latin1</c>, no translation of path names will be + done.</p> + </section> + + <section> <title>LIMITATIONS</title> <p>For maximum compatibility, it is safe to archive files with names up to 100 characters in length. Such tar files can generally be diff --git a/lib/stdlib/src/erl_tar.erl b/lib/stdlib/src/erl_tar.erl index 40b48d7999..89b4ea6c04 100644 --- a/lib/stdlib/src/erl_tar.erl +++ b/lib/stdlib/src/erl_tar.erl @@ -381,7 +381,12 @@ to_octal(Int, Count, Result) -> to_octal(Int div 8, Count-1, [Int rem 8 + $0|Result]). to_string(Str0, Count) -> - Str = list_to_binary(Str0), + Str = case file:native_name_encoding() of + utf8 -> + unicode:characters_to_binary(Str0); + latin1 -> + list_to_binary(Str0) + end, case byte_size(Str) of Size when Size < Count -> [Str|zeroes(Count-Size)]; @@ -608,7 +613,22 @@ typeflag(Bin) -> %% Get the name of the file from the prefix and name fields of the %% tar header. -get_name(Bin) -> +get_name(Bin0) -> + List0 = get_name_raw(Bin0), + case file:native_name_encoding() of + utf8 -> + Bin = list_to_binary(List0), + case unicode:characters_to_list(Bin) of + {error,_,_} -> + List0; + List when is_list(List) -> + List + end; + latin1 -> + List0 + end. + +get_name_raw(Bin) -> Name = from_string(Bin, ?th_name, ?th_name_len), case binary_to_list(Bin, ?th_prefix+1, ?th_prefix+1) of [0] -> diff --git a/lib/stdlib/test/tar_SUITE.erl b/lib/stdlib/test/tar_SUITE.erl index 5bc34e35af..297f5b161a 100644 --- a/lib/stdlib/test/tar_SUITE.erl +++ b/lib/stdlib/test/tar_SUITE.erl @@ -23,7 +23,7 @@ create_long_names/1, bad_tar/1, errors/1, extract_from_binary/1, extract_from_binary_compressed/1, extract_from_open_file/1, symlinks/1, open_add_close/1, cooked_compressed/1, - memory/1]). + memory/1,unicode/1]). -include_lib("test_server/include/test_server.hrl"). -include_lib("kernel/include/file.hrl"). @@ -34,7 +34,7 @@ all() -> [borderline, atomic, long_names, create_long_names, bad_tar, errors, extract_from_binary, extract_from_binary_compressed, extract_from_open_file, - symlinks, open_add_close, cooked_compressed, memory]. + symlinks, open_add_close, cooked_compressed, memory, unicode]. groups() -> []. @@ -726,6 +726,56 @@ memory(Config) when is_list(Config) -> ?line ok = delete_files([Name1,Name2]), ok. +%% Test filenames with characters outside the US ASCII range. +unicode(Config) when is_list(Config) -> + PrivDir = ?config(priv_dir, Config), + do_unicode(PrivDir), + case has_transparent_naming() of + true -> + Pa = filename:dirname(code:which(?MODULE)), + Node = start_node(unicode, "+fnl -pa "++Pa), + ok = rpc:call(Node, erlang, apply, + [fun() -> do_unicode(PrivDir) end,[]]), + true = test_server:stop_node(Node), + ok; + false -> + ok + end. + +has_transparent_naming() -> + case os:type() of + {unix,darwin} -> false; + {unix,_} -> true; + _ -> false + end. + +do_unicode(PrivDir) -> + ok = file:set_cwd(PrivDir), + ok = file:make_dir("unicöde"), + + Names = unicode_create_files(), + Tar = "unicöde.tar", + ok = erl_tar:create(Tar, ["unicöde"], []), + {ok,Names} = erl_tar:table(Tar, []), + _ = [ok = file:delete(Name) || Name <- Names], + ok = erl_tar:extract(Tar), + _ = [{ok,_} = file:read_file(Name) || Name <- Names], + _ = [ok = file:delete(Name) || Name <- Names], + ok = file:del_dir("unicöde"), + ok. + +unicode_create_files() -> + FileA = "unicöde/smörgåsbord", + ok = file:write_file(FileA, "yum!\n"), + [FileA|case file:native_name_encoding() of + utf8 -> + FileB = "unicöde/Хороший файл!", + ok = file:write_file(FileB, "But almost empty.\n"), + [FileB]; + latin1 -> + [] + end]. + %% Delete the given list of files. delete_files([]) -> ok; delete_files([Item|Rest]) -> @@ -791,3 +841,14 @@ make_temp_dir(Base, I) -> ok -> Name; {error,eexist} -> make_temp_dir(Base, I+1) end. + +start_node(Name, Args) -> + [_,Host] = string:tokens(atom_to_list(node()), "@"), + ct:log("Trying to start ~w@~s~n", [Name,Host]), + case test_server:start_node(Name, peer, [{args,Args}]) of + {error,Reason} -> + test_server:fail(Reason); + {ok,Node} -> + ct:log("Node ~p started~n", [Node]), + Node + end. |