aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2014-04-09 12:42:32 +0200
committerBjörn Gustavsson <[email protected]>2014-05-08 14:26:27 +0200
commit61138cb31e1f6ad44d3ca54e668de9d2d4adb2ec (patch)
treeded5ff5de1c9f386601a7e16ac922ab279650fe4
parent07b8f441ca711f9812fad9e9115bab3c3aa92f79 (diff)
downloadotp-61138cb31e1f6ad44d3ca54e668de9d2d4adb2ec.tar.gz
otp-61138cb31e1f6ad44d3ca54e668de9d2d4adb2ec.tar.bz2
otp-61138cb31e1f6ad44d3ca54e668de9d2d4adb2ec.zip
Support path names with characters outside the US ASCII range
-rw-r--r--lib/stdlib/doc/src/erl_tar.xml14
-rw-r--r--lib/stdlib/src/erl_tar.erl24
-rw-r--r--lib/stdlib/test/tar_SUITE.erl65
3 files changed, 99 insertions, 4 deletions
diff --git a/lib/stdlib/doc/src/erl_tar.xml b/lib/stdlib/doc/src/erl_tar.xml
index f81e36f810..afa4196be1 100644
--- a/lib/stdlib/doc/src/erl_tar.xml
+++ b/lib/stdlib/doc/src/erl_tar.xml
@@ -65,6 +65,20 @@
</description>
<section>
+ <title>UNICODE SUPPORT</title>
+ <p>If <seealso
+ marker="kernel:file#native_name_encoding/0">file:native_name_encoding/0</seealso>
+ returns <c>utf8</c>, path names will be encoded in UTF-8 when
+ creating tar files and path names will be assumed to be encoded in
+ UTF-8 when extracting tar files.</p>
+
+ <p>If <seealso
+ marker="kernel:file#native_name_encoding/0">file:native_name_encoding/0</seealso>
+ returns <c>latin1</c>, no translation of path names will be
+ done.</p>
+ </section>
+
+ <section>
<title>LIMITATIONS</title>
<p>For maximum compatibility, it is safe to archive files with names
up to 100 characters in length. Such tar files can generally be
diff --git a/lib/stdlib/src/erl_tar.erl b/lib/stdlib/src/erl_tar.erl
index 40b48d7999..89b4ea6c04 100644
--- a/lib/stdlib/src/erl_tar.erl
+++ b/lib/stdlib/src/erl_tar.erl
@@ -381,7 +381,12 @@ to_octal(Int, Count, Result) ->
to_octal(Int div 8, Count-1, [Int rem 8 + $0|Result]).
to_string(Str0, Count) ->
- Str = list_to_binary(Str0),
+ Str = case file:native_name_encoding() of
+ utf8 ->
+ unicode:characters_to_binary(Str0);
+ latin1 ->
+ list_to_binary(Str0)
+ end,
case byte_size(Str) of
Size when Size < Count ->
[Str|zeroes(Count-Size)];
@@ -608,7 +613,22 @@ typeflag(Bin) ->
%% Get the name of the file from the prefix and name fields of the
%% tar header.
-get_name(Bin) ->
+get_name(Bin0) ->
+ List0 = get_name_raw(Bin0),
+ case file:native_name_encoding() of
+ utf8 ->
+ Bin = list_to_binary(List0),
+ case unicode:characters_to_list(Bin) of
+ {error,_,_} ->
+ List0;
+ List when is_list(List) ->
+ List
+ end;
+ latin1 ->
+ List0
+ end.
+
+get_name_raw(Bin) ->
Name = from_string(Bin, ?th_name, ?th_name_len),
case binary_to_list(Bin, ?th_prefix+1, ?th_prefix+1) of
[0] ->
diff --git a/lib/stdlib/test/tar_SUITE.erl b/lib/stdlib/test/tar_SUITE.erl
index 5bc34e35af..297f5b161a 100644
--- a/lib/stdlib/test/tar_SUITE.erl
+++ b/lib/stdlib/test/tar_SUITE.erl
@@ -23,7 +23,7 @@
create_long_names/1, bad_tar/1, errors/1, extract_from_binary/1,
extract_from_binary_compressed/1,
extract_from_open_file/1, symlinks/1, open_add_close/1, cooked_compressed/1,
- memory/1]).
+ memory/1,unicode/1]).
-include_lib("test_server/include/test_server.hrl").
-include_lib("kernel/include/file.hrl").
@@ -34,7 +34,7 @@ all() ->
[borderline, atomic, long_names, create_long_names,
bad_tar, errors, extract_from_binary,
extract_from_binary_compressed, extract_from_open_file,
- symlinks, open_add_close, cooked_compressed, memory].
+ symlinks, open_add_close, cooked_compressed, memory, unicode].
groups() ->
[].
@@ -726,6 +726,56 @@ memory(Config) when is_list(Config) ->
?line ok = delete_files([Name1,Name2]),
ok.
+%% Test filenames with characters outside the US ASCII range.
+unicode(Config) when is_list(Config) ->
+ PrivDir = ?config(priv_dir, Config),
+ do_unicode(PrivDir),
+ case has_transparent_naming() of
+ true ->
+ Pa = filename:dirname(code:which(?MODULE)),
+ Node = start_node(unicode, "+fnl -pa "++Pa),
+ ok = rpc:call(Node, erlang, apply,
+ [fun() -> do_unicode(PrivDir) end,[]]),
+ true = test_server:stop_node(Node),
+ ok;
+ false ->
+ ok
+ end.
+
+has_transparent_naming() ->
+ case os:type() of
+ {unix,darwin} -> false;
+ {unix,_} -> true;
+ _ -> false
+ end.
+
+do_unicode(PrivDir) ->
+ ok = file:set_cwd(PrivDir),
+ ok = file:make_dir("unicöde"),
+
+ Names = unicode_create_files(),
+ Tar = "unicöde.tar",
+ ok = erl_tar:create(Tar, ["unicöde"], []),
+ {ok,Names} = erl_tar:table(Tar, []),
+ _ = [ok = file:delete(Name) || Name <- Names],
+ ok = erl_tar:extract(Tar),
+ _ = [{ok,_} = file:read_file(Name) || Name <- Names],
+ _ = [ok = file:delete(Name) || Name <- Names],
+ ok = file:del_dir("unicöde"),
+ ok.
+
+unicode_create_files() ->
+ FileA = "unicöde/smörgåsbord",
+ ok = file:write_file(FileA, "yum!\n"),
+ [FileA|case file:native_name_encoding() of
+ utf8 ->
+ FileB = "unicöde/Хороший файл!",
+ ok = file:write_file(FileB, "But almost empty.\n"),
+ [FileB];
+ latin1 ->
+ []
+ end].
+
%% Delete the given list of files.
delete_files([]) -> ok;
delete_files([Item|Rest]) ->
@@ -791,3 +841,14 @@ make_temp_dir(Base, I) ->
ok -> Name;
{error,eexist} -> make_temp_dir(Base, I+1)
end.
+
+start_node(Name, Args) ->
+ [_,Host] = string:tokens(atom_to_list(node()), "@"),
+ ct:log("Trying to start ~w@~s~n", [Name,Host]),
+ case test_server:start_node(Name, peer, [{args,Args}]) of
+ {error,Reason} ->
+ test_server:fail(Reason);
+ {ok,Node} ->
+ ct:log("Node ~p started~n", [Node]),
+ Node
+ end.