From 61138cb31e1f6ad44d3ca54e668de9d2d4adb2ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?=
Date: Wed, 9 Apr 2014 12:42:32 +0200
Subject: Support path names with characters outside the US ASCII range
---
lib/stdlib/doc/src/erl_tar.xml | 14 +++++++++
lib/stdlib/src/erl_tar.erl | 24 ++++++++++++++--
lib/stdlib/test/tar_SUITE.erl | 65 ++++++++++++++++++++++++++++++++++++++++--
3 files changed, 99 insertions(+), 4 deletions(-)
diff --git a/lib/stdlib/doc/src/erl_tar.xml b/lib/stdlib/doc/src/erl_tar.xml
index f81e36f810..afa4196be1 100644
--- a/lib/stdlib/doc/src/erl_tar.xml
+++ b/lib/stdlib/doc/src/erl_tar.xml
@@ -64,6 +64,20 @@
format_error/1 function.
+
LIMITATIONS
For maximum compatibility, it is safe to archive files with names
diff --git a/lib/stdlib/src/erl_tar.erl b/lib/stdlib/src/erl_tar.erl
index 40b48d7999..89b4ea6c04 100644
--- a/lib/stdlib/src/erl_tar.erl
+++ b/lib/stdlib/src/erl_tar.erl
@@ -381,7 +381,12 @@ to_octal(Int, Count, Result) ->
to_octal(Int div 8, Count-1, [Int rem 8 + $0|Result]).
to_string(Str0, Count) ->
- Str = list_to_binary(Str0),
+ Str = case file:native_name_encoding() of
+ utf8 ->
+ unicode:characters_to_binary(Str0);
+ latin1 ->
+ list_to_binary(Str0)
+ end,
case byte_size(Str) of
Size when Size < Count ->
[Str|zeroes(Count-Size)];
@@ -608,7 +613,22 @@ typeflag(Bin) ->
%% Get the name of the file from the prefix and name fields of the
%% tar header.
-get_name(Bin) ->
+get_name(Bin0) ->
+ List0 = get_name_raw(Bin0),
+ case file:native_name_encoding() of
+ utf8 ->
+ Bin = list_to_binary(List0),
+ case unicode:characters_to_list(Bin) of
+ {error,_,_} ->
+ List0;
+ List when is_list(List) ->
+ List
+ end;
+ latin1 ->
+ List0
+ end.
+
+get_name_raw(Bin) ->
Name = from_string(Bin, ?th_name, ?th_name_len),
case binary_to_list(Bin, ?th_prefix+1, ?th_prefix+1) of
[0] ->
diff --git a/lib/stdlib/test/tar_SUITE.erl b/lib/stdlib/test/tar_SUITE.erl
index 5bc34e35af..297f5b161a 100644
--- a/lib/stdlib/test/tar_SUITE.erl
+++ b/lib/stdlib/test/tar_SUITE.erl
@@ -23,7 +23,7 @@
create_long_names/1, bad_tar/1, errors/1, extract_from_binary/1,
extract_from_binary_compressed/1,
extract_from_open_file/1, symlinks/1, open_add_close/1, cooked_compressed/1,
- memory/1]).
+ memory/1,unicode/1]).
-include_lib("test_server/include/test_server.hrl").
-include_lib("kernel/include/file.hrl").
@@ -34,7 +34,7 @@ all() ->
[borderline, atomic, long_names, create_long_names,
bad_tar, errors, extract_from_binary,
extract_from_binary_compressed, extract_from_open_file,
- symlinks, open_add_close, cooked_compressed, memory].
+ symlinks, open_add_close, cooked_compressed, memory, unicode].
groups() ->
[].
@@ -726,6 +726,56 @@ memory(Config) when is_list(Config) ->
?line ok = delete_files([Name1,Name2]),
ok.
+%% Test filenames with characters outside the US ASCII range.
+unicode(Config) when is_list(Config) ->
+ PrivDir = ?config(priv_dir, Config),
+ do_unicode(PrivDir),
+ case has_transparent_naming() of
+ true ->
+ Pa = filename:dirname(code:which(?MODULE)),
+ Node = start_node(unicode, "+fnl -pa "++Pa),
+ ok = rpc:call(Node, erlang, apply,
+ [fun() -> do_unicode(PrivDir) end,[]]),
+ true = test_server:stop_node(Node),
+ ok;
+ false ->
+ ok
+ end.
+
+has_transparent_naming() ->
+ case os:type() of
+ {unix,darwin} -> false;
+ {unix,_} -> true;
+ _ -> false
+ end.
+
+do_unicode(PrivDir) ->
+ ok = file:set_cwd(PrivDir),
+ ok = file:make_dir("unicöde"),
+
+ Names = unicode_create_files(),
+ Tar = "unicöde.tar",
+ ok = erl_tar:create(Tar, ["unicöde"], []),
+ {ok,Names} = erl_tar:table(Tar, []),
+ _ = [ok = file:delete(Name) || Name <- Names],
+ ok = erl_tar:extract(Tar),
+ _ = [{ok,_} = file:read_file(Name) || Name <- Names],
+ _ = [ok = file:delete(Name) || Name <- Names],
+ ok = file:del_dir("unicöde"),
+ ok.
+
+unicode_create_files() ->
+ FileA = "unicöde/smörgåsbord",
+ ok = file:write_file(FileA, "yum!\n"),
+ [FileA|case file:native_name_encoding() of
+ utf8 ->
+ FileB = "unicöde/Хороший файл!",
+ ok = file:write_file(FileB, "But almost empty.\n"),
+ [FileB];
+ latin1 ->
+ []
+ end].
+
%% Delete the given list of files.
delete_files([]) -> ok;
delete_files([Item|Rest]) ->
@@ -791,3 +841,14 @@ make_temp_dir(Base, I) ->
ok -> Name;
{error,eexist} -> make_temp_dir(Base, I+1)
end.
+
+start_node(Name, Args) ->
+ [_,Host] = string:tokens(atom_to_list(node()), "@"),
+ ct:log("Trying to start ~w@~s~n", [Name,Host]),
+ case test_server:start_node(Name, peer, [{args,Args}]) of
+ {error,Reason} ->
+ test_server:fail(Reason);
+ {ok,Node} ->
+ ct:log("Node ~p started~n", [Node]),
+ Node
+ end.
--
cgit v1.2.3
From bb2b84c35e75ae80174ef54dad7babf6c6fa9075 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?=
Date: Wed, 9 Apr 2014 16:30:49 +0200
Subject: Correct end of tape marker
The POSIX standard for tar says that there must be at least
two 512-bytes zero blocks at the end of the tar archive file.
Our implementation would only emit a single 512-byte zero block if the
size of the last file was in the range 18*512 through 19*512-1 (modulo
20*512). GNU tar would correctly unpack such tar archive file, but
would emit a warning:
tar: A lone zero block at 20
---
lib/stdlib/src/erl_tar.erl | 14 +++++++++++---
lib/stdlib/test/tar_SUITE.erl | 1 +
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/lib/stdlib/src/erl_tar.erl b/lib/stdlib/src/erl_tar.erl
index 89b4ea6c04..acf7a5cd40 100644
--- a/lib/stdlib/src/erl_tar.erl
+++ b/lib/stdlib/src/erl_tar.erl
@@ -397,9 +397,17 @@ to_string(Str0, Count) ->
pad_file(File) ->
{ok,Position} = file:position(File, {cur,0}),
- %% There must be at least one empty record at the end of the file.
- Zeros = zeroes(?block_size - (Position rem ?block_size)),
- file:write(File, Zeros).
+ %% There must be at least two zero records at the end.
+ Fill = case ?block_size - (Position rem ?block_size) of
+ Fill0 when Fill0 < 2*?record_size ->
+ %% We need to another block here to ensure that there
+ %% are at least two zero records at the end.
+ Fill0 + ?block_size;
+ Fill0 ->
+ %% Large enough.
+ Fill0
+ end,
+ file:write(File, zeroes(Fill)).
split_filename(Name) when length(Name) =< ?th_name_len ->
{"", Name};
diff --git a/lib/stdlib/test/tar_SUITE.erl b/lib/stdlib/test/tar_SUITE.erl
index 297f5b161a..6349139925 100644
--- a/lib/stdlib/test/tar_SUITE.erl
+++ b/lib/stdlib/test/tar_SUITE.erl
@@ -73,6 +73,7 @@ borderline(Config) when is_list(Config) ->
?line lists:foreach(fun(Size) -> borderline_test(Size, TempDir) end,
[0, 1, 10, 13, 127, 333, Record-1, Record, Record+1,
+ Block-2*Record-1, Block-2*Record, Block-2*Record+1,
Block-Record-1, Block-Record, Block-Record+1,
Block-1, Block, Block+1,
Block+Record-1, Block+Record, Block+Record+1]),
--
cgit v1.2.3
From fa67c39896487a3f6f382d8a38c3663ca25646af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?=
Date: Thu, 10 Apr 2014 13:55:01 +0200
Subject: Update information about compatibility
GNU tar now supports the 'ustar' format.
---
lib/stdlib/doc/src/erl_tar.xml | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/lib/stdlib/doc/src/erl_tar.xml b/lib/stdlib/doc/src/erl_tar.xml
index afa4196be1..ccf0f3901f 100644
--- a/lib/stdlib/doc/src/erl_tar.xml
+++ b/lib/stdlib/doc/src/erl_tar.xml
@@ -35,10 +35,11 @@
Unix 'tar' utility for reading and writing tar archives
The erl_tar module archives and extract files to and from
- a tar file. The tar file format is the POSIX extended tar file format
- specified in IEEE Std 1003.1 and ISO/IEC 9945-1. That is the same
- format as used by tar program on Solaris, but is not the same
- as used by the GNU tar program.
+ a tar file. erl_tar supports the ustar format
+ (IEEE Std 1003.1 and ISO/IEC 9945-1). All modern tar
+ programs (including GNU tar) can read this format. To ensure that
+ that GNU tar produces a tar file that erl_tar can read,
+ give the --format=ustar option to GNU tar.
By convention, the name of a tar file should end in ".tar".
To abide to the convention, you'll need to add ".tar" yourself
to the name.
--
cgit v1.2.3
From 92a5b09ed50c905a6480b74f10af770d98eea5bd Mon Sep 17 00:00:00 2001
From: Yuki Ito
Date: Sat, 12 Apr 2014 20:46:26 +0900
Subject: Fix typo in erl_tar docs
---
lib/stdlib/doc/src/erl_tar.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/stdlib/doc/src/erl_tar.xml b/lib/stdlib/doc/src/erl_tar.xml
index ccf0f3901f..6d5b07059a 100644
--- a/lib/stdlib/doc/src/erl_tar.xml
+++ b/lib/stdlib/doc/src/erl_tar.xml
@@ -128,7 +128,7 @@
TarDescriptor = term()
FilenameOrBin = Filename()|binary()
- Filename = filename()()
+ Filename = filename()
NameInArchive = filename()
Options = [Option]
Option = dereference|verbose
--
cgit v1.2.3
From d02469d12f103276345dc0a3b024f76f6d6763fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?=
Date: Thu, 17 Apr 2014 12:09:20 +0200
Subject: Correct typo in type specification
Types start with a lower-case letter.
---
lib/stdlib/doc/src/erl_tar.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/stdlib/doc/src/erl_tar.xml b/lib/stdlib/doc/src/erl_tar.xml
index 6d5b07059a..7f25f5b7bc 100644
--- a/lib/stdlib/doc/src/erl_tar.xml
+++ b/lib/stdlib/doc/src/erl_tar.xml
@@ -127,7 +127,7 @@
Add a file to an open tar file
TarDescriptor = term()
- FilenameOrBin = Filename()|binary()
+ FilenameOrBin = filename()|binary()
Filename = filename()
NameInArchive = filename()
Options = [Option]
--
cgit v1.2.3