aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2014-05-09 12:09:26 +0200
committerBjörn Gustavsson <[email protected]>2014-05-09 12:09:26 +0200
commit17a4fefd66da7e34775c5ddb9ac146816d5abd42 (patch)
tree8d32c07c257d9077a05415ff539ba3fee3c920e5
parent38359508acec5f0bfe51add66d3bd4cbbcf5520f (diff)
parentd02469d12f103276345dc0a3b024f76f6d6763fc (diff)
downloadotp-17a4fefd66da7e34775c5ddb9ac146816d5abd42.tar.gz
otp-17a4fefd66da7e34775c5ddb9ac146816d5abd42.tar.bz2
otp-17a4fefd66da7e34775c5ddb9ac146816d5abd42.zip
Merge branch 'bjorn/stdlib/erl_tar/OTP-11854' into maint
* bjorn/stdlib/erl_tar/OTP-11854:
  Correct typo in type specification
  Fix typo in erl_tar docs
  Update information about compatibility
  Correct end of tape marker
  Support path names with characters outside the US ASCII range
-rw-r--r--lib/stdlib/doc/src/erl_tar.xml27
-rw-r--r--lib/stdlib/src/erl_tar.erl38
-rw-r--r--lib/stdlib/test/tar_SUITE.erl66
3 files changed, 118 insertions, 13 deletions
diff --git a/lib/stdlib/doc/src/erl_tar.xml b/lib/stdlib/doc/src/erl_tar.xml
index f81e36f810..7f25f5b7bc 100644
--- a/lib/stdlib/doc/src/erl_tar.xml
+++ b/lib/stdlib/doc/src/erl_tar.xml
@@ -35,10 +35,11 @@
<modulesummary>Unix 'tar' utility for reading and writing tar archives</modulesummary>
<description>
<p>The <c>erl_tar</c> module archives and extract files to and from
- a tar file. The tar file format is the POSIX extended tar file format
- specified in IEEE Std 1003.1 and ISO/IEC&nbsp;9945-1. That is the same
- format as used by <c>tar</c> program on Solaris, but is not the same
- as used by the GNU tar program.</p>
+ a tar file. <c>erl_tar</c> supports the <c>ustar</c> format
+ (IEEE Std 1003.1 and ISO/IEC&nbsp;9945-1). All modern <c>tar</c>
+ programs (including GNU tar) can read this format. To ensure that
+ GNU tar produces a tar file that <c>erl_tar</c> can read,
+ give the <c>--format=ustar</c> option to GNU tar.</p>
<p>By convention, the name of a tar file should end in "<c>.tar</c>".
To abide to the convention, you'll need to add "<c>.tar</c>" yourself
to the name.</p>
@@ -65,6 +66,20 @@
</description>
<section>
+ <title>UNICODE SUPPORT</title>
+ <p>If <seealso
+ marker="kernel:file#native_name_encoding/0">file:native_name_encoding/0</seealso>
+ returns <c>utf8</c>, path names will be encoded in UTF-8 when
+ creating tar files and path names will be assumed to be encoded in
+ UTF-8 when extracting tar files.</p>
+
+ <p>If <seealso
+ marker="kernel:file#native_name_encoding/0">file:native_name_encoding/0</seealso>
+ returns <c>latin1</c>, no translation of path names will be
+ done.</p>
+ </section>
+
+ <section>
<title>LIMITATIONS</title>
<p>For maximum compatibility, it is safe to archive files with names
up to 100 characters in length. Such tar files can generally be
@@ -112,8 +127,8 @@
<fsummary>Add a file to an open tar file</fsummary>
<type>
<v>TarDescriptor = term()</v>
- <v>FilenameOrBin = Filename()|binary()</v>
- <v>Filename = filename()()</v>
+ <v>FilenameOrBin = filename()|binary()</v>
+ <v>Filename = filename()</v>
<v>NameInArchive = filename()</v>
<v>Options = [Option]</v>
<v>Option = dereference|verbose</v>
diff --git a/lib/stdlib/src/erl_tar.erl b/lib/stdlib/src/erl_tar.erl
index 40b48d7999..acf7a5cd40 100644
--- a/lib/stdlib/src/erl_tar.erl
+++ b/lib/stdlib/src/erl_tar.erl
@@ -381,7 +381,12 @@ to_octal(Int, Count, Result) ->
to_octal(Int div 8, Count-1, [Int rem 8 + $0|Result]).
to_string(Str0, Count) ->
- Str = list_to_binary(Str0),
+ Str = case file:native_name_encoding() of
+ utf8 ->
+ unicode:characters_to_binary(Str0);
+ latin1 ->
+ list_to_binary(Str0)
+ end,
case byte_size(Str) of
Size when Size < Count ->
[Str|zeroes(Count-Size)];
@@ -392,9 +397,17 @@ to_string(Str0, Count) ->
pad_file(File) ->
{ok,Position} = file:position(File, {cur,0}),
- %% There must be at least one empty record at the end of the file.
- Zeros = zeroes(?block_size - (Position rem ?block_size)),
- file:write(File, Zeros).
+ %% There must be at least two zero records at the end.
+ Fill = case ?block_size - (Position rem ?block_size) of
+ Fill0 when Fill0 < 2*?record_size ->
+ %% We need to add another block here to ensure that there
+ %% are at least two zero records at the end.
+ Fill0 + ?block_size;
+ Fill0 ->
+ %% Large enough.
+ Fill0
+ end,
+ file:write(File, zeroes(Fill)).
split_filename(Name) when length(Name) =< ?th_name_len ->
{"", Name};
@@ -608,7 +621,22 @@ typeflag(Bin) ->
%% Get the name of the file from the prefix and name fields of the
%% tar header.
-get_name(Bin) ->
+get_name(Bin0) ->
+ List0 = get_name_raw(Bin0),
+ case file:native_name_encoding() of
+ utf8 ->
+ Bin = list_to_binary(List0),
+ case unicode:characters_to_list(Bin) of
+ {error,_,_} ->
+ List0;
+ List when is_list(List) ->
+ List
+ end;
+ latin1 ->
+ List0
+ end.
+
+get_name_raw(Bin) ->
Name = from_string(Bin, ?th_name, ?th_name_len),
case binary_to_list(Bin, ?th_prefix+1, ?th_prefix+1) of
[0] ->
diff --git a/lib/stdlib/test/tar_SUITE.erl b/lib/stdlib/test/tar_SUITE.erl
index 5bc34e35af..6349139925 100644
--- a/lib/stdlib/test/tar_SUITE.erl
+++ b/lib/stdlib/test/tar_SUITE.erl
@@ -23,7 +23,7 @@
create_long_names/1, bad_tar/1, errors/1, extract_from_binary/1,
extract_from_binary_compressed/1,
extract_from_open_file/1, symlinks/1, open_add_close/1, cooked_compressed/1,
- memory/1]).
+ memory/1,unicode/1]).
-include_lib("test_server/include/test_server.hrl").
-include_lib("kernel/include/file.hrl").
@@ -34,7 +34,7 @@ all() ->
[borderline, atomic, long_names, create_long_names,
bad_tar, errors, extract_from_binary,
extract_from_binary_compressed, extract_from_open_file,
- symlinks, open_add_close, cooked_compressed, memory].
+ symlinks, open_add_close, cooked_compressed, memory, unicode].
groups() ->
[].
@@ -73,6 +73,7 @@ borderline(Config) when is_list(Config) ->
?line lists:foreach(fun(Size) -> borderline_test(Size, TempDir) end,
[0, 1, 10, 13, 127, 333, Record-1, Record, Record+1,
+ Block-2*Record-1, Block-2*Record, Block-2*Record+1,
Block-Record-1, Block-Record, Block-Record+1,
Block-1, Block, Block+1,
Block+Record-1, Block+Record, Block+Record+1]),
@@ -726,6 +727,56 @@ memory(Config) when is_list(Config) ->
?line ok = delete_files([Name1,Name2]),
ok.
+%% Test filenames with characters outside the US ASCII range.
+unicode(Config) when is_list(Config) ->
+ PrivDir = ?config(priv_dir, Config),
+ do_unicode(PrivDir),
+ case has_transparent_naming() of
+ true ->
+ Pa = filename:dirname(code:which(?MODULE)),
+ Node = start_node(unicode, "+fnl -pa "++Pa),
+ ok = rpc:call(Node, erlang, apply,
+ [fun() -> do_unicode(PrivDir) end,[]]),
+ true = test_server:stop_node(Node),
+ ok;
+ false ->
+ ok
+ end.
+
+has_transparent_naming() ->
+ case os:type() of
+ {unix,darwin} -> false;
+ {unix,_} -> true;
+ _ -> false
+ end.
+
+do_unicode(PrivDir) ->
+ ok = file:set_cwd(PrivDir),
+ ok = file:make_dir("unicöde"),
+
+ Names = unicode_create_files(),
+ Tar = "unicöde.tar",
+ ok = erl_tar:create(Tar, ["unicöde"], []),
+ {ok,Names} = erl_tar:table(Tar, []),
+ _ = [ok = file:delete(Name) || Name <- Names],
+ ok = erl_tar:extract(Tar),
+ _ = [{ok,_} = file:read_file(Name) || Name <- Names],
+ _ = [ok = file:delete(Name) || Name <- Names],
+ ok = file:del_dir("unicöde"),
+ ok.
+
+unicode_create_files() ->
+ FileA = "unicöde/smörgåsbord",
+ ok = file:write_file(FileA, "yum!\n"),
+ [FileA|case file:native_name_encoding() of
+ utf8 ->
+ FileB = "unicöde/Хороший файл!",
+ ok = file:write_file(FileB, "But almost empty.\n"),
+ [FileB];
+ latin1 ->
+ []
+ end].
+
%% Delete the given list of files.
delete_files([]) -> ok;
delete_files([Item|Rest]) ->
@@ -791,3 +842,14 @@ make_temp_dir(Base, I) ->
ok -> Name;
{error,eexist} -> make_temp_dir(Base, I+1)
end.
+
+start_node(Name, Args) ->
+ [_,Host] = string:tokens(atom_to_list(node()), "@"),
+ ct:log("Trying to start ~w@~s~n", [Name,Host]),
+ case test_server:start_node(Name, peer, [{args,Args}]) of
+ {error,Reason} ->
+ test_server:fail(Reason);
+ {ok,Node} ->
+ ct:log("Node ~p started~n", [Node]),
+ Node
+ end.