diff options
author | Björn Gustavsson <[email protected]> | 2014-05-09 12:09:26 +0200 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2014-05-09 12:09:26 +0200 |
commit | 17a4fefd66da7e34775c5ddb9ac146816d5abd42 (patch) | |
tree | 8d32c07c257d9077a05415ff539ba3fee3c920e5 | |
parent | 38359508acec5f0bfe51add66d3bd4cbbcf5520f (diff) | |
parent | d02469d12f103276345dc0a3b024f76f6d6763fc (diff) | |
download | otp-17a4fefd66da7e34775c5ddb9ac146816d5abd42.tar.gz otp-17a4fefd66da7e34775c5ddb9ac146816d5abd42.tar.bz2 otp-17a4fefd66da7e34775c5ddb9ac146816d5abd42.zip |
Merge branch 'bjorn/stdlib/erl_tar/OTP-11854' into maint
* bjorn/stdlib/erl_tar/OTP-11854:
Correct typo in type specification
Fix typo in erl_tar docs
Update information about compatibility
Correct end of tape marker
Support path names with characters outside the US ASCII range
-rw-r--r-- | lib/stdlib/doc/src/erl_tar.xml | 27 | ||||
-rw-r--r-- | lib/stdlib/src/erl_tar.erl | 38 | ||||
-rw-r--r-- | lib/stdlib/test/tar_SUITE.erl | 66 |
3 files changed, 118 insertions, 13 deletions
diff --git a/lib/stdlib/doc/src/erl_tar.xml b/lib/stdlib/doc/src/erl_tar.xml index f81e36f810..7f25f5b7bc 100644 --- a/lib/stdlib/doc/src/erl_tar.xml +++ b/lib/stdlib/doc/src/erl_tar.xml @@ -35,10 +35,11 @@ <modulesummary>Unix 'tar' utility for reading and writing tar archives</modulesummary> <description> <p>The <c>erl_tar</c> module archives and extract files to and from - a tar file. The tar file format is the POSIX extended tar file format - specified in IEEE Std 1003.1 and ISO/IEC 9945-1. That is the same - format as used by <c>tar</c> program on Solaris, but is not the same - as used by the GNU tar program.</p> + a tar file. <c>erl_tar</c> supports the <c>ustar</c> format + (IEEE Std 1003.1 and ISO/IEC 9945-1). All modern <c>tar</c> + programs (including GNU tar) can read this format. To ensure that + that GNU tar produces a tar file that <c>erl_tar</c> can read, + give the <c>--format=ustar</c> option to GNU tar.</p> <p>By convention, the name of a tar file should end in "<c>.tar</c>". To abide to the convention, you'll need to add "<c>.tar</c>" yourself to the name.</p> @@ -65,6 +66,20 @@ </description> <section> + <title>UNICODE SUPPORT</title> + <p>If <seealso + marker="kernel:file#native_name_encoding/0">file:native_name_encoding/0</seealso> + returns <c>utf8</c>, path names will be encoded in UTF-8 when + creating tar files and path names will be assumed to be encoded in + UTF-8 when extracting tar files.</p> + + <p>If <seealso + marker="kernel:file#native_name_encoding/0">file:native_name_encoding/0</seealso> + returns <c>latin1</c>, no translation of path names will be + done.</p> + </section> + + <section> <title>LIMITATIONS</title> <p>For maximum compatibility, it is safe to archive files with names up to 100 characters in length. Such tar files can generally be @@ -112,8 +127,8 @@ <fsummary>Add a file to an open tar file</fsummary> <type> <v>TarDescriptor = term()</v> - <v>FilenameOrBin = Filename()|binary()</v> - <v>Filename = filename()()</v> + <v>FilenameOrBin = filename()|binary()</v> + <v>Filename = filename()</v> <v>NameInArchive = filename()</v> <v>Options = [Option]</v> <v>Option = dereference|verbose</v> diff --git a/lib/stdlib/src/erl_tar.erl b/lib/stdlib/src/erl_tar.erl index 40b48d7999..acf7a5cd40 100644 --- a/lib/stdlib/src/erl_tar.erl +++ b/lib/stdlib/src/erl_tar.erl @@ -381,7 +381,12 @@ to_octal(Int, Count, Result) -> to_octal(Int div 8, Count-1, [Int rem 8 + $0|Result]). to_string(Str0, Count) -> - Str = list_to_binary(Str0), + Str = case file:native_name_encoding() of + utf8 -> + unicode:characters_to_binary(Str0); + latin1 -> + list_to_binary(Str0) + end, case byte_size(Str) of Size when Size < Count -> [Str|zeroes(Count-Size)]; @@ -392,9 +397,17 @@ to_string(Str0, Count) -> pad_file(File) -> {ok,Position} = file:position(File, {cur,0}), - %% There must be at least one empty record at the end of the file. - Zeros = zeroes(?block_size - (Position rem ?block_size)), - file:write(File, Zeros). + %% There must be at least two zero records at the end. + Fill = case ?block_size - (Position rem ?block_size) of + Fill0 when Fill0 < 2*?record_size -> + %% We need to another block here to ensure that there + %% are at least two zero records at the end. + Fill0 + ?block_size; + Fill0 -> + %% Large enough. + Fill0 + end, + file:write(File, zeroes(Fill)). split_filename(Name) when length(Name) =< ?th_name_len -> {"", Name}; @@ -608,7 +621,22 @@ typeflag(Bin) -> %% Get the name of the file from the prefix and name fields of the %% tar header. -get_name(Bin) -> +get_name(Bin0) -> + List0 = get_name_raw(Bin0), + case file:native_name_encoding() of + utf8 -> + Bin = list_to_binary(List0), + case unicode:characters_to_list(Bin) of + {error,_,_} -> + List0; + List when is_list(List) -> + List + end; + latin1 -> + List0 + end. + +get_name_raw(Bin) -> Name = from_string(Bin, ?th_name, ?th_name_len), case binary_to_list(Bin, ?th_prefix+1, ?th_prefix+1) of [0] -> diff --git a/lib/stdlib/test/tar_SUITE.erl b/lib/stdlib/test/tar_SUITE.erl index 5bc34e35af..6349139925 100644 --- a/lib/stdlib/test/tar_SUITE.erl +++ b/lib/stdlib/test/tar_SUITE.erl @@ -23,7 +23,7 @@ create_long_names/1, bad_tar/1, errors/1, extract_from_binary/1, extract_from_binary_compressed/1, extract_from_open_file/1, symlinks/1, open_add_close/1, cooked_compressed/1, - memory/1]). + memory/1,unicode/1]). -include_lib("test_server/include/test_server.hrl"). -include_lib("kernel/include/file.hrl"). @@ -34,7 +34,7 @@ all() -> [borderline, atomic, long_names, create_long_names, bad_tar, errors, extract_from_binary, extract_from_binary_compressed, extract_from_open_file, - symlinks, open_add_close, cooked_compressed, memory]. + symlinks, open_add_close, cooked_compressed, memory, unicode]. groups() -> []. @@ -73,6 +73,7 @@ borderline(Config) when is_list(Config) -> ?line lists:foreach(fun(Size) -> borderline_test(Size, TempDir) end, [0, 1, 10, 13, 127, 333, Record-1, Record, Record+1, + Block-2*Record-1, Block-2*Record, Block-2*Record+1, Block-Record-1, Block-Record, Block-Record+1, Block-1, Block, Block+1, Block+Record-1, Block+Record, Block+Record+1]), @@ -726,6 +727,56 @@ memory(Config) when is_list(Config) -> ?line ok = delete_files([Name1,Name2]), ok. +%% Test filenames with characters outside the US ASCII range. +unicode(Config) when is_list(Config) -> + PrivDir = ?config(priv_dir, Config), + do_unicode(PrivDir), + case has_transparent_naming() of + true -> + Pa = filename:dirname(code:which(?MODULE)), + Node = start_node(unicode, "+fnl -pa "++Pa), + ok = rpc:call(Node, erlang, apply, + [fun() -> do_unicode(PrivDir) end,[]]), + true = test_server:stop_node(Node), + ok; + false -> + ok + end. + +has_transparent_naming() -> + case os:type() of + {unix,darwin} -> false; + {unix,_} -> true; + _ -> false + end. + +do_unicode(PrivDir) -> + ok = file:set_cwd(PrivDir), + ok = file:make_dir("unicöde"), + + Names = unicode_create_files(), + Tar = "unicöde.tar", + ok = erl_tar:create(Tar, ["unicöde"], []), + {ok,Names} = erl_tar:table(Tar, []), + _ = [ok = file:delete(Name) || Name <- Names], + ok = erl_tar:extract(Tar), + _ = [{ok,_} = file:read_file(Name) || Name <- Names], + _ = [ok = file:delete(Name) || Name <- Names], + ok = file:del_dir("unicöde"), + ok. + +unicode_create_files() -> + FileA = "unicöde/smörgåsbord", + ok = file:write_file(FileA, "yum!\n"), + [FileA|case file:native_name_encoding() of + utf8 -> + FileB = "unicöde/Хороший файл!", + ok = file:write_file(FileB, "But almost empty.\n"), + [FileB]; + latin1 -> + [] + end]. + %% Delete the given list of files. delete_files([]) -> ok; delete_files([Item|Rest]) -> @@ -791,3 +842,14 @@ make_temp_dir(Base, I) -> ok -> Name; {error,eexist} -> make_temp_dir(Base, I+1) end. + +start_node(Name, Args) -> + [_,Host] = string:tokens(atom_to_list(node()), "@"), + ct:log("Trying to start ~w@~s~n", [Name,Host]), + case test_server:start_node(Name, peer, [{args,Args}]) of + {error,Reason} -> + test_server:fail(Reason); + {ok,Node} -> + ct:log("Node ~p started~n", [Node]), + Node + end. |