From aa0c4b0df7cdc750450906aff4e8c81627d80605 Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Tue, 31 Jan 2017 17:40:34 -0600 Subject: Update erl_tar to support PAX format, etc. This commit introduces the following key changes: - Support for reading tar archives in formats currently in common use, such as v7, STAR, USTAR, PAX, and GNU tar's extensions to the STAR/USTAR format. - Support for writing PAX archives, only when necessary, using USTAR when possible for greater portability. These changes result in lifting of some prior restrictions: - Support for reading archives produced by modern tar implementations when other restrictions described below are present. - Support for filenames which exceed 100 bytes in length, or paths which exceed 255 bytes (see USTAR format specification for more details on this restriction). - Support for filenames of arbitrary length - Support for unicode metadata (the previous behaviour of erl_tar was actually violating the spec, by writing unicode-encoded data to fields which are defined to be 7-bit ASCII, even though this technically worked when using erl_tar at source and destination, it may not have worked with other tar utilities, and this implementation now conforms to the spec). - Support for uid/gid values which cannot be converted to octal integers. --- lib/sasl/src/release_handler.erl | 6 +- lib/sasl/src/systools_make.erl | 4 +- lib/stdlib/doc/src/erl_tar.xml | 72 +- lib/stdlib/src/Makefile | 4 +- lib/stdlib/src/erl_tar.erl | 2562 ++++++++++++++------- lib/stdlib/src/erl_tar.hrl | 394 ++++ lib/stdlib/test/tar_SUITE.erl | 178 +- lib/stdlib/test/tar_SUITE_data/bsd.tar | Bin 0 -> 9216 bytes lib/stdlib/test/tar_SUITE_data/gnu.tar | Bin 0 -> 30720 bytes lib/stdlib/test/tar_SUITE_data/pax_mtime.tar | Bin 0 -> 10240 bytes lib/stdlib/test/tar_SUITE_data/sparse00.tar | Bin 0 -> 61440 bytes lib/stdlib/test/tar_SUITE_data/sparse01.tar | Bin 0 -> 61440 bytes lib/stdlib/test/tar_SUITE_data/sparse01_empty.tar | Bin 0 -> 10240 bytes lib/stdlib/test/tar_SUITE_data/sparse10.tar | Bin 0 -> 61440 bytes lib/stdlib/test/tar_SUITE_data/sparse10_empty.tar | Bin 0 -> 10240 bytes lib/stdlib/test/tar_SUITE_data/star.tar | Bin 0 -> 10240 bytes lib/stdlib/test/tar_SUITE_data/v7.tar | Bin 0 -> 10240 bytes 17 files changed, 2308 insertions(+), 912 deletions(-) create mode 100644 lib/stdlib/src/erl_tar.hrl create mode 100644 lib/stdlib/test/tar_SUITE_data/bsd.tar create mode 100644 lib/stdlib/test/tar_SUITE_data/gnu.tar create mode 100644 lib/stdlib/test/tar_SUITE_data/pax_mtime.tar create mode 100644 lib/stdlib/test/tar_SUITE_data/sparse00.tar create mode 100644 lib/stdlib/test/tar_SUITE_data/sparse01.tar create mode 100644 lib/stdlib/test/tar_SUITE_data/sparse01_empty.tar create mode 100644 lib/stdlib/test/tar_SUITE_data/sparse10.tar create mode 100644 lib/stdlib/test/tar_SUITE_data/sparse10_empty.tar create mode 100644 lib/stdlib/test/tar_SUITE_data/star.tar create mode 100644 lib/stdlib/test/tar_SUITE_data/v7.tar (limited to 'lib') diff --git a/lib/sasl/src/release_handler.erl b/lib/sasl/src/release_handler.erl index 1fcc9a0288..3250311b8f 100644 --- a/lib/sasl/src/release_handler.erl +++ b/lib/sasl/src/release_handler.erl @@ -831,7 +831,7 @@ do_unpack_release(Root, RelDir, ReleaseName, Releases) -> Tar = filename:join(RelDir, ReleaseName ++ ".tar.gz"), do_check_file(Tar, regular), Rel = ReleaseName ++ ".rel", - extract_rel_file(filename:join("releases", Rel), Tar, Root), + _ = extract_rel_file(filename:join("releases", Rel), Tar, Root), RelFile = filename:join(RelDir, Rel), Release = check_rel(Root, RelFile, false), #release{vsn = Vsn} = Release, @@ -1841,14 +1841,12 @@ do_check_file(Master, FileName, Type) -> %% by the user in another way, i.e. ignore this here. %%----------------------------------------------------------------- extract_rel_file(Rel, Tar, Root) -> - erl_tar:extract(Tar, [{files, [Rel]}, {cwd, Root}, compressed]). + _ = erl_tar:extract(Tar, [{files, [Rel]}, {cwd, Root}, compressed]). extract_tar(Root, Tar) -> case erl_tar:extract(Tar, [keep_old_files, {cwd, Root}, compressed]) of ok -> ok; - {error, Reason, Name} -> % Old erl_tar. - throw({error, {cannot_extract_file, Name, Reason}}); {error, {Name, Reason}} -> % New erl_tar (R3A). throw({error, {cannot_extract_file, Name, Reason}}) end. diff --git a/lib/sasl/src/systools_make.erl b/lib/sasl/src/systools_make.erl index 6a16c8689e..e7609f616c 100644 --- a/lib/sasl/src/systools_make.erl +++ b/lib/sasl/src/systools_make.erl @@ -1904,8 +1904,10 @@ del_tar(Tar, TarName) -> file:delete(TarName). add_to_tar(Tar, FromFile, ToFile) -> - case erl_tar:add(Tar, FromFile, ToFile, [compressed, dereference]) of + case catch erl_tar:add(Tar, FromFile, ToFile, [compressed, dereference]) of ok -> ok; + {'EXIT', Reason} -> + throw({error, {tar_error, {add, FromFile, Reason}}}); {error, Error} -> throw({error, {tar_error, {add, FromFile, Error}}}) end. diff --git a/lib/stdlib/doc/src/erl_tar.xml b/lib/stdlib/doc/src/erl_tar.xml index 24e7b64b9e..f28d8b425b 100644 --- a/lib/stdlib/doc/src/erl_tar.xml +++ b/lib/stdlib/doc/src/erl_tar.xml @@ -37,12 +37,13 @@

This module archives and extract files to and from - a tar file. This module supports the ustar format - (IEEE Std 1003.1 and ISO/IEC 9945-1). All modern tar - programs (including GNU tar) can read this format. To ensure that - that GNU tar produces a tar file that erl_tar can read, - specify option --format=ustar to GNU tar.

- + a tar file. This module supports reading most common tar formats, + namely v7, STAR, USTAR, and PAX, as well as some of GNU tar's extensions + to the USTAR format (sparse files most notably). It produces tar archives + in USTAR format, unless the files being archived require PAX format due to + restrictions in USTAR (such as unicode metadata, filename length, and more). + As such, erl_tar supports tar archives produced by most all modern + tar utilities, and produces tarballs which should be similarly portable.

By convention, the name of a tar file is to end in ".tar". To abide to the convention, add ".tar" to the name.

@@ -83,6 +84,8 @@

If file:native_name_encoding/0 returns latin1, no translation of path names is done.

+ +

Unicode metadata stored in PAX headers is preserved

@@ -104,21 +107,20 @@ Limitations -

For maximum compatibility, it is safe to archive files with names - up to 100 characters in length. Such tar files can generally be - extracted by any tar program.

-
- -

For filenames exceeding 100 characters in length, the resulting tar - file can only be correctly extracted by a POSIX-compatible tar - program (such as Solaris tar or a modern GNU tar).

-
- -

Files with longer names than 256 bytes cannot be stored.

+

If you must remain compatible with the USTAR tar format, you must ensure file paths being + stored are less than 255 bytes in total, with a maximum filename component + length of 100 bytes. USTAR uses a header field (prefix) in addition to the name field, and + splits file paths longer than 100 bytes into two parts. This split is done on a directory boundary, + and is done in such a way to make the best use of the space available in those two fields, but in practice + this will often mean that you have less than 255 bytes for a path. erl_tar will + automatically upgrade the format to PAX to handle longer filenames, so this is only an issue if you + need to extract the archive with an older implementation of erl_tar or tar which does + not support PAX. In this case, the PAX headers will be extracted as regular files, and you will need to + apply them manually.

-

The file name a symbolic link points is always limited - to 100 characters.

+

Like the above, if you must remain USTAR compatible, you must also ensure than paths for + symbolic/hard links are no more than 100 bytes, otherwise PAX headers will be used.

@@ -129,7 +131,9 @@ Add a file to an open tar file. TarDescriptor = term() - Filename = filename() + FilenameOrBin = filename()|binary() + NameInArchive = filename() + Filename = filename()|{NameInArchive,FilenameOrBin} Options = [Option] Option = dereference|verbose|{chunks,ChunkSize} ChunkSize = positive_integer() @@ -139,6 +143,9 @@

Adds a file to a tar file that has been opened for writing by open/1.

+

NameInArchive is the name under which the file becomes + stored in the tar file. The file gets this name when it is + extracted from the tar file.

Options:

dereference @@ -183,9 +190,6 @@ open/2. This function accepts the same options as add/3.

-

NameInArchive is the name under which the file becomes - stored in the tar file. The file gets this name when it is - extracted from the tar file.

@@ -206,8 +210,8 @@ Create a tar archive. Name = filename() - FileList = [Filename|{NameInArchive, binary()},{NameInArchive, - Filename}] + FileList = [Filename|{NameInArchive, FilenameOrBin}] + FilenameOrBin = filename()|binary() Filename = filename() NameInArchive = filename() RetValue = ok|{error,{Name,Reason}} @@ -225,8 +229,8 @@ Create a tar archive with options. Name = filename() - FileList = [Filename|{NameInArchive, binary()},{NameInArchive, - Filename}] + FileList = [Filename|{NameInArchive, FilenameOrBin}] + FilenameOrBin = filename()|binary() Filename = filename() NameInArchive = filename() OptionList = [Option] @@ -275,7 +279,8 @@ extract(Name) -> RetValue Extract all files from a tar file. - Name = filename() + Name = filename() | {binary,binary()} | {file,Fd} + Fd = file_descriptor() RetValue = ok|{error,{Name,Reason}} Reason = term() @@ -294,8 +299,7 @@ extract(Name, OptionList) Extract files from a tar file. - Name = filename() | {binary,Binary} | {file,Fd} - Binary = binary() + Name = filename() | {binary,binary()} | {file,Fd} Fd = file_descriptor() OptionList = [Option] Option = {cwd,Cwd}|{files,FileList}|keep_old_files|verbose|memory @@ -521,7 +525,7 @@ erl_tar:close(TarDesc) table(Name) -> RetValue Retrieve the name of all files in a tar file. - Name = filename() + Name = filename()|{binary,binary()}|{file,file_descriptor()} RetValue = {ok,[string()]}|{error,{Name,Reason}} Reason = term() @@ -535,7 +539,7 @@ erl_tar:close(TarDesc) Retrieve name and information of all files in a tar file. - Name = filename() + Name = filename()|{binary,binary()}|{file,file_descriptor()}

Retrieves the names of all files in the tar file Name.

@@ -546,7 +550,7 @@ erl_tar:close(TarDesc) t(Name) Print the name of each file in a tar file. - Name = filename() + Name = filename()|{binary,binary()}|{file,file_descriptor()}

Prints the names of all files in the tar file Name to the @@ -559,7 +563,7 @@ erl_tar:close(TarDesc) Print name and information for each file in a tar file. - Name = filename() + Name = filename()|{binary,binary()}|{file,file_descriptor()}

Prints names and information about all files in the tar file diff --git a/lib/stdlib/src/Makefile b/lib/stdlib/src/Makefile index d6c0ff8d8d..ed3dfb342c 100644 --- a/lib/stdlib/src/Makefile +++ b/lib/stdlib/src/Makefile @@ -130,7 +130,7 @@ HRL_FILES= \ ../include/qlc.hrl \ ../include/zip.hrl -INTERNAL_HRL_FILES= dets.hrl +INTERNAL_HRL_FILES= dets.hrl erl_tar.hrl ERL_FILES= $(MODULES:%=%.erl) @@ -228,7 +228,7 @@ $(EBIN)/dets_v9.beam: dets.hrl $(EBIN)/erl_bits.beam: ../include/erl_bits.hrl $(EBIN)/erl_compile.beam: ../include/erl_compile.hrl ../../kernel/include/file.hrl $(EBIN)/erl_lint.beam: ../include/erl_bits.hrl -$(EBIN)/erl_tar.beam: ../../kernel/include/file.hrl +$(EBIN)/erl_tar.beam: ../../kernel/include/file.hrl erl_tar.hrl $(EBIN)/file_sorter.beam: ../../kernel/include/file.hrl $(EBIN)/filelib.beam: ../../kernel/include/file.hrl $(EBIN)/filename.beam: ../../kernel/include/file.hrl diff --git a/lib/stdlib/src/erl_tar.erl b/lib/stdlib/src/erl_tar.erl index a383a0fc67..086e77cd28 100644 --- a/lib/stdlib/src/erl_tar.erl +++ b/lib/stdlib/src/erl_tar.erl @@ -1,8 +1,8 @@ %% %% %CopyrightBegin% -%% -%% Copyright Ericsson AB 1997-2016. All Rights Reserved. -%% +%% +%% Copyright Ericsson AB 1997-2017. All Rights Reserved. +%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,191 +14,245 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% +%% %% %CopyrightEnd% %% +%% This module implements extraction/creation of tar archives. +%% It supports reading most common tar formats, namely V7, STAR, +%% USTAR, GNU, BSD/libarchive, and PAX. It produces archives in USTAR +%% format, unless it must use PAX headers, in which case it produces PAX +%% format. +%% +%% The following references where used: +%% http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 +%% http://www.gnu.org/software/tar/manual/html_node/Standard.html +%% http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html -module(erl_tar). -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% Purpose: Unix tar (tape archive) utility. -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - --export([init/3, create/2, create/3, extract/1, extract/2, table/1, table/2, - open/2, close/1, add/3, add/4, - t/1, tt/1, format_error/1]). +-export([init/3, + create/2, create/3, + extract/1, extract/2, + table/1, table/2, t/1, tt/1, + open/2, close/1, + add/3, add/4, + format_error/1]). -include_lib("kernel/include/file.hrl"). +-include_lib("erl_tar.hrl"). --record(add_opts, - {read_info, % Fun to use for read file/link info. - chunk_size = 0, % For file reading when sending to sftp. 0=do not chunk - verbose = false :: boolean()}). % Verbose on/off. - -%% Opens a tar archive. - -init(UsrHandle, AccessMode, Fun) when is_function(Fun,2) -> - {ok, {AccessMode,{tar_descriptor,UsrHandle,Fun}}}. - -%%%================================================================ -%%% The open function with friends is to keep the file and binary api of this module -open(Name, Mode) -> - case open_mode(Mode) of - {ok, Access, Raw, Opts} -> - open1(Name, Access, Raw, Opts); - {error, Reason} -> - {error, {Name, Reason}} - end. - -open1({binary,Bin}, read, _Raw, Opts) -> - case file:open(Bin, [ram,binary,read]) of - {ok,File} -> - _ = [ram_file:uncompress(File) || Opts =:= [compressed]], - init(File,read,file_fun()); - Error -> - Error - end; -open1({file, Fd}, read, _Raw, _Opts) -> - init(Fd, read, file_fun()); -open1(Name, Access, Raw, Opts) -> - case file:open(Name, Raw ++ [binary, Access|Opts]) of - {ok, File} -> - init(File, Access, file_fun()); - {error, Reason} -> - {error, {Name, Reason}} - end. - -file_fun() -> - fun(write, {Fd,Data}) -> file:write(Fd, Data); - (position, {Fd,Pos}) -> file:position(Fd, Pos); - (read2, {Fd,Size}) -> file:read(Fd,Size); - (close, Fd) -> file:close(Fd) - end. - -%%% End of file and binary api (except for open_mode/1 downwards -%%%================================================================ - -%% Closes a tar archive. - -close({read, File}) -> - ok = do_close(File); -close({write, File}) -> - PadResult = pad_file(File), - ok = do_close(File), - PadResult; -close(_) -> - {error, einval}. - -%% Adds a file to a tape archive. - -add(File, Name, Options) -> - add(File, Name, Name, Options). -add({write, File}, Name, NameInArchive, Options) -> - Opts = #add_opts{read_info=fun(F) -> file:read_link_info(F) end}, - add1(File, Name, NameInArchive, add_opts(Options, Opts)); -add({read, _File}, _, _, _) -> - {error, eacces}; -add(_, _, _, _) -> - {error, einval}. - -add_opts([dereference|T], Opts) -> - add_opts(T, Opts#add_opts{read_info=fun(F) -> file:read_file_info(F) end}); -add_opts([verbose|T], Opts) -> - add_opts(T, Opts#add_opts{verbose=true}); -add_opts([{chunks,N}|T], Opts) -> - add_opts(T, Opts#add_opts{chunk_size=N}); -add_opts([_|T], Opts) -> - add_opts(T, Opts); -add_opts([], Opts) -> - Opts. - -%% Creates a tar file Name containing the given files. - -create(Name, Filenames) -> - create(Name, Filenames, []). - -%% Creates a tar archive Name containing the given files. -%% Accepted options: verbose, compressed, cooked +%% Converts the short error reason to a descriptive string. +-spec format_error(term()) -> string(). +format_error(invalid_tar_checksum) -> + "Checksum failed"; +format_error(bad_header) -> + "Unrecognized tar header format"; +format_error({bad_header, Reason}) -> + lists:flatten(io_lib:format("Unrecognized tar header format: ~p", [Reason])); +format_error({invalid_header, negative_size}) -> + "Invalid header: negative size"; +format_error(invalid_sparse_header_size) -> + "Invalid sparse header: negative size"; +format_error(invalid_sparse_map_entry) -> + "Invalid sparse map entry"; +format_error({invalid_sparse_map_entry, Reason}) -> + lists:flatten(io_lib:format("Invalid sparse map entry: ~p", [Reason])); +format_error(invalid_end_of_archive) -> + "Invalid end of archive"; +format_error(eof) -> + "Unexpected end of file"; +format_error(integer_overflow) -> + "Failed to parse numeric: integer overflow"; +format_error({misaligned_read, Pos}) -> + lists:flatten(io_lib:format("Read a block which was misaligned: block_size=~p pos=~p", + [?BLOCK_SIZE, Pos])); +format_error(invalid_gnu_1_0_sparsemap) -> + "Invalid GNU sparse map (version 1.0)"; +format_error({invalid_gnu_0_1_sparsemap, Format}) -> + lists:flatten(io_lib:format("Invalid GNU sparse map (version ~s)", [Format])); +format_error({Name,Reason}) -> + lists:flatten(io_lib:format("~ts: ~ts", [Name,format_error(Reason)])); +format_error(Atom) when is_atom(Atom) -> + file:format_error(Atom); +format_error(Term) -> + lists:flatten(io_lib:format("~tp", [Term])). -create(Name, FileList, Options) -> - Mode = lists:filter(fun(X) -> (X=:=compressed) or (X=:=cooked) - end, Options), - case open(Name, [write|Mode]) of - {ok, TarFile} -> - Add = fun({NmInA, NmOrBin}) -> - add(TarFile, NmOrBin, NmInA, Options); - (Nm) -> - add(TarFile, Nm, Nm, Options) - end, - Result = foreach_while_ok(Add, FileList), - case {Result, close(TarFile)} of - {ok, Res} -> Res; - {Res, _} -> Res - end; - Reason -> - Reason - end. +%% Initializes a new reader given a custom file handle and I/O wrappers +-spec init(handle(), write | read, file_op()) -> {ok, reader()} | {error, badarg}. +init(Handle, AccessMode, Fun) when is_function(Fun, 2) -> + Reader = #reader{handle=Handle,access=AccessMode,func=Fun}, + {ok, Pos, Reader2} = do_position(Reader, {cur, 0}), + {ok, Reader2#reader{pos=Pos}}; +init(_Handle, _AccessMode, _Fun) -> + {error, badarg}. +%%%================================================================ %% Extracts all files from the tar file Name. - +-spec extract(open_handle()) -> ok | {error, term()}. extract(Name) -> extract(Name, []). %% Extracts (all) files from the tar file Name. -%% Options accepted: keep_old_files, {files, ListOfFilesToExtract}, verbose, -%% {cwd, AbsoluteDirectory} +%% Options accepted: +%% - cooked: Opens the tar file without mode `raw` +%% - compressed: Uncompresses the tar file when reading +%% - memory: Returns the tar contents as a list of tuples {Name, Bin} +%% - keep_old_files: Extracted files will not overwrite the destination +%% - {files, ListOfFilesToExtract}: Only extract ListOfFilesToExtract +%% - verbose: Prints verbose information about the extraction, +%% - {cwd, AbsoluteDir}: Sets the current working directory for the extraction +-spec extract(open_handle(), [extract_opt()]) -> + ok + | {ok, [{string(), binary()}]} + | {error, term()}. +extract({binary, Bin}, Opts) when is_list(Opts) -> + do_extract({binary, Bin}, Opts); +extract({file, Fd}, Opts) when is_list(Opts) -> + do_extract({file, Fd}, Opts); +extract(#reader{}=Reader, Opts) when is_list(Opts) -> + do_extract(Reader, Opts); +extract(Name, Opts) when is_list(Name); is_binary(Name), is_list(Opts) -> + do_extract(Name, Opts). + +do_extract(Handle, Opts) when is_list(Opts) -> + Opts2 = extract_opts(Opts), + Acc = if Opts2#read_opts.output =:= memory -> []; true -> ok end, + foldl_read(Handle, fun extract1/4, Acc, Opts2). + +extract1(eof, Reader, _, Acc) when is_list(Acc) -> + {ok, {ok, lists:reverse(Acc)}, Reader}; +extract1(eof, Reader, _, Acc) -> + {ok, Acc, Reader}; +extract1(#tar_header{name=Name,size=Size}=Header, Reader, Opts, Acc) -> + case check_extract(Name, Opts) of + true -> + case do_read(Reader, Size) of + {ok, Bin, Reader2} -> + case write_extracted_element(Header, Bin, Opts) of + ok -> + {ok, Acc, Reader2}; + {ok, NameBin} when is_list(Acc) -> + {ok, [NameBin | Acc], Reader2}; + {error, _} = Err -> + throw(Err) + end; + {error, _} = Err -> + throw(Err) + end; + false -> + {ok, Acc, skip_file(Reader)} + end. -extract(Name, Opts) -> - foldl_read(Name, fun extract1/4, ok, extract_opts(Opts)). +%% Checks if the file Name should be extracted. +check_extract(_, #read_opts{files=all}) -> + true; +check_extract(Name, #read_opts{files=Files}) -> + ordsets:is_element(Name, Files). -%% Returns a list of names of the files in the tar file Name. -%% Options accepted: verbose +%%%================================================================ +%% The following table functions produce a list of information about +%% the files contained in the archive. +-type filename() :: string(). +-type typeflag() :: regular | link | symlink | + char | block | directory | + fifo | reserved | unknown. +-type mode() :: non_neg_integer(). +-type uid() :: non_neg_integer(). +-type gid() :: non_neg_integer(). + +-type tar_entry() :: {filename(), + typeflag(), + non_neg_integer(), + calendar:datetime(), + mode(), + uid(), + gid()}. +%% Returns a list of names of the files in the tar file Name. +-spec table(open_handle()) -> {ok, [string()]} | {error, term()}. table(Name) -> table(Name, []). %% Returns a list of names of the files in the tar file Name. %% Options accepted: compressed, verbose, cooked. - -table(Name, Opts) -> +-spec table(open_handle(), [compressed | verbose | cooked]) -> + {ok, [tar_entry()]} | {error, term()}. +table(Name, Opts) when is_list(Opts) -> foldl_read(Name, fun table1/4, [], table_opts(Opts)). +table1(eof, Reader, _, Result) -> + {ok, {ok, lists:reverse(Result)}, Reader}; +table1(#tar_header{}=Header, Reader, #read_opts{verbose=Verbose}, Result) -> + Attrs = table1_attrs(Header, Verbose), + Reader2 = skip_file(Reader), + {ok, [Attrs|Result], Reader2}. + +%% Extracts attributes relevant to table1's output +table1_attrs(#tar_header{typeflag=Typeflag,mode=Mode}=Header, true) -> + Type = typeflag(Typeflag), + Name = Header#tar_header.name, + Mtime = Header#tar_header.mtime, + Uid = Header#tar_header.uid, + Gid = Header#tar_header.gid, + Size = Header#tar_header.size, + {Name, Type, Size, Mtime, Mode, Uid, Gid}; +table1_attrs(#tar_header{name=Name}, _Verbose) -> + Name. + +typeflag(?TYPE_REGULAR) -> regular; +typeflag(?TYPE_REGULAR_A) -> regular; +typeflag(?TYPE_GNU_SPARSE) -> regular; +typeflag(?TYPE_CONT) -> regular; +typeflag(?TYPE_LINK) -> link; +typeflag(?TYPE_SYMLINK) -> symlink; +typeflag(?TYPE_CHAR) -> char; +typeflag(?TYPE_BLOCK) -> block; +typeflag(?TYPE_DIR) -> directory; +typeflag(?TYPE_FIFO) -> fifo; +typeflag(_) -> unknown. +%%%================================================================ %% Comments for printing the contents of a tape archive, %% meant to be invoked from the shell. -t(Name) -> +%% Prints each filename in the archive +-spec t(file:filename()) -> ok | {error, term()}. +t(Name) when is_list(Name); is_binary(Name) -> case table(Name) of - {ok, List} -> - lists:foreach(fun(N) -> ok = io:format("~ts\n", [N]) end, List); - Error -> - Error + {ok, List} -> + lists:foreach(fun(N) -> ok = io:format("~ts\n", [N]) end, List); + Error -> + Error end. +%% Prints verbose information about each file in the archive +-spec tt(open_handle()) -> ok | {error, term()}. tt(Name) -> case table(Name, [verbose]) of - {ok, List} -> - lists:foreach(fun print_header/1, List); - Error -> - Error + {ok, List} -> + lists:foreach(fun print_header/1, List); + Error -> + Error end. +%% Used by tt/1 to print a tar_entry tuple +-spec print_header(tar_entry()) -> ok. print_header({Name, Type, Size, Mtime, Mode, Uid, Gid}) -> io:format("~s~s ~4w/~-4w ~7w ~s ~s\n", - [type_to_string(Type), mode_to_string(Mode), - Uid, Gid, Size, time_to_string(Mtime), Name]). + [type_to_string(Type), mode_to_string(Mode), + Uid, Gid, Size, time_to_string(Mtime), Name]). -type_to_string(regular) -> "-"; +type_to_string(regular) -> "-"; type_to_string(directory) -> "d"; -type_to_string(link) -> "l"; -type_to_string(symlink) -> "s"; -type_to_string(char) -> "c"; -type_to_string(block) -> "b"; -type_to_string(fifo) -> "f"; -type_to_string(_) -> "?". - +type_to_string(link) -> "l"; +type_to_string(symlink) -> "s"; +type_to_string(char) -> "c"; +type_to_string(block) -> "b"; +type_to_string(fifo) -> "f"; +type_to_string(unknown) -> "?". + +%% Converts a numeric mode to its human-readable representation mode_to_string(Mode) -> mode_to_string(Mode, "xwrxwrxwr", []). - mode_to_string(Mode, [C|T], Acc) when Mode band 1 =:= 1 -> mode_to_string(Mode bsr 1, T, [C|Acc]); mode_to_string(Mode, [_|T], Acc) -> @@ -206,6 +260,7 @@ mode_to_string(Mode, [_|T], Acc) -> mode_to_string(_, [], Acc) -> Acc. +%% Converts a datetime tuple to a readable string time_to_string({{Y, Mon, Day}, {H, Min, _}}) -> io_lib:format("~s ~2w ~s:~s ~w", [month(Mon), Day, two_d(H), two_d(Min), Y]). @@ -225,809 +280,1608 @@ month(10) -> "Oct"; month(11) -> "Nov"; month(12) -> "Dec". -%% Converts the short error reason to a descriptive string. +%%%================================================================ +%% The open function with friends is to keep the file and binary api of this module +-type open_handle() :: file:filename() + | {binary, binary()} + | {file, term()}. +-spec open(open_handle(), [write | compressed | cooked]) -> + {ok, reader()} | {error, term()}. +open({binary, Bin}, Mode) when is_binary(Bin) -> + do_open({binary, Bin}, Mode); +open({file, Fd}, Mode) -> + do_open({file, Fd}, Mode); +open(Name, Mode) when is_list(Name); is_binary(Name) -> + do_open(Name, Mode). + +do_open(Name, Mode) when is_list(Mode) -> + case open_mode(Mode) of + {ok, Access, Raw, Opts} -> + open1(Name, Access, Raw, Opts); + {error, Reason} -> + {error, {Name, Reason}} + end. -format_error(bad_header) -> "Bad directory header"; -format_error(eof) -> "Unexpected end of file"; -format_error(symbolic_link_too_long) -> "Symbolic link too long"; -format_error({Name,Reason}) -> - lists:flatten(io_lib:format("~ts: ~ts", [Name,format_error(Reason)])); -format_error(Atom) when is_atom(Atom) -> - file:format_error(Atom); -format_error(Term) -> - lists:flatten(io_lib:format("~tp", [Term])). +open1({binary,Bin}, read, _Raw, Opts) when is_binary(Bin) -> + case file:open(Bin, [ram,binary,read]) of + {ok,File} -> + _ = [ram_file:uncompress(File) || Opts =:= [compressed]], + {ok, #reader{handle=File,access=read,func=fun file_op/2}}; + Error -> + Error + end; +open1({file, Fd}, read, _Raw, _Opts) -> + Reader = #reader{handle=Fd,access=read,func=fun file_op/2}, + case do_position(Reader, {cur, 0}) of + {ok, Pos, Reader2} -> + {ok, Reader2#reader{pos=Pos}}; + {error, _} = Err -> + Err + end; +open1(Name, Access, Raw, Opts) when is_list(Name) or is_binary(Name) -> + case file:open(Name, Raw ++ [binary, Access|Opts]) of + {ok, File} -> + {ok, #reader{handle=File,access=Access,func=fun file_op/2}}; + {error, Reason} -> + {error, {Name, Reason}} + end. +open_mode(Mode) -> + open_mode(Mode, false, [raw], []). -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%% -%%% Useful definitions (also start of implementation). -%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%% Offset for fields in the tar header. -%% Note that these offsets are ZERO-based as in the POSIX standard -%% document, while binaries use ONE-base offset. Caveat Programmer. - --define(th_name, 0). --define(th_mode, 100). --define(th_uid, 108). --define(th_gid, 116). --define(th_size, 124). --define(th_mtime, 136). --define(th_chksum, 148). --define(th_typeflag, 156). --define(th_linkname, 157). --define(th_magic, 257). --define(th_version, 263). --define(th_prefix, 345). - -%% Length of these fields. - --define(th_name_len, 100). --define(th_mode_len, 8). --define(th_uid_len, 8). --define(th_gid_len, 8). --define(th_size_len, 12). --define(th_mtime_len, 12). --define(th_chksum_len, 8). --define(th_linkname_len, 100). --define(th_magic_len, 6). --define(th_version_len, 2). --define(th_prefix_len, 167). - --record(tar_header, - {name, % Name of file. - mode, % Mode bits. - uid, % User id. - gid, % Group id. - size, % Size of file - mtime, % Last modified (seconds since - % Jan 1, 1970). - chksum, % Checksum of header. - typeflag = [], % Type of file. - linkname = [], % Name of link. - filler = [], - prefix}). % Filename prefix. - --define(record_size, 512). --define(block_size, (512*20)). - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%% -%%% Adding members to a tar archive. -%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -add1(TarFile, Bin, NameInArchive, Opts) when is_binary(Bin) -> - Now = calendar:now_to_local_time(erlang:timestamp()), - Info = #file_info{size = byte_size(Bin), - type = regular, - access = read_write, - atime = Now, - mtime = Now, - ctime = Now, - mode = 8#100644, - links = 1, - major_device = 0, - minor_device = 0, - inode = 0, - uid = 0, - gid = 0}, - Header = create_header(NameInArchive, Info), - add1(TarFile, NameInArchive, Header, Bin, Opts); -add1(TarFile, Name, NameInArchive, Opts) -> - case read_file_and_info(Name, Opts) of - {ok, Bin, Info} when Info#file_info.type =:= regular -> - Header = create_header(NameInArchive, Info), - add1(TarFile, Name, Header, Bin, Opts); - {ok, PointsTo, Info} when Info#file_info.type =:= symlink -> - if - length(PointsTo) > 100 -> - {error,{PointsTo,symbolic_link_too_long}}; - true -> - Info2 = Info#file_info{size=0}, - Header = create_header(NameInArchive, Info2, PointsTo), - add1(TarFile, Name, Header, list_to_binary([]), Opts) - end; - {ok, _, Info} when Info#file_info.type =:= directory -> - add_directory(TarFile, Name, NameInArchive, Info, Opts); - {ok, _, #file_info{type=Type}} -> - {error, {bad_file_type, Name, Type}}; - {error, Reason} -> - {error, {Name, Reason}} +open_mode(read, _, Raw, _) -> + {ok, read, Raw, []}; +open_mode(write, _, Raw, _) -> + {ok, write, Raw, []}; +open_mode([read|Rest], false, Raw, Opts) -> + open_mode(Rest, read, Raw, Opts); +open_mode([write|Rest], false, Raw, Opts) -> + open_mode(Rest, write, Raw, Opts); +open_mode([compressed|Rest], Access, Raw, Opts) -> + open_mode(Rest, Access, Raw, [compressed|Opts]); +open_mode([cooked|Rest], Access, _Raw, Opts) -> + open_mode(Rest, Access, [], Opts); +open_mode([], Access, Raw, Opts) -> + {ok, Access, Raw, Opts}; +open_mode(_, _, _, _) -> + {error, einval}. + +file_op(write, {Fd, Data}) -> + file:write(Fd, Data); +file_op(position, {Fd, Pos}) -> + file:position(Fd, Pos); +file_op(read2, {Fd, Size}) -> + file:read(Fd, Size); +file_op(close, Fd) -> + file:close(Fd). + +%% Closes a tar archive. +-spec close(reader()) -> ok | {error, term()}. +close(#reader{access=read}=Reader) -> + ok = do_close(Reader); +close(#reader{access=write}=Reader) -> + {ok, Reader2} = pad_file(Reader), + ok = do_close(Reader2), + ok; +close(_) -> + {error, einval}. + +pad_file(#reader{pos=Pos}=Reader) -> + %% There must be at least two zero blocks at the end. + PadCurrent = skip_padding(Pos+?BLOCK_SIZE), + Padding = <<0:PadCurrent/unit:8>>, + do_write(Reader, [Padding, ?ZERO_BLOCK, ?ZERO_BLOCK]). + + +%%%================================================================ +%% Creation/modification of tar archives + +%% Creates a tar file Name containing the given files. +-spec create(file:filename(), filelist()) -> ok | {error, {string(), term()}}. +create(Name, FileList) when is_list(Name); is_binary(Name) -> + create(Name, FileList, []). + +%% Creates a tar archive Name containing the given files. +%% Accepted options: verbose, compressed, cooked +-spec create(file:filename(), filelist(), [create_opt()]) -> + ok | {error, term()} | {error, {string(), term()}}. +create(Name, FileList, Options) when is_list(Name); is_binary(Name) -> + Mode = lists:filter(fun(X) -> (X=:=compressed) or (X=:=cooked) + end, Options), + case open(Name, [write|Mode]) of + {ok, TarFile} -> + do_create(TarFile, FileList, Options); + {error, _} = Err -> + Err end. -add1(Tar, Name, Header, chunked, Options) -> - add_verbose(Options, "a ~ts [chunked ", [Name]), - try - ok = do_write(Tar, Header), - {ok,D} = file:open(Name, [read,binary]), - {ok,NumBytes} = add_read_write_chunks(D, Tar, Options#add_opts.chunk_size, 0, Options), - _ = file:close(D), - ok = do_write(Tar, padding(NumBytes,?record_size)) - of - ok -> - add_verbose(Options, "~n", []), - ok - catch - error:{badmatch,{error,Error}} -> - add_verbose(Options, "~n", []), - {error,{Name,Error}} +do_create(TarFile, [], _Opts) -> + close(TarFile); +do_create(TarFile, [{NameInArchive, NameOrBin}|Rest], Opts) -> + case add(TarFile, NameOrBin, NameInArchive, Opts) of + ok -> + do_create(TarFile, Rest, Opts); + {error, _} = Err -> + _ = close(TarFile), + Err end; -add1(Tar, Name, Header, Bin, Options) -> - add_verbose(Options, "a ~ts~n", [Name]), - do_write(Tar, [Header, Bin, padding(byte_size(Bin), ?record_size)]). - -add_read_write_chunks(D, Tar, ChunkSize, SumNumBytes, Options) -> - case file:read(D, ChunkSize) of - {ok,Bin} -> - ok = do_write(Tar, Bin), - add_verbose(Options, ".", []), - add_read_write_chunks(D, Tar, ChunkSize, SumNumBytes+byte_size(Bin), Options); - eof -> - add_verbose(Options, "]", []), - {ok,SumNumBytes}; - Other -> - Other +do_create(TarFile, [Name|Rest], Opts) -> + case add(TarFile, Name, Name, Opts) of + ok -> + do_create(TarFile, Rest, Opts); + {error, _} = Err -> + _ = close(TarFile), + Err end. -add_directory(TarFile, DirName, NameInArchive, Info, Options) -> +%% Adds a file to a tape archive. +-type add_type() :: string() + | {string(), string()} + | {string(), binary()}. +-spec add(reader(), add_type(), [add_opt()]) -> ok | {error, term()}. +add(Reader, {NameInArchive, Name}, Opts) + when is_list(NameInArchive), is_list(Name) -> + do_add(Reader, Name, NameInArchive, Opts); +add(Reader, {NameInArchive, Bin}, Opts) + when is_list(NameInArchive), is_binary(Bin) -> + do_add(Reader, Bin, NameInArchive, Opts); +add(Reader, Name, Opts) when is_list(Name) -> + do_add(Reader, Name, Name, Opts). + + +-spec add(reader(), string() | binary(), string(), [add_opt()]) -> + ok | {error, term()}. +add(Reader, NameOrBin, NameInArchive, Options) + when is_list(NameOrBin); is_binary(NameOrBin), + is_list(NameInArchive), is_list(Options) -> + do_add(Reader, NameOrBin, NameInArchive, Options). + +do_add(#reader{access=write}=Reader, Name, NameInArchive, Options) + when is_list(NameInArchive), is_list(Options) -> + Opts = #add_opts{read_info=fun(F) -> file:read_link_info(F) end}, + add1(Reader, Name, NameInArchive, add_opts(Options, Opts)); +do_add(#reader{access=read},_,_,_) -> + {error, eacces}; +do_add(Reader,_,_,_) -> + {error, {badarg, Reader}}. + +add_opts([dereference|T], Opts) -> + add_opts(T, Opts#add_opts{read_info=fun(F) -> file:read_file_info(F) end}); +add_opts([verbose|T], Opts) -> + add_opts(T, Opts#add_opts{verbose=true}); +add_opts([{chunks,N}|T], Opts) -> + add_opts(T, Opts#add_opts{chunk_size=N}); +add_opts([_|T], Opts) -> + add_opts(T, Opts); +add_opts([], Opts) -> + Opts. + +add1(#reader{}=Reader, Name, NameInArchive, #add_opts{read_info=ReadInfo}=Opts) + when is_list(Name) -> + Res = case ReadInfo(Name) of + {error, Reason0} -> + {error, {Name, Reason0}}; + {ok, #file_info{type=symlink}=Fi} -> + add_verbose(Opts, "a ~ts~n", [NameInArchive]), + {ok, Linkname} = file:read_link(Name), + Header = fileinfo_to_header(NameInArchive, Fi, Linkname), + add_header(Reader, Header, Opts); + {ok, #file_info{type=regular}=Fi} -> + add_verbose(Opts, "a ~ts~n", [NameInArchive]), + Header = fileinfo_to_header(NameInArchive, Fi, false), + {ok, Reader2} = add_header(Reader, Header, Opts), + FileSize = Header#tar_header.size, + {ok, FileSize, Reader3} = do_copy(Reader2, Name, Opts), + Padding = skip_padding(FileSize), + Pad = <<0:Padding/unit:8>>, + do_write(Reader3, Pad); + {ok, #file_info{type=directory}=Fi} -> + add_directory(Reader, Name, NameInArchive, Fi, Opts); + {ok, #file_info{}=Fi} -> + add_verbose(Opts, "a ~ts~n", [NameInArchive]), + Header = fileinfo_to_header(NameInArchive, Fi, false), + add_header(Reader, Header, Opts) + end, + case Res of + ok -> ok; + {ok, _Reader} -> ok; + {error, _Reason} = Err -> Err + end; +add1(Reader, Bin, NameInArchive, Opts) when is_binary(Bin) -> + add_verbose(Opts, "a ~ts~n", [NameInArchive]), + Now = calendar:now_to_local_time(erlang:timestamp()), + Header = #tar_header{ + name = NameInArchive, + size = byte_size(Bin), + typeflag = ?TYPE_REGULAR, + atime = Now, + mtime = Now, + ctime = Now, + mode = 8#100644}, + {ok, Reader2} = add_header(Reader, Header, Opts), + Padding = skip_padding(byte_size(Bin)), + Data = [Bin, <<0:Padding/unit:8>>], + case do_write(Reader2, Data) of + {ok, _Reader3} -> ok; + {error, Reason} -> {error, {NameInArchive, Reason}} + end. + +add_directory(Reader, DirName, NameInArchive, Info, Opts) -> case file:list_dir(DirName) of - {ok, []} -> - add_verbose(Options, "a ~ts~n", [DirName]), - Header = create_header(NameInArchive, Info), - do_write(TarFile, Header); - {ok, Files} -> - Add = fun (File) -> - add1(TarFile, - filename:join(DirName, File), - filename:join(NameInArchive, File), - Options) end, - foreach_while_ok(Add, Files); - {error, Reason} -> - {error, {DirName, Reason}} + {ok, []} -> + add_verbose(Opts, "a ~ts~n", [NameInArchive]), + Header = fileinfo_to_header(NameInArchive, Info, false), + add_header(Reader, Header, Opts); + {ok, Files} -> + add_verbose(Opts, "a ~ts~n", [NameInArchive]), + try add_files(Reader, Files, DirName, NameInArchive, Opts) of + ok -> ok; + {error, _} = Err -> Err + catch + throw:{error, {_Name, _Reason}} = Err -> Err; + throw:{error, Reason} -> {error, {DirName, Reason}} + end; + {error, Reason} -> + {error, {DirName, Reason}} end. - -%% Creates a header for file in a tar file. - -create_header(Name, Info) -> - create_header(Name, Info, []). -create_header(Name, #file_info {mode=Mode, uid=Uid, gid=Gid, - size=Size, mtime=Mtime0, type=Type}, Linkname) -> - Mtime = posix_time(erlang:localtime_to_universaltime(Mtime0)), - {Prefix,Suffix} = split_filename(Name), - H0 = [to_string(Suffix, 100), - to_octal(Mode, 8), - to_octal(Uid, 8), - to_octal(Gid, 8), - to_octal(Size, ?th_size_len), - to_octal(Mtime, ?th_mtime_len), - <<" ">>, - file_type(Type), - to_string(Linkname, ?th_linkname_len), - "ustar",0, - "00", - zeroes(?th_prefix-?th_version-?th_version_len), - to_string(Prefix, ?th_prefix_len)], - H = list_to_binary(H0), - 512 = byte_size(H), %Assertion. - ChksumString = to_octal(checksum(H), 6, [0,$\s]), - <> = H, - [Before,ChksumString,After]. - -file_type(regular) -> $0; -file_type(symlink) -> $2; -file_type(directory) -> $5. - -to_octal(Int, Count) when Count > 1 -> - to_octal(Int, Count-1, [0]). - -to_octal(_, 0, Result) -> Result; -to_octal(Int, Count, Result) -> - to_octal(Int div 8, Count-1, [Int rem 8 + $0|Result]). - -to_string(Str0, Count) -> - Str = case file:native_name_encoding() of - utf8 -> - unicode:characters_to_binary(Str0); - latin1 -> - list_to_binary(Str0) - end, - case byte_size(Str) of - Size when Size < Count -> - [Str|zeroes(Count-Size)]; - _ -> Str + +add_files(_Reader, [], _Dir, _DirInArchive, _Opts) -> + ok; +add_files(Reader, [Name|Rest], Dir, DirInArchive, #add_opts{read_info=Info}=Opts) -> + FullName = filename:join(Dir, Name), + NameInArchive = filename:join(DirInArchive, Name), + Res = case Info(FullName) of + {error, Reason} -> + {error, {FullName, Reason}}; + {ok, #file_info{type=directory}=Fi} -> + add_directory(Reader, FullName, NameInArchive, Fi, Opts); + {ok, #file_info{type=symlink}=Fi} -> + add_verbose(Opts, "a ~ts~n", [NameInArchive]), + {ok, Linkname} = file:read_link(FullName), + Header = fileinfo_to_header(NameInArchive, Fi, Linkname), + add_header(Reader, Header, Opts); + {ok, #file_info{type=regular}=Fi} -> + add_verbose(Opts, "a ~ts~n", [NameInArchive]), + Header = fileinfo_to_header(NameInArchive, Fi, false), + {ok, Reader2} = add_header(Reader, Header, Opts), + FileSize = Header#tar_header.size, + {ok, FileSize, Reader3} = do_copy(Reader2, FullName, Opts), + Padding = skip_padding(FileSize), + Pad = <<0:Padding/unit:8>>, + do_write(Reader3, Pad); + {ok, #file_info{}=Fi} -> + add_verbose(Opts, "a ~ts~n", [NameInArchive]), + Header = fileinfo_to_header(NameInArchive, Fi, false), + add_header(Reader, Header, Opts) + end, + case Res of + ok -> add_files(Reader, Rest, Dir, DirInArchive, Opts); + {ok, ReaderNext} -> add_files(ReaderNext, Rest, Dir, DirInArchive, Opts); + {error, _} = Err -> Err end. -%% Pads out end of file. - -pad_file(File) -> - {ok,Position} = do_position(File, {cur,0}), - %% There must be at least two zero records at the end. - Fill = case ?block_size - (Position rem ?block_size) of - Fill0 when Fill0 < 2*?record_size -> - %% We need to another block here to ensure that there - %% are at least two zero records at the end. - Fill0 + ?block_size; - Fill0 -> - %% Large enough. - Fill0 - end, - do_write(File, zeroes(Fill)). - -split_filename(Name) when length(Name) =< ?th_name_len -> - {"", Name}; -split_filename(Name0) -> - split_filename(lists:reverse(filename:split(Name0)), [], [], 0). - -split_filename([Comp|Rest], Prefix, Suffix, Len) - when Len+length(Comp) < ?th_name_len -> - split_filename(Rest, Prefix, [Comp|Suffix], Len+length(Comp)+1); -split_filename([Comp|Rest], Prefix, Suffix, Len) -> - split_filename(Rest, [Comp|Prefix], Suffix, Len+length(Comp)+1); -split_filename([], Prefix, Suffix, _) -> - {filename:join(Prefix),filename:join(Suffix)}. - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%% -%%% Retrieving files from a tape archive. -%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%% Options used when reading a tar archive. - --record(read_opts, - {cwd :: string(), % Current working directory. - keep_old_files = false :: boolean(), % Owerwrite or not. - files = all, % Set of files to extract - % (or all). - output = file :: 'file' | 'memory', - open_mode = [], % Open mode options. - verbose = false :: boolean()}). % Verbose on/off. +format_string(String, Size) when length(String) > Size -> + throw({error, {write_string, field_too_long}}); +format_string(String, Size) -> + Ascii = to_ascii(String), + if byte_size(Ascii) < Size -> + [Ascii, 0]; + true -> + Ascii + end. -extract_opts(List) -> - extract_opts(List, default_options()). +format_octal(Octal) -> + iolist_to_binary(io_lib:fwrite("~.8B", [Octal])). + +add_header(#reader{}=Reader, #tar_header{}=Header, Opts) -> + {ok, Iodata} = build_header(Header, Opts), + do_write(Reader, Iodata). + +write_to_block(Block, IoData, Start) when is_list(IoData) -> + write_to_block(Block, iolist_to_binary(IoData), Start); +write_to_block(Block, Bin, Start) when is_binary(Bin) -> + Size = byte_size(Bin), + <> = Block, + <>. + +build_header(#tar_header{}=Header, Opts) -> + #tar_header{ + name=Name, + mode=Mode, + uid=Uid, + gid=Gid, + size=Size, + typeflag=Type, + linkname=Linkname, + uname=Uname, + gname=Gname, + devmajor=Devmaj, + devminor=Devmin + } = Header, + Mtime = datetime_to_posix(Header#tar_header.mtime), + + Block0 = ?ZERO_BLOCK, + {Block1, Pax0} = write_string(Block0, ?V7_NAME, ?V7_NAME_LEN, Name, ?PAX_PATH, #{}), + Block2 = write_octal(Block1, ?V7_MODE, ?V7_MODE_LEN, Mode), + {Block3, Pax1} = write_numeric(Block2, ?V7_UID, ?V7_UID_LEN, Uid, ?PAX_UID, Pax0), + {Block4, Pax2} = write_numeric(Block3, ?V7_GID, ?V7_GID_LEN, Gid, ?PAX_GID, Pax1), + {Block5, Pax3} = write_numeric(Block4, ?V7_SIZE, ?V7_SIZE_LEN, Size, ?PAX_SIZE, Pax2), + {Block6, Pax4} = write_numeric(Block5, ?V7_MTIME, ?V7_MTIME_LEN, Mtime, ?PAX_NONE, Pax3), + {Block7, Pax5} = write_string(Block6, ?V7_TYPE, ?V7_TYPE_LEN, <>, ?PAX_NONE, Pax4), + {Block8, Pax6} = write_string(Block7, ?V7_LINKNAME, ?V7_LINKNAME_LEN, + Linkname, ?PAX_LINKPATH, Pax5), + {Block9, Pax7} = write_string(Block8, ?USTAR_UNAME, ?USTAR_UNAME_LEN, + Uname, ?PAX_UNAME, Pax6), + {Block10, Pax8} = write_string(Block9, ?USTAR_GNAME, ?USTAR_GNAME_LEN, + Gname, ?PAX_GNAME, Pax7), + {Block11, Pax9} = write_numeric(Block10, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN, + Devmaj, ?PAX_NONE, Pax8), + {Block12, Pax10} = write_numeric(Block11, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN, + Devmin, ?PAX_NONE, Pax9), + {Block13, Pax11} = set_path(Block12, Pax10), + PaxEntry = case maps:size(Pax11) of + 0 -> []; + _ -> build_pax_entry(Header, Pax11, Opts) + end, + Block14 = set_format(Block13, ?FORMAT_USTAR), + Block15 = set_checksum(Block14), + {ok, [PaxEntry, Block15]}. + +set_path(Block0, Pax) -> + %% only use ustar header when name is too long + case maps:get(?PAX_PATH, Pax, nil) of + nil -> + {Block0, Pax}; + PaxPath -> + case split_ustar_path(PaxPath) of + {ok, UstarName, UstarPrefix} -> + {Block1, _} = write_string(Block0, ?V7_NAME, ?V7_NAME_LEN, + UstarName, ?PAX_NONE, #{}), + {Block2, _} = write_string(Block1, ?USTAR_PREFIX, ?USTAR_PREFIX_LEN, + UstarPrefix, ?PAX_NONE, #{}), + {Block2, maps:remove(?PAX_PATH, Pax)}; + false -> + {Block0, Pax} + end + end. -table_opts(List) -> - read_opts(List, default_options()). +set_format(Block0, Format) + when Format =:= ?FORMAT_USTAR; Format =:= ?FORMAT_PAX -> + Block1 = write_to_block(Block0, ?MAGIC_USTAR, ?USTAR_MAGIC), + write_to_block(Block1, ?VERSION_USTAR, ?USTAR_VERSION); +set_format(_Block, Format) -> + throw({error, {invalid_format, Format}}). + +set_checksum(Block) -> + Checksum = compute_checksum(Block), + write_octal(Block, ?V7_CHKSUM, ?V7_CHKSUM_LEN, Checksum). + +build_pax_entry(Header, PaxAttrs, Opts) -> + Path = Header#tar_header.name, + Filename = filename:basename(Path), + Dir = filename:dirname(Path), + Path2 = filename:join([Dir, "PaxHeaders.0", Filename]), + AsciiPath = to_ascii(Path2), + Path3 = if byte_size(AsciiPath) > ?V7_NAME_LEN -> + binary_part(AsciiPath, 0, ?V7_NAME_LEN - 1); + true -> + AsciiPath + end, + Keys = maps:keys(PaxAttrs), + SortedKeys = lists:sort(Keys), + PaxFile = build_pax_file(SortedKeys, PaxAttrs), + Size = byte_size(PaxFile), + Padding = (?BLOCK_SIZE - + (byte_size(PaxFile) rem ?BLOCK_SIZE)) rem ?BLOCK_SIZE, + Pad = <<0:Padding/unit:8>>, + PaxHeader = #tar_header{ + name=unicode:characters_to_list(Path3), + size=Size, + mtime=Header#tar_header.mtime, + atime=Header#tar_header.atime, + ctime=Header#tar_header.ctime, + typeflag=?TYPE_X_HEADER + }, + {ok, PaxHeaderData} = build_header(PaxHeader, Opts), + [PaxHeaderData, PaxFile, Pad]. + +build_pax_file(Keys, PaxAttrs) -> + build_pax_file(Keys, PaxAttrs, []). +build_pax_file([], _, Acc) -> + unicode:characters_to_binary(Acc); +build_pax_file([K|Rest], Attrs, Acc) -> + V = maps:get(K, Attrs), + Size = sizeof(K) + sizeof(V) + 3, + Size2 = sizeof(Size) + Size, + Key = to_string(K), + Value = to_string(V), + Record = unicode:characters_to_binary(io_lib:format("~B ~ts=~ts\n", [Size2, Key, Value])), + if byte_size(Record) =/= Size2 -> + Size3 = byte_size(Record), + Record2 = io_lib:format("~B ~ts=~ts\n", [Size3, Key, Value]), + build_pax_file(Rest, Attrs, [Acc, Record2]); + true -> + build_pax_file(Rest, Attrs, [Acc, Record]) + end. -default_options() -> - {ok, Cwd} = file:get_cwd(), - #read_opts{cwd=Cwd}. +sizeof(Bin) when is_binary(Bin) -> + byte_size(Bin); +sizeof(List) when is_list(List) -> + length(List); +sizeof(N) when is_integer(N) -> + byte_size(integer_to_binary(N)); +sizeof(N) when is_float(N) -> + byte_size(float_to_binary(N)). + +to_string(Bin) when is_binary(Bin) -> + unicode:characters_to_list(Bin); +to_string(List) when is_list(List) -> + List; +to_string(N) when is_integer(N) -> + integer_to_list(N); +to_string(N) when is_float(N) -> + float_to_list(N). + +split_ustar_path(Path) -> + Len = length(Path), + NotAscii = not is_ascii(Path), + if Len =< ?V7_NAME_LEN; NotAscii -> + false; + true -> + PathBin = binary:list_to_bin(Path), + case binary:split(PathBin, [<<$/>>], [global, trim_all]) of + [Part] when byte_size(Part) >= ?V7_NAME_LEN -> + false; + Parts -> + case lists:last(Parts) of + Name when byte_size(Name) >= ?V7_NAME_LEN -> + false; + Name -> + Parts2 = lists:sublist(Parts, length(Parts) - 1), + join_split_ustar_path(Parts2, {ok, Name, nil}) + end + end + end. -%% Parse options for extract. +join_split_ustar_path([], Acc) -> + Acc; +join_split_ustar_path([Part|_], {ok, _, nil}) + when byte_size(Part) > ?USTAR_PREFIX_LEN -> + false; +join_split_ustar_path([Part|_], {ok, _Name, Acc}) + when (byte_size(Part)+byte_size(Acc)) > ?USTAR_PREFIX_LEN -> + false; +join_split_ustar_path([Part|Rest], {ok, Name, nil}) -> + join_split_ustar_path(Rest, {ok, Name, Part}); +join_split_ustar_path([Part|Rest], {ok, Name, Acc}) -> + join_split_ustar_path(Rest, {ok, Name, <>}). + +datetime_to_posix(DateTime) -> + Epoch = calendar:datetime_to_gregorian_seconds(?EPOCH), + Secs = calendar:datetime_to_gregorian_seconds(DateTime), + case Secs - Epoch of + N when N < 0 -> 0; + N -> N + end. -extract_opts([keep_old_files|Rest], Opts) -> - extract_opts(Rest, Opts#read_opts{keep_old_files=true}); -extract_opts([{cwd, Cwd}|Rest], Opts) -> - extract_opts(Rest, Opts#read_opts{cwd=Cwd}); -extract_opts([{files, Files}|Rest], Opts) -> - Set = ordsets:from_list(Files), - extract_opts(Rest, Opts#read_opts{files=Set}); -extract_opts([memory|Rest], Opts) -> - extract_opts(Rest, Opts#read_opts{output=memory}); -extract_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) -> - extract_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]}); -extract_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) -> - extract_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]}); -extract_opts([verbose|Rest], Opts) -> - extract_opts(Rest, Opts#read_opts{verbose=true}); -extract_opts([Other|Rest], Opts) -> - extract_opts(Rest, read_opts([Other], Opts)); -extract_opts([], Opts) -> - Opts. +write_octal(Block, Pos, Size, X) -> + Octal = zero_pad(format_octal(X), Size-1), + if byte_size(Octal) < Size -> + write_to_block(Block, Octal, Pos); + true -> + throw({error, {write_failed, octal_field_too_long}}) + end. -%% Common options for all read operations. +write_string(Block, Pos, Size, Str, PaxAttr, Pax0) -> + NotAscii = not is_ascii(Str), + if PaxAttr =/= ?PAX_NONE andalso (length(Str) > Size orelse NotAscii) -> + Pax1 = maps:put(PaxAttr, Str, Pax0), + {Block, Pax1}; + true -> + Formatted = format_string(Str, Size), + {write_to_block(Block, Formatted, Pos), Pax0} + end. +write_numeric(Block, Pos, Size, X, PaxAttr, Pax0) -> + %% attempt octal + Octal = zero_pad(format_octal(X), Size-1), + if byte_size(Octal) < Size -> + {write_to_block(Block, [Octal, 0], Pos), Pax0}; + PaxAttr =/= ?PAX_NONE -> + Pax1 = maps:put(PaxAttr, X, Pax0), + {Block, Pax1}; + true -> + throw({error, {write_failed, numeric_field_too_long}}) + end. -read_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) -> - read_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]}); -read_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) -> - read_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]}); -read_opts([verbose|Rest], Opts) -> - read_opts(Rest, Opts#read_opts{verbose=true}); -read_opts([_|Rest], Opts) -> - read_opts(Rest, Opts); -read_opts([], Opts) -> - Opts. +zero_pad(Str, Size) when byte_size(Str) >= Size -> + Str; +zero_pad(Str, Size) -> + Padding = Size - byte_size(Str), + Pad = binary:copy(<<$0>>, Padding), + <>. -foldl_read({AccessMode,TD={tar_descriptor,_UsrHandle,_AccessFun}}, Fun, Accu, Opts) -> - case AccessMode of - read -> - foldl_read0(TD, Fun, Accu, Opts); - _ -> - {error,{read_mode_expected,AccessMode}} - end; -foldl_read(TarName, Fun, Accu, Opts) -> - case open(TarName, [read|Opts#read_opts.open_mode]) of - {ok, {read, File}} -> - Result = foldl_read0(File, Fun, Accu, Opts), - ok = do_close(File), - Result; - Error -> - Error + +%%%================================================================ +%% Functions for creating or modifying tar archives + +read_block(Reader) -> + case do_read(Reader, ?BLOCK_SIZE) of + eof -> + throw({error, eof}); + %% Two zero blocks mark the end of the archive + {ok, ?ZERO_BLOCK, Reader1} -> + case do_read(Reader1, ?BLOCK_SIZE) of + eof -> + % This is technically a malformed end-of-archive marker, + % as two ZERO_BLOCKs are expected as the marker, + % but if we've already made it this far, we should just ignore it + eof; + {ok, ?ZERO_BLOCK, _Reader2} -> + eof; + {ok, _Block, _Reader2} -> + throw({error, invalid_end_of_archive}); + {error,_} = Err -> + throw(Err) + end; + {ok, Block, Reader1} when is_binary(Block) -> + {ok, Block, Reader1}; + {error, _} = Err -> + throw(Err) end. -foldl_read0(File, Fun, Accu, Opts) -> - case catch foldl_read1(Fun, Accu, File, Opts) of - {'EXIT', Reason} -> - exit(Reason); - {error, {Reason, Format, Args}} -> - read_verbose(Opts, Format, Args), - {error, Reason}; - {error, Reason} -> - {error, Reason}; - Ok -> - Ok +get_header(#reader{}=Reader) -> + case read_block(Reader) of + eof -> + eof; + {ok, Block, Reader1} -> + convert_header(Block, Reader1) end. -foldl_read1(Fun, Accu0, File, Opts) -> - case get_header(File) of - eof -> - Fun(eof, File, Opts, Accu0); - Header -> - {ok, NewAccu} = Fun(Header, File, Opts, Accu0), - foldl_read1(Fun, NewAccu, File, Opts) +%% Converts the tar header to a record. +to_v7(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> + #header_v7{ + name=binary_part(Bin, ?V7_NAME, ?V7_NAME_LEN), + mode=binary_part(Bin, ?V7_MODE, ?V7_MODE_LEN), + uid=binary_part(Bin, ?V7_UID, ?V7_UID_LEN), + gid=binary_part(Bin, ?V7_GID, ?V7_GID_LEN), + size=binary_part(Bin, ?V7_SIZE, ?V7_SIZE_LEN), + mtime=binary_part(Bin, ?V7_MTIME, ?V7_MTIME_LEN), + checksum=binary_part(Bin, ?V7_CHKSUM, ?V7_CHKSUM_LEN), + typeflag=binary:at(Bin, ?V7_TYPE), + linkname=binary_part(Bin, ?V7_LINKNAME, ?V7_LINKNAME_LEN) + }; +to_v7(_) -> + {error, header_block_too_small}. + +to_gnu(#header_v7{}=V7, Bin) + when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> + #header_gnu{ + header_v7=V7, + magic=binary_part(Bin, ?GNU_MAGIC, ?GNU_MAGIC_LEN), + version=binary_part(Bin, ?GNU_VERSION, ?GNU_VERSION_LEN), + uname=binary_part(Bin, 265, 32), + gname=binary_part(Bin, 297, 32), + devmajor=binary_part(Bin, 329, 8), + devminor=binary_part(Bin, 337, 8), + atime=binary_part(Bin, 345, 12), + ctime=binary_part(Bin, 357, 12), + sparse=to_sparse_array(binary_part(Bin, 386, 24*4+1)), + real_size=binary_part(Bin, 483, 12) + }. + +to_star(#header_v7{}=V7, Bin) + when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> + #header_star{ + header_v7=V7, + magic=binary_part(Bin, ?USTAR_MAGIC, ?USTAR_MAGIC_LEN), + version=binary_part(Bin, ?USTAR_VERSION, ?USTAR_VERSION_LEN), + uname=binary_part(Bin, ?USTAR_UNAME, ?USTAR_UNAME_LEN), + gname=binary_part(Bin, ?USTAR_GNAME, ?USTAR_GNAME_LEN), + devmajor=binary_part(Bin, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN), + devminor=binary_part(Bin, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN), + prefix=binary_part(Bin, 345, 131), + atime=binary_part(Bin, 476, 12), + ctime=binary_part(Bin, 488, 12), + trailer=binary_part(Bin, ?STAR_TRAILER, ?STAR_TRAILER_LEN) + }. + +to_ustar(#header_v7{}=V7, Bin) + when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> + #header_ustar{ + header_v7=V7, + magic=binary_part(Bin, ?USTAR_MAGIC, ?USTAR_MAGIC_LEN), + version=binary_part(Bin, ?USTAR_VERSION, ?USTAR_VERSION_LEN), + uname=binary_part(Bin, ?USTAR_UNAME, ?USTAR_UNAME_LEN), + gname=binary_part(Bin, ?USTAR_GNAME, ?USTAR_GNAME_LEN), + devmajor=binary_part(Bin, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN), + devminor=binary_part(Bin, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN), + prefix=binary_part(Bin, 345, 155) + }. + +to_sparse_array(Bin) when is_binary(Bin) -> + MaxEntries = byte_size(Bin) div 24, + IsExtended = 1 =:= binary:at(Bin, 24*MaxEntries), + Entries = parse_sparse_entries(Bin, MaxEntries-1, []), + #sparse_array{ + entries=Entries, + max_entries=MaxEntries, + is_extended=IsExtended + }. + +parse_sparse_entries(<<>>, _, Acc) -> + Acc; +parse_sparse_entries(_, -1, Acc) -> + Acc; +parse_sparse_entries(Bin, N, Acc) -> + case to_sparse_entry(binary_part(Bin, N*24, 24)) of + nil -> + parse_sparse_entries(Bin, N-1, Acc); + Entry = #sparse_entry{} -> + parse_sparse_entries(Bin, N-1, [Entry|Acc]) end. -table1(eof, _, _, Result) -> - {ok, lists:reverse(Result)}; -table1(Header = #tar_header{}, File, #read_opts{verbose=true}, Result) -> - #tar_header{name=Name, size=Size, mtime=Mtime, typeflag=Type, - mode=Mode, uid=Uid, gid=Gid} = Header, - skip(File, Size), - {ok, [{Name, Type, Size, posix_to_erlang_time(Mtime), Mode, Uid, Gid}|Result]}; -table1(#tar_header{name=Name, size=Size}, File, _, Result) -> - skip(File, Size), - {ok, [Name|Result]}. - -extract1(eof, _, _, Acc) -> - if - is_list(Acc) -> - {ok, lists:reverse(Acc)}; - true -> - Acc - end; -extract1(Header, File, Opts, Acc) -> - Name = Header#tar_header.name, - case check_extract(Name, Opts) of - true -> - {ok, Bin} = get_element(File, Header), - case write_extracted_element(Header, Bin, Opts) of - ok -> - {ok, Acc}; - {ok, NameBin} when is_list(Acc) -> - {ok, [NameBin | Acc]}; - {ok, NameBin} when Acc =:= ok -> - {ok, [NameBin]} - end; - false -> - ok = skip(File, Header#tar_header.size), - {ok, Acc} +-define(EMPTY_ENTRY, <<0,0,0,0,0,0,0,0,0,0,0,0>>). +to_sparse_entry(Bin) when is_binary(Bin), byte_size(Bin) =:= 24 -> + OffsetBin = binary_part(Bin, 0, 12), + NumBytesBin = binary_part(Bin, 12, 12), + case {OffsetBin, NumBytesBin} of + {?EMPTY_ENTRY, ?EMPTY_ENTRY} -> + nil; + _ -> + #sparse_entry{ + offset=parse_numeric(OffsetBin), + num_bytes=parse_numeric(NumBytesBin)} end. -%% Checks if the file Name should be extracted. +-spec get_format(binary()) -> {ok, pos_integer(), header_v7()} + | ?FORMAT_UNKNOWN + | {error, term()}. +get_format(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> + do_get_format(to_v7(Bin), Bin). + +do_get_format({error, _} = Err, _Bin) -> + Err; +do_get_format(#header_v7{}=V7, Bin) + when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> + Checksum = parse_octal(V7#header_v7.checksum), + Chk1 = compute_checksum(Bin), + Chk2 = compute_signed_checksum(Bin), + if Checksum =/= Chk1 andalso Checksum =/= Chk2 -> + ?FORMAT_UNKNOWN; + true -> + %% guess magic + Ustar = to_ustar(V7, Bin), + Star = to_star(V7, Bin), + Magic = Ustar#header_ustar.magic, + Version = Ustar#header_ustar.version, + Trailer = Star#header_star.trailer, + Format = if + Magic =:= ?MAGIC_USTAR, Trailer =:= ?TRAILER_STAR -> + ?FORMAT_STAR; + Magic =:= ?MAGIC_USTAR -> + ?FORMAT_USTAR; + Magic =:= ?MAGIC_GNU, Version =:= ?VERSION_GNU -> + ?FORMAT_GNU; + true -> + ?FORMAT_V7 + end, + {ok, Format, V7} + end. -check_extract(_, #read_opts{files=all}) -> +unpack_format(Format, #header_v7{}=V7, Bin, Reader) + when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> + Mtime = posix_to_erlang_time(parse_numeric(V7#header_v7.mtime)), + Header0 = #tar_header{ + name=parse_string(V7#header_v7.name), + mode=parse_numeric(V7#header_v7.mode), + uid=parse_numeric(V7#header_v7.uid), + gid=parse_numeric(V7#header_v7.gid), + size=parse_numeric(V7#header_v7.size), + mtime=Mtime, + atime=Mtime, + ctime=Mtime, + typeflag=V7#header_v7.typeflag, + linkname=parse_string(V7#header_v7.linkname) + }, + Typeflag = Header0#tar_header.typeflag, + Header1 = if Format > ?FORMAT_V7 -> + unpack_modern(Format, V7, Bin, Header0); + true -> + Name = Header0#tar_header.name, + Header0#tar_header{name=safe_join_path("", Name)} + end, + HeaderOnly = is_header_only_type(Typeflag), + Header2 = if HeaderOnly -> + Header1#tar_header{size=0}; + true -> + Header1 + end, + if Typeflag =:= ?TYPE_GNU_SPARSE -> + Gnu = to_gnu(V7, Bin), + RealSize = parse_numeric(Gnu#header_gnu.real_size), + {Sparsemap, Reader2} = parse_sparse_map(Gnu, Reader), + Header3 = Header2#tar_header{size=RealSize}, + {Header3, new_sparse_file_reader(Reader2, Sparsemap, RealSize)}; + true -> + FileReader = #reg_file_reader{ + handle=Reader, + num_bytes=Header2#tar_header.size, + size=Header2#tar_header.size, + pos = 0 + }, + {Header2, FileReader} + end. + +unpack_modern(Format, #header_v7{}=V7, Bin, #tar_header{}=Header0) + when is_binary(Bin) -> + Typeflag = Header0#tar_header.typeflag, + Ustar = to_ustar(V7, Bin), + H0 = Header0#tar_header{ + uname=parse_string(Ustar#header_ustar.uname), + gname=parse_string(Ustar#header_ustar.gname)}, + H1 = if Typeflag =:= ?TYPE_CHAR + orelse Typeflag =:= ?TYPE_BLOCK -> + Ma = parse_numeric(Ustar#header_ustar.devmajor), + Mi = parse_numeric(Ustar#header_ustar.devminor), + H0#tar_header{ + devmajor=Ma, + devminor=Mi + }; + true -> + H0 + end, + {Prefix, H2} = case Format of + ?FORMAT_USTAR -> + {parse_string(Ustar#header_ustar.prefix), H1}; + ?FORMAT_STAR -> + Star = to_star(V7, Bin), + Prefix0 = parse_string(Star#header_star.prefix), + Atime0 = Star#header_star.atime, + Atime = posix_to_erlang_time(parse_numeric(Atime0)), + Ctime0 = Star#header_star.ctime, + Ctime = posix_to_erlang_time(parse_numeric(Ctime0)), + {Prefix0, H1#tar_header{ + atime=Atime, + ctime=Ctime + }}; + _ -> + {"", H1} + end, + Name = H2#tar_header.name, + H2#tar_header{name=safe_join_path(Prefix, Name)}. + + +safe_join_path([], Name) -> + strip_slashes(Name, both); +safe_join_path(Prefix, []) -> + strip_slashes(Prefix, right); +safe_join_path(Prefix, Name) -> + filename:join(strip_slashes(Prefix, right), strip_slashes(Name, both)). + +strip_slashes(Str, Direction) -> + string:strip(Str, Direction, $/). + +new_sparse_file_reader(Reader, Sparsemap, RealSize) -> + true = validate_sparse_entries(Sparsemap, RealSize), + #sparse_file_reader{ + handle = Reader, + num_bytes = RealSize, + pos = 0, + size = RealSize, + sparse_map = Sparsemap}. + +validate_sparse_entries(Entries, RealSize) -> + validate_sparse_entries(Entries, RealSize, 0, 0). +validate_sparse_entries([], _RealSize, _I, _LastOffset) -> true; -check_extract(Name, #read_opts{files=Files}) -> - ordsets:is_element(Name, Files). +validate_sparse_entries([#sparse_entry{}=Entry|Rest], RealSize, I, LastOffset) -> + Offset = Entry#sparse_entry.offset, + NumBytes = Entry#sparse_entry.num_bytes, + if + Offset > ?MAX_INT64-NumBytes -> + throw({error, {invalid_sparse_map_entry, offset_too_large}}); + Offset+NumBytes > RealSize -> + throw({error, {invalid_sparse_map_entry, offset_too_large}}); + I > 0 andalso LastOffset > Offset -> + throw({error, {invalid_sparse_map_entry, overlapping_offsets}}); + true -> + ok + end, + validate_sparse_entries(Rest, RealSize, I+1, Offset+NumBytes). + + +-spec parse_sparse_map(header_gnu(), reader_type()) -> + {[sparse_entry()], reader_type()}. +parse_sparse_map(#header_gnu{sparse=Sparse}, Reader) + when Sparse#sparse_array.is_extended -> + parse_sparse_map(Sparse, Reader, []); +parse_sparse_map(#header_gnu{sparse=Sparse}, Reader) -> + {Sparse#sparse_array.entries, Reader}. +parse_sparse_map(#sparse_array{is_extended=true,entries=Entries}, Reader, Acc) -> + case read_block(Reader) of + eof -> + throw({error, eof}); + {ok, Block, Reader2} -> + Sparse2 = to_sparse_array(Block), + parse_sparse_map(Sparse2, Reader2, Entries++Acc) + end; +parse_sparse_map(#sparse_array{entries=Entries}, Reader, Acc) -> + Sorted = lists:sort(fun (#sparse_entry{offset=A},#sparse_entry{offset=B}) -> + A =< B + end, Entries++Acc), + {Sorted, Reader}. + +%% Defined by taking the sum of the unsigned byte values of the +%% entire header record, treating the checksum bytes to as ASCII spaces +compute_checksum(<>) -> + C0 = checksum(H1) + (byte_size(H2) * $\s), + C1 = checksum(Rest), + C0 + C1. + +compute_signed_checksum(<>) -> + C0 = signed_checksum(H1) + (byte_size(H2) * $\s), + C1 = signed_checksum(Rest), + C0 + C1. -get_header(File) -> - case do_read(File, ?record_size) of - eof -> - throw({error,eof}); - {ok, Bin} when is_binary(Bin) -> - convert_header(Bin); - {ok, List} -> - convert_header(list_to_binary(List)); - {error, Reason} -> - throw({error, Reason}) - end. +%% Returns the checksum of a binary. +checksum(Bin) -> checksum(Bin, 0). +checksum(<>, Sum) -> + checksum(Rest, Sum+A); +checksum(<<>>, Sum) -> Sum. -%% Converts the tar header to a record. +signed_checksum(Bin) -> signed_checksum(Bin, 0). +signed_checksum(<>, Sum) -> + signed_checksum(Rest, Sum+A); +signed_checksum(<<>>, Sum) -> Sum. + +-spec parse_numeric(binary()) -> non_neg_integer(). +parse_numeric(<<>>) -> + 0; +parse_numeric(<> = Bin) -> + %% check for base-256 format first + %% if the bit is set, then all following bits constitute a two's + %% complement encoded number in big-endian byte order + if + First band 16#80 =/= 0 -> + %% Handling negative numbers relies on the following identity: + %% -a-1 == ^a + %% If the number is negative, we use an inversion mask to invert + %% the data bytes and treat the value as an unsigned number + Inv = if First band 16#40 =/= 0 -> 16#00; true -> 16#FF end, + Bytes = binary:bin_to_list(Bin), + Reducer = fun (C, {I, X}) -> + C1 = C bxor Inv, + C2 = if I =:= 0 -> C1 band 16#7F; true -> C1 end, + if (X bsr 56) > 0 -> + throw({error,integer_overflow}); + true -> + {I+1, (X bsl 8) bor C2} + end + end, + {_, N} = lists:foldl(Reducer, {0,0}, Bytes), + if (N bsr 63) > 0 -> + throw({error, integer_overflow}); + true -> + if Inv =:= 16#FF -> + -1 bxor N; + true -> + N + end + end; + true -> + %% normal case is an octal number + parse_octal(Bin) + end. -convert_header(Bin) when byte_size(Bin) =:= ?record_size -> - case verify_checksum(Bin) of - ok -> - Hd = #tar_header{name=get_name(Bin), - mode=from_octal(Bin, ?th_mode, ?th_mode_len), - uid=from_octal(Bin, ?th_uid, ?th_uid_len), - gid=from_octal(Bin, ?th_gid, ?th_gid_len), - size=from_octal(Bin, ?th_size, ?th_size_len), - mtime=from_octal(Bin, ?th_mtime, ?th_mtime_len), - linkname=from_string(Bin, - ?th_linkname, ?th_linkname_len), - typeflag=typeflag(Bin)}, - convert_header1(Hd); - eof -> - eof +parse_octal(Bin) when is_binary(Bin) -> + %% skip leading/trailing zero bytes and spaces + do_parse_octal(Bin, <<>>). +do_parse_octal(<<>>, <<>>) -> + 0; +do_parse_octal(<<>>, Acc) -> + case io_lib:fread("~8u", binary:bin_to_list(Acc)) of + {error, _} -> throw({error, invalid_tar_checksum}); + {ok, [Octal], []} -> Octal; + {ok, _, _} -> throw({error, invalid_tar_checksum}) end; -convert_header(Bin) when byte_size(Bin) =:= 0 -> +do_parse_octal(<<$\s,Rest/binary>>, Acc) -> + do_parse_octal(Rest, Acc); +do_parse_octal(<<0, Rest/binary>>, Acc) -> + do_parse_octal(Rest, Acc); +do_parse_octal(<>, Acc) -> + do_parse_octal(Rest, <>). + +parse_string(Bin) when is_binary(Bin) -> + do_parse_string(Bin, <<>>). +do_parse_string(<<>>, Acc) -> + case unicode:characters_to_list(Acc) of + Str when is_list(Str) -> + Str; + {incomplete, _Str, _Rest} -> + binary:bin_to_list(Acc); + {error, _Str, _Rest} -> + throw({error, {bad_header, invalid_string}}) + end; +do_parse_string(<<0, _/binary>>, Acc) -> + do_parse_string(<<>>, Acc); +do_parse_string(<>, Acc) -> + do_parse_string(Rest, <>). + +convert_header(Bin, #reader{pos=Pos}=Reader) + when byte_size(Bin) =:= ?BLOCK_SIZE, (Pos rem ?BLOCK_SIZE) =:= 0 -> + case get_format(Bin) of + ?FORMAT_UNKNOWN -> + throw({error, bad_header}); + {ok, Format, V7} -> + unpack_format(Format, V7, Bin, Reader); + {error, Reason} -> + throw({error, {bad_header, Reason}}) + end; +convert_header(Bin, #reader{pos=Pos}) when byte_size(Bin) =:= ?BLOCK_SIZE -> + throw({error, misaligned_read, Pos}); +convert_header(Bin, _Reader) when byte_size(Bin) =:= 0 -> eof; -convert_header(_Bin) -> +convert_header(_Bin, _Reader) -> throw({error, eof}). -%% Basic sanity. Better set the element size to zero here if the type -%% always is of zero length. - -convert_header1(H) when H#tar_header.typeflag =:= symlink, H#tar_header.size =/= 0 -> - convert_header1(H#tar_header{size=0}); -convert_header1(H) when H#tar_header.typeflag =:= directory, H#tar_header.size =/= 0 -> - convert_header1(H#tar_header{size=0}); -convert_header1(Header) -> - Header. - -typeflag(Bin) -> - [T] = binary_to_list(Bin, ?th_typeflag+1, ?th_typeflag+1), - case T of - 0 -> regular; - $0 -> regular; - $1 -> link; - $2 -> symlink; - $3 -> char; - $4 -> block; - $5 -> directory; - $6 -> fifo; - $7 -> regular; - _ -> unknown +%% Creates a partially-populated header record based +%% on the provided file_info record. If the file is +%% a symlink, then `link` is used as the link target. +%% If the file is a directory, a slash is appended to the name. +fileinfo_to_header(Name, #file_info{}=Fi, Link) when is_list(Name) -> + BaseHeader = #tar_header{name=Name, + mtime=Fi#file_info.mtime, + atime=Fi#file_info.atime, + ctime=Fi#file_info.ctime, + mode=Fi#file_info.mode, + uid=Fi#file_info.uid, + gid=Fi#file_info.gid, + typeflag=?TYPE_REGULAR}, + do_fileinfo_to_header(BaseHeader, Fi, Link). + +do_fileinfo_to_header(Header, #file_info{size=Size,type=regular}, _Link) -> + Header#tar_header{size=Size,typeflag=?TYPE_REGULAR}; +do_fileinfo_to_header(#tar_header{name=Name}=Header, + #file_info{type=directory}, _Link) -> + Header#tar_header{name=Name++"/",typeflag=?TYPE_DIR}; +do_fileinfo_to_header(Header, #file_info{type=symlink}, Link) -> + Header#tar_header{typeflag=?TYPE_SYMLINK,linkname=Link}; +do_fileinfo_to_header(Header, #file_info{type=device,mode=Mode}=Fi, _Link) + when (Mode band ?S_IFMT) =:= ?S_IFCHR -> + Header#tar_header{typeflag=?TYPE_CHAR, + devmajor=Fi#file_info.major_device, + devminor=Fi#file_info.minor_device}; +do_fileinfo_to_header(Header, #file_info{type=device,mode=Mode}=Fi, _Link) + when (Mode band ?S_IFMT) =:= ?S_IFBLK -> + Header#tar_header{typeflag=?TYPE_BLOCK, + devmajor=Fi#file_info.major_device, + devminor=Fi#file_info.minor_device}; +do_fileinfo_to_header(Header, #file_info{type=other,mode=Mode}, _Link) + when (Mode band ?S_IFMT) =:= ?S_FIFO -> + Header#tar_header{typeflag=?TYPE_FIFO}; +do_fileinfo_to_header(Header, Fi, _Link) -> + {error, {invalid_file_type, Header#tar_header.name, Fi}}. + +is_ascii(Str) when is_list(Str) -> + not lists:any(fun (Char) -> Char >= 16#80 end, Str); +is_ascii(Bin) when is_binary(Bin) -> + is_ascii1(Bin). + +is_ascii1(<<>>) -> + true; +is_ascii1(<>) when C >= 16#80 -> + false; +is_ascii1(<<_, Rest/binary>>) -> + is_ascii1(Rest). + +to_ascii(Str) when is_list(Str) -> + case is_ascii(Str) of + true -> + unicode:characters_to_binary(Str); + false -> + Chars = lists:filter(fun (Char) -> Char < 16#80 end, Str), + unicode:characters_to_binary(Chars) + end; +to_ascii(Bin) when is_binary(Bin) -> + to_ascii(Bin, <<>>). +to_ascii(<<>>, Acc) -> + Acc; +to_ascii(<>, Acc) when C < 16#80 -> + to_ascii(Rest, <>); +to_ascii(<<_, Rest/binary>>, Acc) -> + to_ascii(Rest, Acc). + +is_header_only_type(?TYPE_SYMLINK) -> true; +is_header_only_type(?TYPE_LINK) -> true; +is_header_only_type(?TYPE_DIR) -> true; +is_header_only_type(_) -> false. + +posix_to_erlang_time(Sec) -> + OneMillion = 1000000, + Time = calendar:now_to_datetime({Sec div OneMillion, Sec rem OneMillion, 0}), + erlang:universaltime_to_localtime(Time). + +foldl_read(#reader{access=read}=Reader, Fun, Accu, #read_opts{}=Opts) + when is_function(Fun,4) -> + case foldl_read0(Reader, Fun, Accu, Opts) of + {ok, Result, _Reader2} -> + Result; + {error, _} = Err -> + Err + end; +foldl_read(#reader{access=Access}, _Fun, _Accu, _Opts) -> + {error, {read_mode_expected, Access}}; +foldl_read(TarName, Fun, Accu, #read_opts{}=Opts) + when is_function(Fun,4) -> + try open(TarName, [read|Opts#read_opts.open_mode]) of + {ok, #reader{access=read}=Reader} -> + foldl_read(Reader, Fun, Accu, Opts); + {error, _} = Err -> + Err + catch + throw:Err -> + Err end. -%% Get the name of the file from the prefix and name fields of the -%% tar header. - -get_name(Bin0) -> - List0 = get_name_raw(Bin0), - case file:native_name_encoding() of - utf8 -> - Bin = list_to_binary(List0), - case unicode:characters_to_list(Bin) of - {error,_,_} -> - List0; - List when is_list(List) -> - List - end; - latin1 -> - List0 +foldl_read0(Reader, Fun, Accu, Opts) -> + try foldl_read1(Fun, Accu, Reader, Opts, #{}) of + {ok,_,_} = Ok -> + Ok + catch + throw:{error, {Reason, Format, Args}} -> + read_verbose(Opts, Format, Args), + {error, Reason}; + throw:Err -> + Err end. -get_name_raw(Bin) -> - Name = from_string(Bin, ?th_name, ?th_name_len), - case binary_to_list(Bin, ?th_prefix+1, ?th_prefix+1) of - [0] -> - Name; - [_] -> - Prefix = binary_to_list(Bin, ?th_prefix+1, byte_size(Bin)), - lists:reverse(remove_nulls(Prefix), [$/|Name]) +foldl_read1(Fun, Accu0, Reader0, Opts, ExtraHeaders) -> + {ok, Reader1} = skip_unread(Reader0), + case get_header(Reader1) of + eof -> + Fun(eof, Reader1, Opts, Accu0); + {Header, Reader2} -> + case Header#tar_header.typeflag of + ?TYPE_X_HEADER -> + {ExtraHeaders2, Reader3} = parse_pax(Reader2), + ExtraHeaders3 = maps:merge(ExtraHeaders, ExtraHeaders2), + foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders3); + ?TYPE_GNU_LONGNAME -> + {RealName, Reader3} = get_real_name(Reader2), + ExtraHeaders2 = maps:put(?PAX_PATH, + parse_string(RealName), ExtraHeaders), + foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders2); + ?TYPE_GNU_LONGLINK -> + {RealName, Reader3} = get_real_name(Reader2), + ExtraHeaders2 = maps:put(?PAX_LINKPATH, + parse_string(RealName), ExtraHeaders), + foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders2); + _ -> + Header1 = merge_pax(Header, ExtraHeaders), + {ok, NewAccu, Reader3} = Fun(Header1, Reader2, Opts, Accu0), + foldl_read1(Fun, NewAccu, Reader3, Opts, #{}) + end end. -from_string(Bin, Pos, Len) -> - lists:reverse(remove_nulls(binary_to_list(Bin, Pos+1, Pos+Len))). - -%% Returns all characters up to (but not including) the first null -%% character, in REVERSE order. - -remove_nulls(List) -> - remove_nulls(List, []). - -remove_nulls([0|_], Result) -> - remove_nulls([], Result); -remove_nulls([C|Rest], Result) -> - remove_nulls(Rest, [C|Result]); -remove_nulls([], Result) -> - Result. - -from_octal(Bin, Pos, Len) -> - from_octal(binary_to_list(Bin, Pos+1, Pos+Len)). - -from_octal([$\s|Rest]) -> - from_octal(Rest); -from_octal([Digit|Rest]) when $0 =< Digit, Digit =< $7 -> - from_octal(Rest, Digit-$0); -from_octal(Bin) when is_binary(Bin) -> - from_octal(binary_to_list(Bin)); -from_octal(Other) -> - throw({error, {bad_header, "Bad octal number: ~p", [Other]}}). - -from_octal([Digit|Rest], Result) when $0 =< Digit, Digit =< $7 -> - from_octal(Rest, Result*8+Digit-$0); -from_octal([$\s|_], Result) -> - Result; -from_octal([0|_], Result) -> - Result; -from_octal(Other, _) -> - throw({error, {bad_header, "Bad contents in octal field: ~p", [Other]}}). - -%% Retrieves the next element from the archive. -%% Returns {ok, Bin} | eof | {error, Reason} - -get_element(File, #tar_header{size = 0}) -> - skip_to_next(File), - {ok,<<>>}; -get_element(File, #tar_header{size = Size}) -> - case do_read(File, Size) of - {ok,Bin}=Res when byte_size(Bin) =:= Size -> - skip_to_next(File), - Res; - {ok,List} when length(List) =:= Size -> - skip_to_next(File), - {ok,list_to_binary(List)}; - {ok,_} -> throw({error,eof}); - {error, Reason} -> throw({error, Reason}); - eof -> throw({error,eof}) +%% Applies all known PAX attributes to the current tar header +-spec merge_pax(tar_header(), #{binary() => binary()}) -> tar_header(). +merge_pax(Header, ExtraHeaders) when is_map(ExtraHeaders) -> + do_merge_pax(Header, maps:to_list(ExtraHeaders)). + +do_merge_pax(Header, []) -> + Header; +do_merge_pax(Header, [{?PAX_PATH, Path}|Rest]) -> + do_merge_pax(Header#tar_header{name=unicode:characters_to_list(Path)}, Rest); +do_merge_pax(Header, [{?PAX_LINKPATH, LinkPath}|Rest]) -> + do_merge_pax(Header#tar_header{linkname=unicode:characters_to_list(LinkPath)}, Rest); +do_merge_pax(Header, [{?PAX_GNAME, Gname}|Rest]) -> + do_merge_pax(Header#tar_header{gname=unicode:characters_to_list(Gname)}, Rest); +do_merge_pax(Header, [{?PAX_UNAME, Uname}|Rest]) -> + do_merge_pax(Header#tar_header{uname=unicode:characters_to_list(Uname)}, Rest); +do_merge_pax(Header, [{?PAX_UID, Uid}|Rest]) -> + Uid2 = binary_to_integer(Uid), + do_merge_pax(Header#tar_header{uid=Uid2}, Rest); +do_merge_pax(Header, [{?PAX_GID, Gid}|Rest]) -> + Gid2 = binary_to_integer(Gid), + do_merge_pax(Header#tar_header{gid=Gid2}, Rest); +do_merge_pax(Header, [{?PAX_ATIME, Atime}|Rest]) -> + Atime2 = parse_pax_time(Atime), + do_merge_pax(Header#tar_header{atime=Atime2}, Rest); +do_merge_pax(Header, [{?PAX_MTIME, Mtime}|Rest]) -> + Mtime2 = parse_pax_time(Mtime), + do_merge_pax(Header#tar_header{mtime=Mtime2}, Rest); +do_merge_pax(Header, [{?PAX_CTIME, Ctime}|Rest]) -> + Ctime2 = parse_pax_time(Ctime), + do_merge_pax(Header#tar_header{ctime=Ctime2}, Rest); +do_merge_pax(Header, [{?PAX_SIZE, Size}|Rest]) -> + Size2 = binary_to_integer(Size), + do_merge_pax(Header#tar_header{size=Size2}, Rest); +do_merge_pax(Header, [{<>, _Value}|Rest]) -> + do_merge_pax(Header, Rest); +do_merge_pax(Header, [_Ignore|Rest]) -> + do_merge_pax(Header, Rest). + +%% Returns the time since UNIX epoch as a datetime +-spec parse_pax_time(binary()) -> calendar:datetime(). +parse_pax_time(Bin) when is_binary(Bin) -> + TotalNano = case binary:split(Bin, [<<$.>>]) of + [SecondsStr, NanoStr0] -> + Seconds = binary_to_integer(SecondsStr), + if byte_size(NanoStr0) < ?MAX_NANO_INT_SIZE -> + %% right pad + PaddingN = ?MAX_NANO_INT_SIZE-byte_size(NanoStr0), + Padding = binary:copy(<<$0>>, PaddingN), + NanoStr1 = <>, + Nano = binary_to_integer(NanoStr1), + (Seconds*?BILLION)+Nano; + byte_size(NanoStr0) > ?MAX_NANO_INT_SIZE -> + %% right truncate + NanoStr1 = binary_part(NanoStr0, 0, ?MAX_NANO_INT_SIZE), + Nano = binary_to_integer(NanoStr1), + (Seconds*?BILLION)+Nano; + true -> + (Seconds*?BILLION)+binary_to_integer(NanoStr0) + end; + [SecondsStr] -> + binary_to_integer(SecondsStr)*?BILLION + end, + %% truncate to microseconds + Micro = TotalNano div 1000, + Mega = Micro div 1000000000000, + Secs = Micro div 1000000 - (Mega*1000000), + Micro2 = Micro rem 1000000, + calendar:now_to_datetime({Mega, Secs, Micro2}). + +%% Given a regular file reader, reads the whole file and +%% parses all extended attributes it contains. +parse_pax(#reg_file_reader{handle=Handle,num_bytes=0}) -> + {#{}, Handle}; +parse_pax(#reg_file_reader{handle=Handle0,num_bytes=NumBytes}) -> + case do_read(Handle0, NumBytes) of + {ok, Bytes, Handle1} -> + do_parse_pax(Handle1, Bytes, #{}); + {error, _} = Err -> + throw(Err) end. -%% Verify the checksum in the header. First try an unsigned addition -%% of all bytes in the header (as it should be according to Posix). - -verify_checksum(Bin) -> - <> = Bin, - case checksum(H1) + checksum(H2) of - 0 -> eof; - Checksum0 -> - Csum = from_octal(CheckStr), - CsumInit = ?th_chksum_len * $\s, - case Checksum0 + CsumInit of - Csum -> ok; - Unsigned -> - verify_checksum(H1, H2, CsumInit, Csum, Unsigned) - end +do_parse_pax(Reader, <<>>, Headers) -> + {Headers, Reader}; +do_parse_pax(Reader, Bin, Headers) -> + {Key, Value, Residual} = parse_pax_record(Bin), + NewHeaders = maps:put(Key, Value, Headers), + do_parse_pax(Reader, Residual, NewHeaders). + +%% Parse an extended attribute +parse_pax_record(Bin) when is_binary(Bin) -> + case binary:split(Bin, [<<$\n>>]) of + [Record, Residual] -> + case binary:split(Record, [<<$\s>>], [trim_all]) of + [_Len, Record1] -> + case binary:split(Record1, [<<$=>>], [trim_all]) of + [AttrName, AttrValue] -> + {AttrName, AttrValue, Residual}; + _Other -> + throw({error, malformed_pax_record}) + end; + _Other -> + throw({error, malformed_pax_record}) + end; + _Other -> + throw({error, malformed_pax_record}) end. -%% The checksums didn't match. Now try a signed addition. +get_real_name(#reg_file_reader{handle=Handle,num_bytes=0}) -> + {"", Handle}; +get_real_name(#reg_file_reader{handle=Handle0,num_bytes=NumBytes}) -> + case do_read(Handle0, NumBytes) of + {ok, RealName, Handle1} -> + {RealName, Handle1}; + {error, _} = Err -> + throw(Err) + end; +get_real_name(#sparse_file_reader{num_bytes=NumBytes}=Reader0) -> + case do_read(Reader0, NumBytes) of + {ok, RealName, Reader1} -> + {RealName, Reader1}; + {error, _} = Err -> + throw(Err) + end. -verify_checksum(H1, H2, Csum, ShouldBe, Unsigned) -> - case signed_sum(binary_to_list(H1), signed_sum(binary_to_list(H2), Csum)) of - ShouldBe -> ok; - Signed -> - throw({error, - {bad_header, - "Incorrect directory checksum ~w (~w), should be ~w", - [Signed, Unsigned, ShouldBe]}}) +%% Skip the remaining bytes for the current file entry +skip_file(#reg_file_reader{handle=Handle0,pos=Pos,size=Size}=Reader) -> + Padding = skip_padding(Size), + AbsPos = Handle0#reader.pos + (Size-Pos) + Padding, + case do_position(Handle0, AbsPos) of + {ok, _, Handle1} -> + Reader#reg_file_reader{handle=Handle1,num_bytes=0,pos=Size}; + Err -> + throw(Err) + end; +skip_file(#sparse_file_reader{pos=Pos,size=Size}=Reader) -> + case do_read(Reader, Size-Pos) of + {ok, _, Reader2} -> + Reader2; + Err -> + throw(Err) end. -signed_sum([C|Rest], Sum) when C < 128 -> - signed_sum(Rest, Sum+C); -signed_sum([C|Rest], Sum) -> - signed_sum(Rest, Sum+C-256); -signed_sum([], Sum) -> Sum. - -write_extracted_element(Header, Bin, Opts) - when Opts#read_opts.output =:= memory -> - case Header#tar_header.typeflag of - regular -> - {ok, {Header#tar_header.name, Bin}}; - _ -> - ok +skip_padding(0) -> + 0; +skip_padding(Size) when (Size rem ?BLOCK_SIZE) =:= 0 -> + 0; +skip_padding(Size) when Size =< ?BLOCK_SIZE -> + ?BLOCK_SIZE - Size; +skip_padding(Size) -> + ?BLOCK_SIZE - (Size rem ?BLOCK_SIZE). + +skip_unread(#reader{pos=Pos}=Reader0) when (Pos rem ?BLOCK_SIZE) > 0 -> + Padding = skip_padding(Pos + ?BLOCK_SIZE), + AbsPos = Pos + Padding, + case do_position(Reader0, AbsPos) of + {ok, _, Reader1} -> + {ok, Reader1}; + Err -> + throw(Err) + end; +skip_unread(#reader{}=Reader) -> + {ok, Reader}; +skip_unread(#reg_file_reader{handle=Handle,num_bytes=0}) -> + skip_unread(Handle); +skip_unread(#reg_file_reader{}=Reader) -> + #reg_file_reader{handle=Handle} = skip_file(Reader), + {ok, Handle}; +skip_unread(#sparse_file_reader{handle=Handle,num_bytes=0}) -> + skip_unread(Handle); +skip_unread(#sparse_file_reader{}=Reader) -> + #sparse_file_reader{handle=Handle} = skip_file(Reader), + {ok, Handle}. + +write_extracted_element(#tar_header{name=Name,typeflag=Type}, + Bin, + #read_opts{output=memory}=Opts) -> + case typeflag(Type) of + regular -> + read_verbose(Opts, "x ~ts~n", [Name]), + {ok, {Name, Bin}}; + _ -> + ok end; -write_extracted_element(Header, Bin, Opts) -> - Name = filename:absname(Header#tar_header.name, Opts#read_opts.cwd), - Created = - case Header#tar_header.typeflag of - regular -> - write_extracted_file(Name, Bin, Opts); - directory -> - create_extracted_dir(Name, Opts); - symlink -> - create_symlink(Name, Header, Opts); - Other -> % Ignore. - read_verbose(Opts, "x ~ts - unsupported type ~p~n", - [Name, Other]), - not_written - end, +write_extracted_element(#tar_header{name=Name0}=Header, Bin, Opts) -> + Name1 = filename:absname(Name0, Opts#read_opts.cwd), + Created = + case typeflag(Header#tar_header.typeflag) of + regular -> + create_regular(Name1, Name0, Bin, Opts); + directory -> + read_verbose(Opts, "x ~ts~n", [Name0]), + create_extracted_dir(Name1, Opts); + symlink -> + read_verbose(Opts, "x ~ts~n", [Name0]), + create_symlink(Name1, Header#tar_header.linkname, Opts); + Device when Device =:= char orelse Device =:= block -> + %% char/block devices will be created as empty files + %% and then have their major/minor device set later + create_regular(Name1, Name0, <<>>, Opts); + fifo -> + %% fifo devices will be created as empty files + create_regular(Name1, Name0, <<>>, Opts); + Other -> % Ignore. + read_verbose(Opts, "x ~ts - unsupported type ~p~n", + [Name0, Other]), + not_written + end, case Created of - ok -> set_extracted_file_info(Name, Header); - not_written -> ok + ok -> set_extracted_file_info(Name1, Header); + not_written -> ok + end. + +create_regular(Name, NameInArchive, Bin, Opts) -> + case write_extracted_file(Name, Bin, Opts) of + not_written -> + read_verbose(Opts, "x ~ts - exists, not created~n", [NameInArchive]), + not_written; + Ok -> + read_verbose(Opts, "x ~ts~n", [NameInArchive]), + Ok end. create_extracted_dir(Name, _Opts) -> case file:make_dir(Name) of - ok -> ok; - {error,enotsup} -> not_written; - {error,eexist} -> not_written; - {error,enoent} -> make_dirs(Name, dir); - {error,Reason} -> throw({error, Reason}) + ok -> ok; + {error,enotsup} -> not_written; + {error,eexist} -> not_written; + {error,enoent} -> make_dirs(Name, dir); + {error,Reason} -> throw({error, Reason}) end. -create_symlink(Name, #tar_header{linkname=Linkname}=Header, Opts) -> +create_symlink(Name, Linkname, Opts) -> case file:make_symlink(Linkname, Name) of - ok -> ok; - {error,enoent} -> - ok = make_dirs(Name, file), - create_symlink(Name, Header, Opts); - {error,eexist} -> not_written; - {error,enotsup} -> - read_verbose(Opts, "x ~ts - symbolic links not supported~n", [Name]), - not_written; - {error,Reason} -> throw({error, Reason}) + ok -> ok; + {error,enoent} -> + ok = make_dirs(Name, file), + create_symlink(Name, Linkname, Opts); + {error,eexist} -> not_written; + {error,enotsup} -> + read_verbose(Opts, "x ~ts - symbolic links not supported~n", [Name]), + not_written; + {error,Reason} -> throw({error, Reason}) end. write_extracted_file(Name, Bin, Opts) -> Write = - case Opts#read_opts.keep_old_files of - true -> - case file:read_file_info(Name) of - {ok, _} -> false; - _ -> true - end; - false -> true - end, + case Opts#read_opts.keep_old_files of + true -> + case file:read_file_info(Name) of + {ok, _} -> false; + _ -> true + end; + false -> true + end, case Write of - true -> - read_verbose(Opts, "x ~ts~n", [Name]), - write_file(Name, Bin); - false -> - read_verbose(Opts, "x ~ts - exists, not created~n", [Name]), - not_written + true -> write_file(Name, Bin); + false -> not_written end. write_file(Name, Bin) -> case file:write_file(Name, Bin) of - ok -> ok; - {error,enoent} -> - ok = make_dirs(Name, file), - write_file(Name, Bin); - {error,Reason} -> - throw({error, Reason}) + ok -> ok; + {error,enoent} -> + ok = make_dirs(Name, file), + write_file(Name, Bin); + {error,Reason} -> + throw({error, Reason}) end. -set_extracted_file_info(_, #tar_header{typeflag = symlink}) -> ok; -set_extracted_file_info(Name, #tar_header{mode=Mode, mtime=Mtime}) -> - Info = #file_info{mode=Mode, mtime=posix_to_erlang_time(Mtime)}, +set_extracted_file_info(_, #tar_header{typeflag = ?TYPE_SYMLINK}) -> ok; +set_extracted_file_info(_, #tar_header{typeflag = ?TYPE_LINK}) -> ok; +set_extracted_file_info(Name, #tar_header{typeflag = ?TYPE_CHAR}=Header) -> + set_device_info(Name, Header); +set_extracted_file_info(Name, #tar_header{typeflag = ?TYPE_BLOCK}=Header) -> + set_device_info(Name, Header); +set_extracted_file_info(Name, #tar_header{mtime=Mtime,mode=Mode}) -> + Info = #file_info{mode=Mode, mtime=Mtime}, + file:write_file_info(Name, Info). + +set_device_info(Name, #tar_header{}=Header) -> + Mtime = Header#tar_header.mtime, + Mode = Header#tar_header.mode, + Devmajor = Header#tar_header.devmajor, + Devminor = Header#tar_header.devminor, + Info = #file_info{ + mode=Mode, + mtime=Mtime, + major_device=Devmajor, + minor_device=Devminor + }, file:write_file_info(Name, Info). %% Makes all directories leading up to the file. make_dirs(Name, file) -> - filelib:ensure_dir(Name); + filelib:ensure_dir(Name); make_dirs(Name, dir) -> - filelib:ensure_dir(filename:join(Name,"*")). + filelib:ensure_dir(filename:join(Name,"*")). %% Prints the message on if the verbose option is given (for reading). - read_verbose(#read_opts{verbose=true}, Format, Args) -> - io:format(Format, Args), - io:nl(); + io:format(Format, Args); read_verbose(_, _, _) -> ok. - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%% -%%% Utility functions. -%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%% Returns the checksum of a binary. - -checksum(Bin) -> checksum(Bin, 0). - -checksum(<>, Sum) -> - checksum(T, Sum+A+B+C+D+E+F+G+H); -checksum(<>, Sum) -> - checksum(T, Sum+A); -checksum(<<>>, Sum) -> Sum. - -%% Returns a list of zeroes to pad out to the given block size. - -padding(Size, BlockSize) -> - zeroes(pad_size(Size, BlockSize)). - -pad_size(Size, BlockSize) -> - case Size rem BlockSize of - 0 -> 0; - Rem -> BlockSize-Rem - end. - -zeroes(0) -> []; -zeroes(1) -> [0]; -zeroes(2) -> [0,0]; -zeroes(Number) -> - Half = zeroes(Number div 2), - case Number rem 2 of - 0 -> [Half|Half]; - 1 -> [Half|[0|Half]] - end. - -%% Skips the given number of bytes rounded up to an even record. - -skip(File, Size) -> - %% Note: There is no point in handling failure to get the current position - %% in the file. If it doesn't work, something serious is wrong. - Amount = ((Size + ?record_size - 1) div ?record_size) * ?record_size, - {ok,_} = do_position(File, {cur, Amount}), - ok. - -%% Skips to the next record in the file. - -skip_to_next(File) -> - %% Note: There is no point in handling failure to get the current position - %% in the file. If it doesn't work, something serious is wrong. - {ok, Position} = do_position(File, {cur, 0}), - NewPosition = ((Position + ?record_size - 1) div ?record_size) * ?record_size, - {ok,NewPosition} = do_position(File, NewPosition), - ok. - %% Prints the message on if the verbose option is given. - add_verbose(#add_opts{verbose=true}, Format, Args) -> io:format(Format, Args); add_verbose(_, _, _) -> ok. -%% Converts a tuple containing the time to a Posix time (seconds -%% since Jan 1, 1970). +%%%%%%%%%%%%%%%%%% +%% I/O primitives +%%%%%%%%%%%%%%%%%% + +do_write(#reader{handle=Handle,func=Fun}=Reader0, Data) + when is_function(Fun,2) -> + case Fun(write,{Handle,Data}) of + ok -> + {ok, Pos, Reader1} = do_position(Reader0, {cur,0}), + {ok, Reader1#reader{pos=Pos}}; + {error, _} = Err -> + Err + end. -posix_time(Time) -> - EpochStart = {{1970,1,1},{0,0,0}}, - {Days,{Hour,Min,Sec}} = calendar:time_difference(EpochStart, Time), - 86400*Days + 3600*Hour + 60*Min + Sec. +do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=0}=Opts) + when is_function(Fun, 2) -> + do_copy(Reader, Source, Opts#add_opts{chunk_size=65536}); +do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=ChunkSize}) + when is_function(Fun, 2) -> + case file:open(Source, [read, binary]) of + {ok, SourceFd} -> + case copy_chunked(Reader, SourceFd, ChunkSize, 0) of + {ok, _Copied, _Reader2} = Ok-> + _ = file:close(SourceFd), + Ok; + Err -> + _ = file:close(SourceFd), + throw(Err) + end; + Err -> + throw(Err) + end. -posix_to_erlang_time(Sec) -> - OneMillion = 1000000, - Time = calendar:now_to_datetime({Sec div OneMillion, Sec rem OneMillion, 0}), - erlang:universaltime_to_localtime(Time). +copy_chunked(#reader{}=Reader, Source, ChunkSize, Copied) -> + case file:read(Source, ChunkSize) of + {ok, Bin} -> + {ok, Reader2} = do_write(Reader, Bin), + copy_chunked(Reader2, Source, ChunkSize, Copied+byte_size(Bin)); + eof -> + {ok, Copied, Reader}; + Other -> + Other + end. -read_file_and_info(Name, Opts) -> - ReadInfo = Opts#add_opts.read_info, - case ReadInfo(Name) of - {ok,Info} when Info#file_info.type =:= regular, - Opts#add_opts.chunk_size>0 -> - {ok,chunked,Info}; - {ok,Info} when Info#file_info.type =:= regular -> - case file:read_file(Name) of - {ok,Bin} -> - {ok,Bin,Info}; - Error -> - Error - end; - {ok,Info} when Info#file_info.type =:= symlink -> - case file:read_link(Name) of - {ok,PointsTo} -> - {ok,PointsTo,Info}; - Error -> - Error - end; - {ok, Info} -> - {ok,[],Info}; - Error -> - Error + +do_position(#reader{handle=Handle,func=Fun}=Reader, Pos) + when is_function(Fun,2)-> + case Fun(position, {Handle,Pos}) of + {ok, NewPos} -> + %% since Pos may not always be an absolute seek, + %% make sure we update the reader with the new absolute position + {ok, AbsPos} = Fun(position, {Handle, {cur, 0}}), + {ok, NewPos, Reader#reader{pos=AbsPos}}; + Other -> + Other end. -foreach_while_ok(Fun, [First|Rest]) -> - case Fun(First) of - ok -> foreach_while_ok(Fun, Rest); - Other -> Other +do_read(#reg_file_reader{handle=Handle,pos=Pos,size=Size}=Reader, Len) -> + NumBytes = Size - Pos, + ActualLen = if NumBytes - Len < 0 -> NumBytes; true -> Len end, + case do_read(Handle, ActualLen) of + {ok, Bin, Handle2} -> + NewPos = Pos + ActualLen, + NumBytes2 = Size - NewPos, + Reader1 = Reader#reg_file_reader{ + handle=Handle2, + pos=NewPos, + num_bytes=NumBytes2}, + {ok, Bin, Reader1}; + Other -> + Other end; -foreach_while_ok(_, []) -> ok. - -open_mode(Mode) -> - open_mode(Mode, false, [raw], []). +do_read(#sparse_file_reader{}=Reader, Len) -> + do_sparse_read(Reader, Len); +do_read(#reader{pos=Pos,handle=Handle,func=Fun}=Reader, Len) + when is_function(Fun,2)-> + %% Always convert to binary internally + case Fun(read2,{Handle,Len}) of + {ok, List} when is_list(List) -> + Bin = list_to_binary(List), + NewPos = Pos+byte_size(Bin), + {ok, Bin, Reader#reader{pos=NewPos}}; + {ok, Bin} when is_binary(Bin) -> + NewPos = Pos+byte_size(Bin), + {ok, Bin, Reader#reader{pos=NewPos}}; + Other -> + Other + end. -open_mode(read, _, Raw, _) -> - {ok, read, Raw, []}; -open_mode(write, _, Raw, _) -> - {ok, write, Raw, []}; -open_mode([read|Rest], false, Raw, Opts) -> - open_mode(Rest, read, Raw, Opts); -open_mode([write|Rest], false, Raw, Opts) -> - open_mode(Rest, write, Raw, Opts); -open_mode([compressed|Rest], Access, Raw, Opts) -> - open_mode(Rest, Access, Raw, [compressed|Opts]); -open_mode([cooked|Rest], Access, _Raw, Opts) -> - open_mode(Rest, Access, [], Opts); -open_mode([], Access, Raw, Opts) -> - {ok, Access, Raw, Opts}; -open_mode(_, _, _, _) -> - {error, einval}. -%%%================================================================ -do_write({tar_descriptor,UsrHandle,Fun}, Data) -> Fun(write,{UsrHandle,Data}). +do_sparse_read(Reader, Len) -> + do_sparse_read(Reader, Len, <<>>). + +do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{num_bytes=0}|Entries] + }=Reader0, Len, Acc) -> + %% skip all empty fragments + Reader1 = Reader0#sparse_file_reader{sparse_map=Entries}, + do_sparse_read(Reader1, Len, Acc); +do_sparse_read(#sparse_file_reader{sparse_map=[], + pos=Pos,size=Size}=Reader0, Len, Acc) + when Pos < Size -> + %% if there are no more fragments, it is possible that there is one last sparse hole + %% this behaviour matches the BSD tar utility + %% however, GNU tar stops returning data even if we haven't reached the end + {ok, Bin, Reader1} = read_sparse_hole(Reader0, Size, Len), + do_sparse_read(Reader1, Len-byte_size(Bin), <>); +do_sparse_read(#sparse_file_reader{sparse_map=[]}=Reader, _Len, Acc) -> + {ok, Acc, Reader}; +do_sparse_read(#sparse_file_reader{}=Reader, 0, Acc) -> + {ok, Acc, Reader}; +do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{offset=Offset}|_], + pos=Pos}=Reader0, Len, Acc) + when Pos < Offset -> + {ok, Bin, Reader1} = read_sparse_hole(Reader0, Offset, Offset-Pos), + do_sparse_read(Reader1, Len-byte_size(Bin), <>); +do_sparse_read(#sparse_file_reader{sparse_map=[Entry|Entries], + pos=Pos}=Reader0, Len, Acc) -> + %% we're in a data fragment, so read from it + %% end offset of fragment + EndPos = Entry#sparse_entry.offset + Entry#sparse_entry.num_bytes, + %% bytes left in fragment + NumBytes = EndPos - Pos, + ActualLen = if Len > NumBytes -> NumBytes; true -> Len end, + case do_read(Reader0#sparse_file_reader.handle, ActualLen) of + {ok, Bin, Handle} -> + BytesRead = byte_size(Bin), + ActualEndPos = Pos+BytesRead, + Reader1 = if ActualEndPos =:= EndPos -> + Reader0#sparse_file_reader{sparse_map=Entries}; + true -> + Reader0 + end, + Size = Reader1#sparse_file_reader.size, + NumBytes2 = Size - ActualEndPos, + Reader2 = Reader1#sparse_file_reader{ + handle=Handle, + pos=ActualEndPos, + num_bytes=NumBytes2}, + do_sparse_read(Reader2, Len-byte_size(Bin), <>); + Other -> + Other + end. + +%% Reads a sparse hole ending at Offset +read_sparse_hole(#sparse_file_reader{pos=Pos}=Reader, Offset, Len) -> + N = Offset - Pos, + N2 = if N > Len -> + Len; + true -> + N + end, + Bin = <<0:N2/unit:8>>, + NumBytes = Reader#sparse_file_reader.size - (Pos+N2), + {ok, Bin, Reader#sparse_file_reader{ + num_bytes=NumBytes, + pos=Pos+N2}}. + +-spec do_close(reader()) -> ok | {error, term()}. +do_close(#reader{handle=Handle,func=Fun}) when is_function(Fun,2) -> + Fun(close,Handle). + +%%%%%%%%%%%%%%%%%% +%% Option parsing +%%%%%%%%%%%%%%%%%% -do_position({tar_descriptor,UsrHandle,Fun}, Pos) -> Fun(position,{UsrHandle,Pos}). +extract_opts(List) -> + extract_opts(List, default_options()). -do_read({tar_descriptor,UsrHandle,Fun}, Len) -> Fun(read2,{UsrHandle,Len}). +table_opts(List) -> + read_opts(List, default_options()). + +default_options() -> + {ok, Cwd} = file:get_cwd(), + #read_opts{cwd=Cwd}. -do_close({tar_descriptor,UsrHandle,Fun}) -> Fun(close,UsrHandle). +extract_opts([keep_old_files|Rest], Opts) -> + extract_opts(Rest, Opts#read_opts{keep_old_files=true}); +extract_opts([{cwd, Cwd}|Rest], Opts) -> + extract_opts(Rest, Opts#read_opts{cwd=Cwd}); +extract_opts([{files, Files}|Rest], Opts) -> + Set = ordsets:from_list(Files), + extract_opts(Rest, Opts#read_opts{files=Set}); +extract_opts([memory|Rest], Opts) -> + extract_opts(Rest, Opts#read_opts{output=memory}); +extract_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) -> + extract_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]}); +extract_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) -> + extract_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]}); +extract_opts([verbose|Rest], Opts) -> + extract_opts(Rest, Opts#read_opts{verbose=true}); +extract_opts([Other|Rest], Opts) -> + extract_opts(Rest, read_opts([Other], Opts)); +extract_opts([], Opts) -> + Opts. + +read_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) -> + read_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]}); +read_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) -> + read_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]}); +read_opts([verbose|Rest], Opts) -> + read_opts(Rest, Opts#read_opts{verbose=true}); +read_opts([_|Rest], Opts) -> + read_opts(Rest, Opts); +read_opts([], Opts) -> + Opts. diff --git a/lib/stdlib/src/erl_tar.hrl b/lib/stdlib/src/erl_tar.hrl new file mode 100644 index 0000000000..d646d02989 --- /dev/null +++ b/lib/stdlib/src/erl_tar.hrl @@ -0,0 +1,394 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% + +%% Options used when adding files to a tar archive. +-record(add_opts, { + read_info, %% Fun to use for read file/link info. + chunk_size = 0, %% For file reading when sending to sftp. 0=do not chunk + verbose = false}). %% Verbose on/off. +-type add_opts() :: #add_opts{}. + +%% Options used when reading a tar archive. +-record(read_opts, { + cwd :: string(), %% Current working directory. + keep_old_files = false :: boolean(), %% Owerwrite or not. + files = all, %% Set of files to extract (or all) + output = file :: 'file' | 'memory', + open_mode = [], %% Open mode options. + verbose = false :: boolean()}). %% Verbose on/off. +-type read_opts() :: #read_opts{}. + +-type add_opt() :: dereference | + verbose | + {chunks, pos_integer()}. + +-type extract_opt() :: {cwd, string()} | + {files, [string()]} | + compressed | + cooked | + memory | + keep_old_files | + verbose. + +-type create_opt() :: compressed | + cooked | + dereference | + verbose. + +-type filelist() :: [file:filename() | + {string(), binary()} | + {string(), file:filename()}]. + +%% The tar header, once fully parsed. +-record(tar_header, { + name = "" :: string(), %% name of header file entry + mode = 8#100644 :: non_neg_integer(), %% permission and mode bits + uid = 0 :: non_neg_integer(), %% user id of owner + gid = 0 :: non_neg_integer(), %% group id of owner + size = 0 :: non_neg_integer(), %% length in bytes + mtime :: calendar:datetime(), %% modified time + typeflag :: char(), %% type of header entry + linkname = "" :: string(), %% target name of link + uname = "" :: string(), %% user name of owner + gname = "" :: string(), %% group name of owner + devmajor = 0 :: non_neg_integer(), %% major number of character or block device + devminor = 0 :: non_neg_integer(), %% minor number of character or block device + atime :: calendar:datetime(), %% access time + ctime :: calendar:datetime() %% status change time + }). +-type tar_header() :: #tar_header{}. + +%% Metadata for a sparse file fragment +-record(sparse_entry, { + offset = 0 :: non_neg_integer(), + num_bytes = 0 :: non_neg_integer()}). +-type sparse_entry() :: #sparse_entry{}. +%% Contains metadata about fragments of a sparse file +-record(sparse_array, { + entries = [] :: [sparse_entry()], + is_extended = false :: boolean(), + max_entries = 0 :: non_neg_integer()}). +-type sparse_array() :: #sparse_array{}. +%% A subset of tar header fields common to all tar implementations +-record(header_v7, { + name :: binary(), + mode :: binary(), %% octal + uid :: binary(), %% integer + gid :: binary(), %% integer + size :: binary(), %% integer + mtime :: binary(), %% integer + checksum :: binary(), %% integer + typeflag :: byte(), %% char + linkname :: binary()}). +-type header_v7() :: #header_v7{}. +%% The set of fields specific to GNU tar formatted archives +-record(header_gnu, { + header_v7 :: header_v7(), + magic :: binary(), + version :: binary(), + uname :: binary(), + gname :: binary(), + devmajor :: binary(), %% integer + devminor :: binary(), %% integer + atime :: binary(), %% integer + ctime :: binary(), %% integer + sparse :: sparse_array(), + real_size :: binary()}). %% integer +-type header_gnu() :: #header_gnu{}. +%% The set of fields specific to STAR-formatted archives +-record(header_star, { + header_v7 :: header_v7(), + magic :: binary(), + version :: binary(), + uname :: binary(), + gname :: binary(), + devmajor :: binary(), %% integer + devminor :: binary(), %% integer + prefix :: binary(), + atime :: binary(), %% integer + ctime :: binary(), %% integer + trailer :: binary()}). +-type header_star() :: #header_star{}. +%% The set of fields specific to USTAR-formatted archives +-record(header_ustar, { + header_v7 :: header_v7(), + magic :: binary(), + version :: binary(), + uname :: binary(), + gname :: binary(), + devmajor :: binary(), %% integer + devminor :: binary(), %% integer + prefix :: binary()}). +-type header_ustar() :: #header_ustar{}. + +-type header_fields() :: header_v7() | + header_gnu() | + header_star() | + header_ustar(). + +%% The overall tar reader, it holds the low-level file handle, +%% its access, position, and the I/O primitives wrapper. +-record(reader, { + handle :: file:io_device() | term(), + access :: read | write | ram, + pos = 0 :: non_neg_integer(), + func :: file_op() + }). +-type reader() :: #reader{}. +%% A reader for a regular file within the tar archive, +%% It tracks its current state relative to that file. +-record(reg_file_reader, { + handle :: reader(), + num_bytes = 0, + pos = 0, + size = 0 + }). +-type reg_file_reader() :: #reg_file_reader{}. +%% A reader for a sparse file within the tar archive, +%% It tracks its current state relative to that file. +-record(sparse_file_reader, { + handle :: reader(), + num_bytes = 0, %% bytes remaining + pos = 0, %% pos + size = 0, %% total size of file + sparse_map = #sparse_array{} + }). +-type sparse_file_reader() :: #sparse_file_reader{}. + +%% Types for the readers +-type reader_type() :: reader() | reg_file_reader() | sparse_file_reader(). +-type handle() :: file:io_device() | term(). + +%% Type for the I/O primitive wrapper function +-type file_op() :: fun((write | close | read2 | position, + {handle(), iodata()} | handle() | {handle(), non_neg_integer()} + | {handle(), non_neg_integer()}) -> + ok | eof | {ok, string() | binary()} | {ok, non_neg_integer()} + | {error, term()}). + +%% These constants (except S_IFMT) are +%% used to determine what type of device +%% a file is. Namely, `S_IFMT band file_info.mode` +%% will equal one of these contants, and tells us +%% which type it is. The stdlib file_info record +%% does not differentiate between device types, and +%% will not allow us to differentiate between sockets +%% and named pipes. These constants are pulled from libc. +-define(S_IFMT, 61440). +-define(S_IFSOCK, 49152). %% socket +-define(S_FIFO, 4096). %% fifo/named pipe +-define(S_IFBLK, 24576). %% block device +-define(S_IFCHR, 8192). %% character device + +%% Typeflag constants for the tar header +-define(TYPE_REGULAR, $0). %% regular file +-define(TYPE_REGULAR_A, 0). %% regular file +-define(TYPE_LINK, $1). %% hard link +-define(TYPE_SYMLINK, $2). %% symbolic link +-define(TYPE_CHAR, $3). %% character device node +-define(TYPE_BLOCK, $4). %% block device node +-define(TYPE_DIR, $5). %% directory +-define(TYPE_FIFO, $6). %% fifo node +-define(TYPE_CONT, $7). %% reserved +-define(TYPE_X_HEADER, $x). %% extended header +-define(TYPE_X_GLOBAL_HEADER, $g). %% global extended header +-define(TYPE_GNU_LONGNAME, $L). %% next file has a long name +-define(TYPE_GNU_LONGLINK, $K). %% next file symlinks to a file with a long name +-define(TYPE_GNU_SPARSE, $S). %% sparse file + +%% Mode constants from tar spec +-define(MODE_ISUID, 4000). %% set uid +-define(MODE_ISGID, 2000). %% set gid +-define(MODE_ISVTX, 1000). %% save text (sticky bit) +-define(MODE_ISDIR, 40000). %% directory +-define(MODE_ISFIFO, 10000). %% fifo +-define(MODE_ISREG, 100000). %% regular file +-define(MODE_ISLNK, 120000). %% symbolic link +-define(MODE_ISBLK, 60000). %% block special file +-define(MODE_ISCHR, 20000). %% character special file +-define(MODE_ISSOCK, 140000). %% socket + +%% Keywords for PAX extended header +-define(PAX_ATIME, <<"atime">>). +-define(PAX_CHARSET, <<"charset">>). +-define(PAX_COMMENT, <<"comment">>). +-define(PAX_CTIME, <<"ctime">>). %% ctime is not a valid pax header +-define(PAX_GID, <<"gid">>). +-define(PAX_GNAME, <<"gname">>). +-define(PAX_LINKPATH, <<"linkpath">>). +-define(PAX_MTIME, <<"mtime">>). +-define(PAX_PATH, <<"path">>). +-define(PAX_SIZE, <<"size">>). +-define(PAX_UID, <<"uid">>). +-define(PAX_UNAME, <<"uname">>). +-define(PAX_XATTR, <<"SCHILY.xattr.">>). +-define(PAX_XATTR_STR, "SCHILY.xattr."). +-define(PAX_NONE, <<"">>). + +%% Tar format constants +%% Unknown format +-define(FORMAT_UNKNOWN, 0). +%% The format of the original Unix V7 tar tool prior to standardization +-define(FORMAT_V7, 1). +%% The old and new GNU formats, incompatible with USTAR. +%% This covers the old GNU sparse extension, but it does +%% not cover the GNU sparse extensions using PAX headers, +%% versions 0.0, 0.1, and 1.0; these fall under the PAX format. +-define(FORMAT_GNU, 2). +%% Schily's tar format, which is incompatible with USTAR. +%% This does not cover STAR extensions to the PAX format; these +%% fall under the PAX format. +-define(FORMAT_STAR, 3). +%% USTAR is the former standardization of tar defined in POSIX.1-1988, +%% it is incompatible with the GNU and STAR formats. +-define(FORMAT_USTAR, 4). +%% PAX is the latest standardization of tar defined in POSIX.1-2001. +%% This is an extension of USTAR and is "backwards compatible" with it. +%% +%% Some newer formats add their own extensions to PAX, such as GNU sparse +%% files and SCHILY extended attributes. Since they are backwards compatible +%% with PAX, they will be labelled as "PAX". +-define(FORMAT_PAX, 5). + +%% Magic constants +-define(MAGIC_GNU, <<"ustar ">>). +-define(VERSION_GNU, <<" \x00">>). +-define(MAGIC_USTAR, <<"ustar\x00">>). +-define(VERSION_USTAR, <<"00">>). +-define(TRAILER_STAR, <<"tar\x00">>). + +%% Size constants +-define(BLOCK_SIZE, 512). %% size of each block in a tar stream +-define(NAME_SIZE, 100). %% max length of the name field in USTAR format +-define(PREFIX_SIZE, 155). %% max length of the prefix field in USTAR format + +%% Maximum size of a nanosecond value as an integer +-define(MAX_NANO_INT_SIZE, 9). +%% Maximum size of a 64-bit signed integer +-define(MAX_INT64, (1 bsl 63 - 1)). + +-define(PAX_GNU_SPARSE_NUMBLOCKS, <<"GNU.sparse.numblocks">>). +-define(PAX_GNU_SPARSE_OFFSET, <<"GNU.sparse.offset">>). +-define(PAX_GNU_SPARSE_NUMBYTES, <<"GNU.sparse.numbytes">>). +-define(PAX_GNU_SPARSE_MAP, <<"GNU.sparse.map">>). +-define(PAX_GNU_SPARSE_NAME, <<"GNU.sparse.name">>). +-define(PAX_GNU_SPARSE_MAJOR, <<"GNU.sparse.major">>). +-define(PAX_GNU_SPARSE_MINOR, <<"GNU.sparse.minor">>). +-define(PAX_GNU_SPARSE_SIZE, <<"GNU.sparse.size">>). +-define(PAX_GNU_SPARSE_REALSIZE, <<"GNU.sparse.realsize">>). + +-define(V7_NAME, 0). +-define(V7_NAME_LEN, 100). +-define(V7_MODE, 100). +-define(V7_MODE_LEN, 8). +-define(V7_UID, 108). +-define(V7_UID_LEN, 8). +-define(V7_GID, 116). +-define(V7_GID_LEN, 8). +-define(V7_SIZE, 124). +-define(V7_SIZE_LEN, 12). +-define(V7_MTIME, 136). +-define(V7_MTIME_LEN, 12). +-define(V7_CHKSUM, 148). +-define(V7_CHKSUM_LEN, 8). +-define(V7_TYPE, 156). +-define(V7_TYPE_LEN, 1). +-define(V7_LINKNAME, 157). +-define(V7_LINKNAME_LEN, 100). + +-define(STAR_TRAILER, 508). +-define(STAR_TRAILER_LEN, 4). + +-define(USTAR_MAGIC, 257). +-define(USTAR_MAGIC_LEN, 6). +-define(USTAR_VERSION, 263). +-define(USTAR_VERSION_LEN, 2). +-define(USTAR_UNAME, 265). +-define(USTAR_UNAME_LEN, 32). +-define(USTAR_GNAME, 297). +-define(USTAR_GNAME_LEN, 32). +-define(USTAR_DEVMAJ, 329). +-define(USTAR_DEVMAJ_LEN, 8). +-define(USTAR_DEVMIN, 337). +-define(USTAR_DEVMIN_LEN, 8). +-define(USTAR_PREFIX, 345). +-define(USTAR_PREFIX_LEN, 155). + +-define(GNU_MAGIC, 257). +-define(GNU_MAGIC_LEN, 6). +-define(GNU_VERSION, 263). +-define(GNU_VERSION_LEN, 2). + +%% ?BLOCK_SIZE of zero-bytes. +%% Two of these in a row mark the end of an archive. +-define(ZERO_BLOCK, <<0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0>>). + +-define(BILLION, 1000000000). + +-define(EPOCH, {{1970,1,1}, {0,0,0}}). diff --git a/lib/stdlib/test/tar_SUITE.erl b/lib/stdlib/test/tar_SUITE.erl index 6f3979bb77..d6b6d3f80c 100644 --- a/lib/stdlib/test/tar_SUITE.erl +++ b/lib/stdlib/test/tar_SUITE.erl @@ -22,9 +22,10 @@ -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, init_per_group/2,end_per_group/2, borderline/1, atomic/1, long_names/1, create_long_names/1, bad_tar/1, errors/1, extract_from_binary/1, - extract_from_binary_compressed/1, + extract_from_binary_compressed/1, extract_filtered/1, extract_from_open_file/1, symlinks/1, open_add_close/1, cooked_compressed/1, - memory/1,unicode/1]). + memory/1,unicode/1,read_other_implementations/1, + sparse/1, init/1]). -include_lib("common_test/include/ct.hrl"). -include_lib("kernel/include/file.hrl"). @@ -35,7 +36,10 @@ all() -> [borderline, atomic, long_names, create_long_names, bad_tar, errors, extract_from_binary, extract_from_binary_compressed, extract_from_open_file, - symlinks, open_add_close, cooked_compressed, memory, unicode]. + extract_filtered, + symlinks, open_add_close, cooked_compressed, memory, unicode, + read_other_implementations, + sparse,init]. groups() -> []. @@ -84,17 +88,30 @@ borderline(Config) when is_list(Config) -> ok. borderline_test(Size, TempDir) -> - Archive = filename:join(TempDir, "ar_"++integer_to_list(Size)++".tar"), - Name = filename:join(TempDir, "file_"++integer_to_list(Size)), io:format("Testing size ~p", [Size]), + borderline_test(Size, TempDir, true), + borderline_test(Size, TempDir, false), + ok. + +borderline_test(Size, TempDir, IsUstar) -> + Prefix = case IsUstar of + true -> + "file_"; + false -> + lists:duplicate(100, $f) ++ "ile_" + end, + SizeList = integer_to_list(Size), + Archive = filename:join(TempDir, "ar_"++ SizeList ++".tar"), + Name = filename:join(TempDir, Prefix++SizeList), %% Create a file and archive it. X0 = erlang:monotonic_time(), - file:write_file(Name, random_byte_list(X0, Size)), + ok = file:write_file(Name, random_byte_list(X0, Size)), ok = erl_tar:create(Archive, [Name]), ok = file:delete(Name), %% Verify listing and extracting. + IsUstar = is_ustar(Archive), {ok, [Name]} = erl_tar:table(Archive), ok = erl_tar:extract(Archive, [verbose]), @@ -103,7 +120,12 @@ borderline_test(Size, TempDir) -> true = match_byte_list(X0, binary_to_list(Bin)), %% Verify that Unix tar can read it. - tar_tf(Archive, Name), + case IsUstar of + true -> + tar_tf(Archive, Name); + false -> + ok + end, ok. @@ -336,6 +358,7 @@ create_long_names() -> ok = erl_tar:tt(TarName), %% Extract and verify. + true = is_ustar(TarName), ExtractDir = "extract_dir", ok = file:make_dir(ExtractDir), ok = erl_tar:extract(TarName, [{cwd,ExtractDir}]), @@ -357,7 +380,7 @@ make_dirs([], Dir) -> %% Try erl_tar:table/2 and erl_tar:extract/2 on some corrupted tar files. bad_tar(Config) when is_list(Config) -> try_bad("bad_checksum", bad_header, Config), - try_bad("bad_octal", bad_header, Config), + try_bad("bad_octal", invalid_tar_checksum, Config), try_bad("bad_too_short", eof, Config), try_bad("bad_even_shorter", eof, Config), ok. @@ -370,8 +393,10 @@ try_bad(Name0, Reason, Config) -> Name = Name0 ++ ".tar", io:format("~nTrying ~s", [Name]), Full = filename:join(DataDir, Name), - Opts = [verbose, {cwd, PrivDir}], + Dest = filename:join(PrivDir, Name0), + Opts = [verbose, {cwd, Dest}], Expected = {error, Reason}, + io:fwrite("Expected: ~p\n", [Expected]), case {erl_tar:table(Full, Opts), erl_tar:extract(Full, Opts)} of {Expected, Expected} -> io:format("Result: ~p", [Expected]), @@ -493,6 +518,27 @@ extract_from_binary_compressed(Config) when is_list(Config) -> ok. +%% Test extracting a tar archive from a binary. +extract_filtered(Config) when is_list(Config) -> + DataDir = proplists:get_value(data_dir, Config), + PrivDir = proplists:get_value(priv_dir, Config), + Long = filename:join(DataDir, "no_fancy_stuff.tar"), + ExtractDir = filename:join(PrivDir, "extract_from_binary"), + ok = file:make_dir(ExtractDir), + + ok = erl_tar:extract(Long, [{cwd,ExtractDir},{files,["no_fancy_stuff/EPLICENCE"]}]), + + %% Verify. + Dir = filename:join(ExtractDir, "no_fancy_stuff"), + true = filelib:is_dir(Dir), + false = filelib:is_file(filename:join(Dir, "a_dir_list")), + true = filelib:is_file(filename:join(Dir, "EPLICENCE")), + + %% Clean up. + delete_files([ExtractDir]), + + ok. + %% Test extracting a tar archive from an open file. extract_from_open_file(Config) when is_list(Config) -> DataDir = proplists:get_value(data_dir, Config), @@ -573,6 +619,7 @@ symlinks(Dir, BadSymlink, PointsTo) -> ok = file:write_file(AFile, ALine), ok = file:make_symlink(AFile, GoodSymlink), ok = erl_tar:create(Tar, [BadSymlink, GoodSymlink, AFile], [verbose]), + true = is_ustar(Tar), %% List contents of tar file. @@ -581,6 +628,7 @@ symlinks(Dir, BadSymlink, PointsTo) -> %% Also create another archive with the dereference flag. ok = erl_tar:create(DerefTar, [AFile, GoodSymlink], [dereference, verbose]), + true = is_ustar(DerefTar), %% Extract files to a new directory. @@ -619,13 +667,50 @@ long_symlink(Dir) -> ok = file:set_cwd(Dir), AFile = "long_symlink", - FarTooLong = "/tmp/aarrghh/this/path/is/far/longer/than/one/hundred/characters/which/is/the/maximum/number/of/characters/allowed", - ok = file:make_symlink(FarTooLong, AFile), - {error,Error} = erl_tar:create(Tar, [AFile], [verbose]), - io:format("Error: ~s\n", [erl_tar:format_error(Error)]), - {FarTooLong,symbolic_link_too_long} = Error, + RequiresPAX = "/tmp/aarrghh/this/path/is/far/longer/than/one/hundred/characters/which/is/the/maximum/number/of/characters/allowed", + ok = file:make_symlink(RequiresPAX, AFile), + ok = erl_tar:create(Tar, [AFile], [verbose]), + false = is_ustar(Tar), + NewDir = filename:join(Dir, "extracted"), + _ = file:make_dir(NewDir), + ok = erl_tar:extract(Tar, [{cwd, NewDir}, verbose]), + ok = file:set_cwd(NewDir), + {ok, #file_info{type=symlink}} = file:read_link_info(AFile), + {ok, RequiresPAX} = file:read_link(AFile), + ok. + +init(Config) when is_list(Config) -> + PrivDir = proplists:get_value(priv_dir, Config), + ok = file:set_cwd(PrivDir), + Dir = filename:join(PrivDir, "init"), + ok = file:make_dir(Dir), + + [{FileOne,_,_}|_] = oac_files(), + TarOne = filename:join(Dir, "archive1.tar"), + {ok,Fd} = file:open(TarOne, [write]), + + %% If the arity of the fun is wrong, badarg should be returned + {error, badarg} = erl_tar:init(Fd, write, fun file_op_bad/1), + + %% Otherwise we should be good to go + {ok, Tar} = erl_tar:init(Fd, write, fun file_op/2), + ok = erl_tar:add(Tar, FileOne, []), + ok = erl_tar:close(Tar), + {ok, [FileOne]} = erl_tar:table(TarOne), ok. +file_op_bad(_) -> + throw({error, should_never_be_called}). + +file_op(write, {Fd, Data}) -> + file:write(Fd, Data); +file_op(position, {Fd, Pos}) -> + file:position(Fd, Pos); +file_op(read2, {Fd, Size}) -> + file:read(Fd, Size); +file_op(close, Fd) -> + file:close(Fd). + open_add_close(Config) when is_list(Config) -> PrivDir = proplists:get_value(priv_dir, Config), ok = file:set_cwd(PrivDir), @@ -643,17 +728,26 @@ open_add_close(Config) when is_list(Config) -> TarOne = filename:join(Dir, "archive1.tar"), {ok,AD} = erl_tar:open(TarOne, [write]), ok = erl_tar:add(AD, FileOne, []), - ok = erl_tar:add(AD, FileTwo, "second file", []), - ok = erl_tar:add(AD, FileThree, [verbose]), + + %% Add with {NameInArchive,Name} + ok = erl_tar:add(AD, {"second file", FileTwo}, []), + + %% Add with {binary, Bin} + {ok,FileThreeBin} = file:read_file(FileThree), + ok = erl_tar:add(AD, {FileThree, FileThreeBin}, [verbose]), + + %% Add with Name ok = erl_tar:add(AD, FileThree, "chunked", [{chunks,11411},verbose]), ok = erl_tar:add(AD, ADir, [verbose]), ok = erl_tar:add(AD, AnotherDir, [verbose]), ok = erl_tar:close(AD), + true = is_ustar(TarOne), ok = erl_tar:t(TarOne), ok = erl_tar:tt(TarOne), - {ok,[FileOne,"second file",FileThree,"chunked",ADir,SomeContent]} = erl_tar:table(TarOne), + Expected = {ok,[FileOne,"second file",FileThree,"chunked",ADir,SomeContent]}, + Expected = erl_tar:table(TarOne), delete_files(["oac_file","oac_small","oac_big",Dir,AnotherDir,ADir]), @@ -718,6 +812,41 @@ memory(Config) when is_list(Config) -> ok = delete_files([Name1,Name2]), ok. +read_other_implementations(Config) when is_list(Config) -> + DataDir = proplists:get_value(data_dir, Config), + Files = ["v7.tar", "gnu.tar", "bsd.tar", + "star.tar", "pax_mtime.tar"], + do_read_other_implementations(Files, DataDir). + +do_read_other_implementations([], _DataDir) -> + ok; +do_read_other_implementations([File|Rest], DataDir) -> + io:format("~nTrying ~s", [File]), + Full = filename:join(DataDir, File), + {ok, _} = erl_tar:table(Full), + {ok, _} = erl_tar:extract(Full, [memory]), + do_read_other_implementations(Rest, DataDir). + + +%% Test handling of sparse files +sparse(Config) when is_list(Config) -> + DataDir = proplists:get_value(data_dir, Config), + PrivDir = proplists:get_value(priv_dir, Config), + Sparse01Empty = "sparse01_empty.tar", + Sparse01 = "sparse01.tar", + Sparse10Empty = "sparse10_empty.tar", + Sparse10 = "sparse10.tar", + do_sparse([Sparse01Empty, Sparse01, Sparse10Empty, Sparse10], DataDir, PrivDir). + +do_sparse([], _DataDir, _PrivDir) -> + ok; +do_sparse([Name|Rest], DataDir, PrivDir) -> + io:format("~nTrying sparse file ~s", [Name]), + Full = filename:join(DataDir, Name), + {ok, [_]} = erl_tar:table(Full), + {ok, _} = erl_tar:extract(Full, [memory]), + do_sparse(Rest, DataDir, PrivDir). + %% Test filenames with characters outside the US ASCII range. unicode(Config) when is_list(Config) -> run_unicode_node(Config, "+fnu"), @@ -753,6 +882,9 @@ do_unicode(PrivDir) -> Names = lists:sort(unicode_create_files()), Tar = "unicöde.tar", ok = erl_tar:create(Tar, ["unicöde"], []), + + %% Unicode filenames require PAX format. + false = is_ustar(Tar), {ok,Names0} = erl_tar:table(Tar, []), Names = lists:sort(Names0), _ = [ok = file:delete(Name) || Name <- Names], @@ -850,3 +982,15 @@ start_node(Name, Args) -> ct:log("Node ~p started~n", [Node]), Node end. + +%% Test that the given tar file is a plain USTAR archive, +%% without any PAX extensions. +is_ustar(File) -> + {ok,Bin} = file:read_file(File), + <<_:257/binary,"ustar",0,_/binary>> = Bin, + <<_:156/binary,Type:8,_/binary>> = Bin, + case Type of + $x -> false; + $g -> false; + _ -> true + end. diff --git a/lib/stdlib/test/tar_SUITE_data/bsd.tar b/lib/stdlib/test/tar_SUITE_data/bsd.tar new file mode 100644 index 0000000000..8c31864be0 Binary files /dev/null and b/lib/stdlib/test/tar_SUITE_data/bsd.tar differ diff --git a/lib/stdlib/test/tar_SUITE_data/gnu.tar b/lib/stdlib/test/tar_SUITE_data/gnu.tar new file mode 100644 index 0000000000..60268065c1 Binary files /dev/null and b/lib/stdlib/test/tar_SUITE_data/gnu.tar differ diff --git a/lib/stdlib/test/tar_SUITE_data/pax_mtime.tar b/lib/stdlib/test/tar_SUITE_data/pax_mtime.tar new file mode 100644 index 0000000000..1b6e80ffac Binary files /dev/null and b/lib/stdlib/test/tar_SUITE_data/pax_mtime.tar differ diff --git a/lib/stdlib/test/tar_SUITE_data/sparse00.tar b/lib/stdlib/test/tar_SUITE_data/sparse00.tar new file mode 100644 index 0000000000..61a04de90b Binary files /dev/null and b/lib/stdlib/test/tar_SUITE_data/sparse00.tar differ diff --git a/lib/stdlib/test/tar_SUITE_data/sparse01.tar b/lib/stdlib/test/tar_SUITE_data/sparse01.tar new file mode 100644 index 0000000000..61a04de90b Binary files /dev/null and b/lib/stdlib/test/tar_SUITE_data/sparse01.tar differ diff --git a/lib/stdlib/test/tar_SUITE_data/sparse01_empty.tar b/lib/stdlib/test/tar_SUITE_data/sparse01_empty.tar new file mode 100644 index 0000000000..efa6d060f4 Binary files /dev/null and b/lib/stdlib/test/tar_SUITE_data/sparse01_empty.tar differ diff --git a/lib/stdlib/test/tar_SUITE_data/sparse10.tar b/lib/stdlib/test/tar_SUITE_data/sparse10.tar new file mode 100644 index 0000000000..61a04de90b Binary files /dev/null and b/lib/stdlib/test/tar_SUITE_data/sparse10.tar differ diff --git a/lib/stdlib/test/tar_SUITE_data/sparse10_empty.tar b/lib/stdlib/test/tar_SUITE_data/sparse10_empty.tar new file mode 100644 index 0000000000..efa6d060f4 Binary files /dev/null and b/lib/stdlib/test/tar_SUITE_data/sparse10_empty.tar differ diff --git a/lib/stdlib/test/tar_SUITE_data/star.tar b/lib/stdlib/test/tar_SUITE_data/star.tar new file mode 100644 index 0000000000..b0631e3b13 Binary files /dev/null and b/lib/stdlib/test/tar_SUITE_data/star.tar differ diff --git a/lib/stdlib/test/tar_SUITE_data/v7.tar b/lib/stdlib/test/tar_SUITE_data/v7.tar new file mode 100644 index 0000000000..9918e006bb Binary files /dev/null and b/lib/stdlib/test/tar_SUITE_data/v7.tar differ -- cgit v1.2.3