aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2017-02-16 15:14:50 +0100
committerGitHub <[email protected]>2017-02-16 15:14:50 +0100
commitdbf3754567fdc97a53da529e04400602c647ee7e (patch)
tree40a8f9f448b985b7924cf5d1785707cb4a80e57c
parentec84b5f05e3daa633c98b00243a52acd9620d586 (diff)
parentaa0c4b0df7cdc750450906aff4e8c81627d80605 (diff)
downloadotp-dbf3754567fdc97a53da529e04400602c647ee7e.tar.gz
otp-dbf3754567fdc97a53da529e04400602c647ee7e.tar.bz2
otp-dbf3754567fdc97a53da529e04400602c647ee7e.zip
Merge pull request #1323 from bitwalker/erl_tar2
Update erl_tar to support PAX format (redux) OTP-14226
-rw-r--r--lib/sasl/src/release_handler.erl6
-rw-r--r--lib/sasl/src/systools_make.erl4
-rw-r--r--lib/stdlib/doc/src/erl_tar.xml72
-rw-r--r--lib/stdlib/src/Makefile4
-rw-r--r--lib/stdlib/src/erl_tar.erl2562
-rw-r--r--lib/stdlib/src/erl_tar.hrl394
-rw-r--r--lib/stdlib/test/tar_SUITE.erl178
-rw-r--r--lib/stdlib/test/tar_SUITE_data/bsd.tarbin0 -> 9216 bytes
-rw-r--r--lib/stdlib/test/tar_SUITE_data/gnu.tarbin0 -> 30720 bytes
-rw-r--r--lib/stdlib/test/tar_SUITE_data/pax_mtime.tarbin0 -> 10240 bytes
-rw-r--r--lib/stdlib/test/tar_SUITE_data/sparse00.tarbin0 -> 61440 bytes
-rw-r--r--lib/stdlib/test/tar_SUITE_data/sparse01.tarbin0 -> 61440 bytes
-rw-r--r--lib/stdlib/test/tar_SUITE_data/sparse01_empty.tarbin0 -> 10240 bytes
-rw-r--r--lib/stdlib/test/tar_SUITE_data/sparse10.tarbin0 -> 61440 bytes
-rw-r--r--lib/stdlib/test/tar_SUITE_data/sparse10_empty.tarbin0 -> 10240 bytes
-rw-r--r--lib/stdlib/test/tar_SUITE_data/star.tarbin0 -> 10240 bytes
-rw-r--r--lib/stdlib/test/tar_SUITE_data/v7.tarbin0 -> 10240 bytes
17 files changed, 2308 insertions, 912 deletions
diff --git a/lib/sasl/src/release_handler.erl b/lib/sasl/src/release_handler.erl
index 1fcc9a0288..3250311b8f 100644
--- a/lib/sasl/src/release_handler.erl
+++ b/lib/sasl/src/release_handler.erl
@@ -831,7 +831,7 @@ do_unpack_release(Root, RelDir, ReleaseName, Releases) ->
Tar = filename:join(RelDir, ReleaseName ++ ".tar.gz"),
do_check_file(Tar, regular),
Rel = ReleaseName ++ ".rel",
- extract_rel_file(filename:join("releases", Rel), Tar, Root),
+ _ = extract_rel_file(filename:join("releases", Rel), Tar, Root),
RelFile = filename:join(RelDir, Rel),
Release = check_rel(Root, RelFile, false),
#release{vsn = Vsn} = Release,
@@ -1841,14 +1841,12 @@ do_check_file(Master, FileName, Type) ->
%% by the user in another way, i.e. ignore this here.
%%-----------------------------------------------------------------
extract_rel_file(Rel, Tar, Root) ->
- erl_tar:extract(Tar, [{files, [Rel]}, {cwd, Root}, compressed]).
+ _ = erl_tar:extract(Tar, [{files, [Rel]}, {cwd, Root}, compressed]).
extract_tar(Root, Tar) ->
case erl_tar:extract(Tar, [keep_old_files, {cwd, Root}, compressed]) of
ok ->
ok;
- {error, Reason, Name} -> % Old erl_tar.
- throw({error, {cannot_extract_file, Name, Reason}});
{error, {Name, Reason}} -> % New erl_tar (R3A).
throw({error, {cannot_extract_file, Name, Reason}})
end.
diff --git a/lib/sasl/src/systools_make.erl b/lib/sasl/src/systools_make.erl
index 587bd02cb2..f03b03dc08 100644
--- a/lib/sasl/src/systools_make.erl
+++ b/lib/sasl/src/systools_make.erl
@@ -1908,8 +1908,10 @@ del_tar(Tar, TarName) ->
file:delete(TarName).
add_to_tar(Tar, FromFile, ToFile) ->
- case erl_tar:add(Tar, FromFile, ToFile, [compressed, dereference]) of
+ case catch erl_tar:add(Tar, FromFile, ToFile, [compressed, dereference]) of
ok -> ok;
+ {'EXIT', Reason} ->
+ throw({error, {tar_error, {add, FromFile, Reason}}});
{error, Error} ->
throw({error, {tar_error, {add, FromFile, Error}}})
end.
diff --git a/lib/stdlib/doc/src/erl_tar.xml b/lib/stdlib/doc/src/erl_tar.xml
index 24e7b64b9e..f28d8b425b 100644
--- a/lib/stdlib/doc/src/erl_tar.xml
+++ b/lib/stdlib/doc/src/erl_tar.xml
@@ -37,12 +37,13 @@
</modulesummary>
<description>
<p>This module archives and extract files to and from
- a tar file. This module supports the <c>ustar</c> format
- (IEEE Std 1003.1 and ISO/IEC&nbsp;9945-1). All modern <c>tar</c>
- programs (including GNU tar) can read this format. To ensure that
- that GNU tar produces a tar file that <c>erl_tar</c> can read,
- specify option <c>--format=ustar</c> to GNU tar.</p>
-
+ a tar file. This module supports reading most common tar formats,
+ namely v7, STAR, USTAR, and PAX, as well as some of GNU tar's extensions
+ to the USTAR format (sparse files most notably). It produces tar archives
+ in USTAR format, unless the files being archived require PAX format due to
+ restrictions in USTAR (such as unicode metadata, filename length, and more).
+ As such, <c>erl_tar</c> supports tar archives produced by most all modern
+ tar utilities, and produces tarballs which should be similarly portable.</p>
<p>By convention, the name of a tar file is to end in "<c>.tar</c>".
To abide to the convention, add "<c>.tar</c>" to the name.</p>
@@ -83,6 +84,8 @@
<p>If <seealso marker="kernel:file#native_name_encoding/0">
<c>file:native_name_encoding/0</c></seealso>
returns <c>latin1</c>, no translation of path names is done.</p>
+
+ <p>Unicode metadata stored in PAX headers is preserved</p>
</section>
<section>
@@ -104,21 +107,20 @@
<title>Limitations</title>
<list type="bulleted">
<item>
- <p>For maximum compatibility, it is safe to archive files with names
- up to 100 characters in length. Such tar files can generally be
- extracted by any <c>tar</c> program.</p>
- </item>
- <item>
- <p>For filenames exceeding 100 characters in length, the resulting tar
- file can only be correctly extracted by a POSIX-compatible <c>tar</c>
- program (such as Solaris <c>tar</c> or a modern GNU <c>tar</c>).</p>
- </item>
- <item>
- <p>Files with longer names than 256 bytes cannot be stored.</p>
+ <p>If you must remain compatible with the USTAR tar format, you must ensure file paths being
+ stored are less than 255 bytes in total, with a maximum filename component
+ length of 100 bytes. USTAR uses a header field (prefix) in addition to the name field, and
+ splits file paths longer than 100 bytes into two parts. This split is done on a directory boundary,
+ and is done in such a way to make the best use of the space available in those two fields, but in practice
+ this will often mean that you have less than 255 bytes for a path. <c>erl_tar</c> will
+ automatically upgrade the format to PAX to handle longer filenames, so this is only an issue if you
+ need to extract the archive with an older implementation of <c>erl_tar</c> or <c>tar</c> which does
+ not support PAX. In this case, the PAX headers will be extracted as regular files, and you will need to
+ apply them manually.</p>
</item>
<item>
- <p>The file name a symbolic link points is always limited
- to 100 characters.</p>
+ <p>Like the above, if you must remain USTAR compatible, you must also ensure than paths for
+ symbolic/hard links are no more than 100 bytes, otherwise PAX headers will be used.</p>
</item>
</list>
</section>
@@ -129,7 +131,9 @@
<fsummary>Add a file to an open tar file.</fsummary>
<type>
<v>TarDescriptor = term()</v>
- <v>Filename = filename()</v>
+ <v>FilenameOrBin = filename()|binary()</v>
+ <v>NameInArchive = filename()</v>
+ <v>Filename = filename()|{NameInArchive,FilenameOrBin}</v>
<v>Options = [Option]</v>
<v>Option = dereference|verbose|{chunks,ChunkSize}</v>
<v>ChunkSize = positive_integer()</v>
@@ -139,6 +143,9 @@
<desc>
<p>Adds a file to a tar file that has been opened for writing by
<seealso marker="#open/2"><c>open/1</c></seealso>.</p>
+ <p><c>NameInArchive</c> is the name under which the file becomes
+ stored in the tar file. The file gets this name when it is
+ extracted from the tar file.</p>
<p>Options:</p>
<taglist>
<tag><c>dereference</c></tag>
@@ -183,9 +190,6 @@
<seealso marker="#open/2"><c>open/2</c></seealso>. This function
accepts the same options as
<seealso marker="#add/3"><c>add/3</c></seealso>.</p>
- <p><c>NameInArchive</c> is the name under which the file becomes
- stored in the tar file. The file gets this name when it is
- extracted from the tar file.</p>
</desc>
</func>
@@ -206,8 +210,8 @@
<fsummary>Create a tar archive.</fsummary>
<type>
<v>Name = filename()</v>
- <v>FileList = [Filename|{NameInArchive, binary()},{NameInArchive,
- Filename}]</v>
+ <v>FileList = [Filename|{NameInArchive, FilenameOrBin}]</v>
+ <v>FilenameOrBin = filename()|binary()</v>
<v>Filename = filename()</v>
<v>NameInArchive = filename()</v>
<v>RetValue = ok|{error,{Name,Reason}}</v>
@@ -225,8 +229,8 @@
<fsummary>Create a tar archive with options.</fsummary>
<type>
<v>Name = filename()</v>
- <v>FileList = [Filename|{NameInArchive, binary()},{NameInArchive,
- Filename}]</v>
+ <v>FileList = [Filename|{NameInArchive, FilenameOrBin}]</v>
+ <v>FilenameOrBin = filename()|binary()</v>
<v>Filename = filename()</v>
<v>NameInArchive = filename()</v>
<v>OptionList = [Option]</v>
@@ -275,7 +279,8 @@
<name>extract(Name) -> RetValue</name>
<fsummary>Extract all files from a tar file.</fsummary>
<type>
- <v>Name = filename()</v>
+ <v>Name = filename() | {binary,binary()} | {file,Fd}</v>
+ <v>Fd = file_descriptor()</v>
<v>RetValue = ok|{error,{Name,Reason}}</v>
<v>Reason = term()</v>
</type>
@@ -294,8 +299,7 @@
<name>extract(Name, OptionList)</name>
<fsummary>Extract files from a tar file.</fsummary>
<type>
- <v>Name = filename() | {binary,Binary} | {file,Fd}</v>
- <v>Binary = binary()</v>
+ <v>Name = filename() | {binary,binary()} | {file,Fd}</v>
<v>Fd = file_descriptor()</v>
<v>OptionList = [Option]</v>
<v>Option = {cwd,Cwd}|{files,FileList}|keep_old_files|verbose|memory</v>
@@ -521,7 +525,7 @@ erl_tar:close(TarDesc)</code>
<name>table(Name) -> RetValue</name>
<fsummary>Retrieve the name of all files in a tar file.</fsummary>
<type>
- <v>Name = filename()</v>
+ <v>Name = filename()|{binary,binary()}|{file,file_descriptor()}</v>
<v>RetValue = {ok,[string()]}|{error,{Name,Reason}}</v>
<v>Reason = term()</v>
</type>
@@ -535,7 +539,7 @@ erl_tar:close(TarDesc)</code>
<fsummary>Retrieve name and information of all files in a tar file.
</fsummary>
<type>
- <v>Name = filename()</v>
+ <v>Name = filename()|{binary,binary()}|{file,file_descriptor()}</v>
</type>
<desc>
<p>Retrieves the names of all files in the tar file <c>Name</c>.</p>
@@ -546,7 +550,7 @@ erl_tar:close(TarDesc)</code>
<name>t(Name)</name>
<fsummary>Print the name of each file in a tar file.</fsummary>
<type>
- <v>Name = filename()</v>
+ <v>Name = filename()|{binary,binary()}|{file,file_descriptor()}</v>
</type>
<desc>
<p>Prints the names of all files in the tar file <c>Name</c> to the
@@ -559,7 +563,7 @@ erl_tar:close(TarDesc)</code>
<fsummary>Print name and information for each file in a tar file.
</fsummary>
<type>
- <v>Name = filename()</v>
+ <v>Name = filename()|{binary,binary()}|{file,file_descriptor()}</v>
</type>
<desc>
<p>Prints names and information about all files in the tar file
diff --git a/lib/stdlib/src/Makefile b/lib/stdlib/src/Makefile
index d6c0ff8d8d..ed3dfb342c 100644
--- a/lib/stdlib/src/Makefile
+++ b/lib/stdlib/src/Makefile
@@ -130,7 +130,7 @@ HRL_FILES= \
../include/qlc.hrl \
../include/zip.hrl
-INTERNAL_HRL_FILES= dets.hrl
+INTERNAL_HRL_FILES= dets.hrl erl_tar.hrl
ERL_FILES= $(MODULES:%=%.erl)
@@ -228,7 +228,7 @@ $(EBIN)/dets_v9.beam: dets.hrl
$(EBIN)/erl_bits.beam: ../include/erl_bits.hrl
$(EBIN)/erl_compile.beam: ../include/erl_compile.hrl ../../kernel/include/file.hrl
$(EBIN)/erl_lint.beam: ../include/erl_bits.hrl
-$(EBIN)/erl_tar.beam: ../../kernel/include/file.hrl
+$(EBIN)/erl_tar.beam: ../../kernel/include/file.hrl erl_tar.hrl
$(EBIN)/file_sorter.beam: ../../kernel/include/file.hrl
$(EBIN)/filelib.beam: ../../kernel/include/file.hrl
$(EBIN)/filename.beam: ../../kernel/include/file.hrl
diff --git a/lib/stdlib/src/erl_tar.erl b/lib/stdlib/src/erl_tar.erl
index a383a0fc67..086e77cd28 100644
--- a/lib/stdlib/src/erl_tar.erl
+++ b/lib/stdlib/src/erl_tar.erl
@@ -1,8 +1,8 @@
%%
%% %CopyrightBegin%
-%%
-%% Copyright Ericsson AB 1997-2016. All Rights Reserved.
-%%
+%%
+%% Copyright Ericsson AB 1997-2017. All Rights Reserved.
+%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
@@ -14,191 +14,245 @@
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
-%%
+%%
%% %CopyrightEnd%
%%
+%% This module implements extraction/creation of tar archives.
+%% It supports reading most common tar formats, namely V7, STAR,
+%% USTAR, GNU, BSD/libarchive, and PAX. It produces archives in USTAR
+%% format, unless it must use PAX headers, in which case it produces PAX
+%% format.
+%%
+%% The following references where used:
+%% http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
+%% http://www.gnu.org/software/tar/manual/html_node/Standard.html
+%% http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html
-module(erl_tar).
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%% Purpose: Unix tar (tape archive) utility.
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
--export([init/3, create/2, create/3, extract/1, extract/2, table/1, table/2,
- open/2, close/1, add/3, add/4,
- t/1, tt/1, format_error/1]).
+-export([init/3,
+ create/2, create/3,
+ extract/1, extract/2,
+ table/1, table/2, t/1, tt/1,
+ open/2, close/1,
+ add/3, add/4,
+ format_error/1]).
-include_lib("kernel/include/file.hrl").
+-include_lib("erl_tar.hrl").
--record(add_opts,
- {read_info, % Fun to use for read file/link info.
- chunk_size = 0, % For file reading when sending to sftp. 0=do not chunk
- verbose = false :: boolean()}). % Verbose on/off.
-
-%% Opens a tar archive.
-
-init(UsrHandle, AccessMode, Fun) when is_function(Fun,2) ->
- {ok, {AccessMode,{tar_descriptor,UsrHandle,Fun}}}.
-
-%%%================================================================
-%%% The open function with friends is to keep the file and binary api of this module
-open(Name, Mode) ->
- case open_mode(Mode) of
- {ok, Access, Raw, Opts} ->
- open1(Name, Access, Raw, Opts);
- {error, Reason} ->
- {error, {Name, Reason}}
- end.
-
-open1({binary,Bin}, read, _Raw, Opts) ->
- case file:open(Bin, [ram,binary,read]) of
- {ok,File} ->
- _ = [ram_file:uncompress(File) || Opts =:= [compressed]],
- init(File,read,file_fun());
- Error ->
- Error
- end;
-open1({file, Fd}, read, _Raw, _Opts) ->
- init(Fd, read, file_fun());
-open1(Name, Access, Raw, Opts) ->
- case file:open(Name, Raw ++ [binary, Access|Opts]) of
- {ok, File} ->
- init(File, Access, file_fun());
- {error, Reason} ->
- {error, {Name, Reason}}
- end.
-
-file_fun() ->
- fun(write, {Fd,Data}) -> file:write(Fd, Data);
- (position, {Fd,Pos}) -> file:position(Fd, Pos);
- (read2, {Fd,Size}) -> file:read(Fd,Size);
- (close, Fd) -> file:close(Fd)
- end.
-
-%%% End of file and binary api (except for open_mode/1 downwards
-%%%================================================================
-
-%% Closes a tar archive.
-
-close({read, File}) ->
- ok = do_close(File);
-close({write, File}) ->
- PadResult = pad_file(File),
- ok = do_close(File),
- PadResult;
-close(_) ->
- {error, einval}.
-
-%% Adds a file to a tape archive.
-
-add(File, Name, Options) ->
- add(File, Name, Name, Options).
-add({write, File}, Name, NameInArchive, Options) ->
- Opts = #add_opts{read_info=fun(F) -> file:read_link_info(F) end},
- add1(File, Name, NameInArchive, add_opts(Options, Opts));
-add({read, _File}, _, _, _) ->
- {error, eacces};
-add(_, _, _, _) ->
- {error, einval}.
-
-add_opts([dereference|T], Opts) ->
- add_opts(T, Opts#add_opts{read_info=fun(F) -> file:read_file_info(F) end});
-add_opts([verbose|T], Opts) ->
- add_opts(T, Opts#add_opts{verbose=true});
-add_opts([{chunks,N}|T], Opts) ->
- add_opts(T, Opts#add_opts{chunk_size=N});
-add_opts([_|T], Opts) ->
- add_opts(T, Opts);
-add_opts([], Opts) ->
- Opts.
-
-%% Creates a tar file Name containing the given files.
-
-create(Name, Filenames) ->
- create(Name, Filenames, []).
-
-%% Creates a tar archive Name containing the given files.
-%% Accepted options: verbose, compressed, cooked
+%% Converts the short error reason to a descriptive string.
+-spec format_error(term()) -> string().
+format_error(invalid_tar_checksum) ->
+ "Checksum failed";
+format_error(bad_header) ->
+ "Unrecognized tar header format";
+format_error({bad_header, Reason}) ->
+ lists:flatten(io_lib:format("Unrecognized tar header format: ~p", [Reason]));
+format_error({invalid_header, negative_size}) ->
+ "Invalid header: negative size";
+format_error(invalid_sparse_header_size) ->
+ "Invalid sparse header: negative size";
+format_error(invalid_sparse_map_entry) ->
+ "Invalid sparse map entry";
+format_error({invalid_sparse_map_entry, Reason}) ->
+ lists:flatten(io_lib:format("Invalid sparse map entry: ~p", [Reason]));
+format_error(invalid_end_of_archive) ->
+ "Invalid end of archive";
+format_error(eof) ->
+ "Unexpected end of file";
+format_error(integer_overflow) ->
+ "Failed to parse numeric: integer overflow";
+format_error({misaligned_read, Pos}) ->
+ lists:flatten(io_lib:format("Read a block which was misaligned: block_size=~p pos=~p",
+ [?BLOCK_SIZE, Pos]));
+format_error(invalid_gnu_1_0_sparsemap) ->
+ "Invalid GNU sparse map (version 1.0)";
+format_error({invalid_gnu_0_1_sparsemap, Format}) ->
+ lists:flatten(io_lib:format("Invalid GNU sparse map (version ~s)", [Format]));
+format_error({Name,Reason}) ->
+ lists:flatten(io_lib:format("~ts: ~ts", [Name,format_error(Reason)]));
+format_error(Atom) when is_atom(Atom) ->
+ file:format_error(Atom);
+format_error(Term) ->
+ lists:flatten(io_lib:format("~tp", [Term])).
-create(Name, FileList, Options) ->
- Mode = lists:filter(fun(X) -> (X=:=compressed) or (X=:=cooked)
- end, Options),
- case open(Name, [write|Mode]) of
- {ok, TarFile} ->
- Add = fun({NmInA, NmOrBin}) ->
- add(TarFile, NmOrBin, NmInA, Options);
- (Nm) ->
- add(TarFile, Nm, Nm, Options)
- end,
- Result = foreach_while_ok(Add, FileList),
- case {Result, close(TarFile)} of
- {ok, Res} -> Res;
- {Res, _} -> Res
- end;
- Reason ->
- Reason
- end.
+%% Initializes a new reader given a custom file handle and I/O wrappers
+-spec init(handle(), write | read, file_op()) -> {ok, reader()} | {error, badarg}.
+init(Handle, AccessMode, Fun) when is_function(Fun, 2) ->
+ Reader = #reader{handle=Handle,access=AccessMode,func=Fun},
+ {ok, Pos, Reader2} = do_position(Reader, {cur, 0}),
+ {ok, Reader2#reader{pos=Pos}};
+init(_Handle, _AccessMode, _Fun) ->
+ {error, badarg}.
+%%%================================================================
%% Extracts all files from the tar file Name.
-
+-spec extract(open_handle()) -> ok | {error, term()}.
extract(Name) ->
extract(Name, []).
%% Extracts (all) files from the tar file Name.
-%% Options accepted: keep_old_files, {files, ListOfFilesToExtract}, verbose,
-%% {cwd, AbsoluteDirectory}
+%% Options accepted:
+%% - cooked: Opens the tar file without mode `raw`
+%% - compressed: Uncompresses the tar file when reading
+%% - memory: Returns the tar contents as a list of tuples {Name, Bin}
+%% - keep_old_files: Extracted files will not overwrite the destination
+%% - {files, ListOfFilesToExtract}: Only extract ListOfFilesToExtract
+%% - verbose: Prints verbose information about the extraction,
+%% - {cwd, AbsoluteDir}: Sets the current working directory for the extraction
+-spec extract(open_handle(), [extract_opt()]) ->
+ ok
+ | {ok, [{string(), binary()}]}
+ | {error, term()}.
+extract({binary, Bin}, Opts) when is_list(Opts) ->
+ do_extract({binary, Bin}, Opts);
+extract({file, Fd}, Opts) when is_list(Opts) ->
+ do_extract({file, Fd}, Opts);
+extract(#reader{}=Reader, Opts) when is_list(Opts) ->
+ do_extract(Reader, Opts);
+extract(Name, Opts) when is_list(Name); is_binary(Name), is_list(Opts) ->
+ do_extract(Name, Opts).
+
+do_extract(Handle, Opts) when is_list(Opts) ->
+ Opts2 = extract_opts(Opts),
+ Acc = if Opts2#read_opts.output =:= memory -> []; true -> ok end,
+ foldl_read(Handle, fun extract1/4, Acc, Opts2).
+
+extract1(eof, Reader, _, Acc) when is_list(Acc) ->
+ {ok, {ok, lists:reverse(Acc)}, Reader};
+extract1(eof, Reader, _, Acc) ->
+ {ok, Acc, Reader};
+extract1(#tar_header{name=Name,size=Size}=Header, Reader, Opts, Acc) ->
+ case check_extract(Name, Opts) of
+ true ->
+ case do_read(Reader, Size) of
+ {ok, Bin, Reader2} ->
+ case write_extracted_element(Header, Bin, Opts) of
+ ok ->
+ {ok, Acc, Reader2};
+ {ok, NameBin} when is_list(Acc) ->
+ {ok, [NameBin | Acc], Reader2};
+ {error, _} = Err ->
+ throw(Err)
+ end;
+ {error, _} = Err ->
+ throw(Err)
+ end;
+ false ->
+ {ok, Acc, skip_file(Reader)}
+ end.
-extract(Name, Opts) ->
- foldl_read(Name, fun extract1/4, ok, extract_opts(Opts)).
+%% Checks if the file Name should be extracted.
+check_extract(_, #read_opts{files=all}) ->
+ true;
+check_extract(Name, #read_opts{files=Files}) ->
+ ordsets:is_element(Name, Files).
-%% Returns a list of names of the files in the tar file Name.
-%% Options accepted: verbose
+%%%================================================================
+%% The following table functions produce a list of information about
+%% the files contained in the archive.
+-type filename() :: string().
+-type typeflag() :: regular | link | symlink |
+ char | block | directory |
+ fifo | reserved | unknown.
+-type mode() :: non_neg_integer().
+-type uid() :: non_neg_integer().
+-type gid() :: non_neg_integer().
+
+-type tar_entry() :: {filename(),
+ typeflag(),
+ non_neg_integer(),
+ calendar:datetime(),
+ mode(),
+ uid(),
+ gid()}.
+%% Returns a list of names of the files in the tar file Name.
+-spec table(open_handle()) -> {ok, [string()]} | {error, term()}.
table(Name) ->
table(Name, []).
%% Returns a list of names of the files in the tar file Name.
%% Options accepted: compressed, verbose, cooked.
-
-table(Name, Opts) ->
+-spec table(open_handle(), [compressed | verbose | cooked]) ->
+ {ok, [tar_entry()]} | {error, term()}.
+table(Name, Opts) when is_list(Opts) ->
foldl_read(Name, fun table1/4, [], table_opts(Opts)).
+table1(eof, Reader, _, Result) ->
+ {ok, {ok, lists:reverse(Result)}, Reader};
+table1(#tar_header{}=Header, Reader, #read_opts{verbose=Verbose}, Result) ->
+ Attrs = table1_attrs(Header, Verbose),
+ Reader2 = skip_file(Reader),
+ {ok, [Attrs|Result], Reader2}.
+
+%% Extracts attributes relevant to table1's output
+table1_attrs(#tar_header{typeflag=Typeflag,mode=Mode}=Header, true) ->
+ Type = typeflag(Typeflag),
+ Name = Header#tar_header.name,
+ Mtime = Header#tar_header.mtime,
+ Uid = Header#tar_header.uid,
+ Gid = Header#tar_header.gid,
+ Size = Header#tar_header.size,
+ {Name, Type, Size, Mtime, Mode, Uid, Gid};
+table1_attrs(#tar_header{name=Name}, _Verbose) ->
+ Name.
+
+typeflag(?TYPE_REGULAR) -> regular;
+typeflag(?TYPE_REGULAR_A) -> regular;
+typeflag(?TYPE_GNU_SPARSE) -> regular;
+typeflag(?TYPE_CONT) -> regular;
+typeflag(?TYPE_LINK) -> link;
+typeflag(?TYPE_SYMLINK) -> symlink;
+typeflag(?TYPE_CHAR) -> char;
+typeflag(?TYPE_BLOCK) -> block;
+typeflag(?TYPE_DIR) -> directory;
+typeflag(?TYPE_FIFO) -> fifo;
+typeflag(_) -> unknown.
+%%%================================================================
%% Comments for printing the contents of a tape archive,
%% meant to be invoked from the shell.
-t(Name) ->
+%% Prints each filename in the archive
+-spec t(file:filename()) -> ok | {error, term()}.
+t(Name) when is_list(Name); is_binary(Name) ->
case table(Name) of
- {ok, List} ->
- lists:foreach(fun(N) -> ok = io:format("~ts\n", [N]) end, List);
- Error ->
- Error
+ {ok, List} ->
+ lists:foreach(fun(N) -> ok = io:format("~ts\n", [N]) end, List);
+ Error ->
+ Error
end.
+%% Prints verbose information about each file in the archive
+-spec tt(open_handle()) -> ok | {error, term()}.
tt(Name) ->
case table(Name, [verbose]) of
- {ok, List} ->
- lists:foreach(fun print_header/1, List);
- Error ->
- Error
+ {ok, List} ->
+ lists:foreach(fun print_header/1, List);
+ Error ->
+ Error
end.
+%% Used by tt/1 to print a tar_entry tuple
+-spec print_header(tar_entry()) -> ok.
print_header({Name, Type, Size, Mtime, Mode, Uid, Gid}) ->
io:format("~s~s ~4w/~-4w ~7w ~s ~s\n",
- [type_to_string(Type), mode_to_string(Mode),
- Uid, Gid, Size, time_to_string(Mtime), Name]).
+ [type_to_string(Type), mode_to_string(Mode),
+ Uid, Gid, Size, time_to_string(Mtime), Name]).
-type_to_string(regular) -> "-";
+type_to_string(regular) -> "-";
type_to_string(directory) -> "d";
-type_to_string(link) -> "l";
-type_to_string(symlink) -> "s";
-type_to_string(char) -> "c";
-type_to_string(block) -> "b";
-type_to_string(fifo) -> "f";
-type_to_string(_) -> "?".
-
+type_to_string(link) -> "l";
+type_to_string(symlink) -> "s";
+type_to_string(char) -> "c";
+type_to_string(block) -> "b";
+type_to_string(fifo) -> "f";
+type_to_string(unknown) -> "?".
+
+%% Converts a numeric mode to its human-readable representation
mode_to_string(Mode) ->
mode_to_string(Mode, "xwrxwrxwr", []).
-
mode_to_string(Mode, [C|T], Acc) when Mode band 1 =:= 1 ->
mode_to_string(Mode bsr 1, T, [C|Acc]);
mode_to_string(Mode, [_|T], Acc) ->
@@ -206,6 +260,7 @@ mode_to_string(Mode, [_|T], Acc) ->
mode_to_string(_, [], Acc) ->
Acc.
+%% Converts a datetime tuple to a readable string
time_to_string({{Y, Mon, Day}, {H, Min, _}}) ->
io_lib:format("~s ~2w ~s:~s ~w", [month(Mon), Day, two_d(H), two_d(Min), Y]).
@@ -225,809 +280,1608 @@ month(10) -> "Oct";
month(11) -> "Nov";
month(12) -> "Dec".
-%% Converts the short error reason to a descriptive string.
+%%%================================================================
+%% The open function with friends is to keep the file and binary api of this module
+-type open_handle() :: file:filename()
+ | {binary, binary()}
+ | {file, term()}.
+-spec open(open_handle(), [write | compressed | cooked]) ->
+ {ok, reader()} | {error, term()}.
+open({binary, Bin}, Mode) when is_binary(Bin) ->
+ do_open({binary, Bin}, Mode);
+open({file, Fd}, Mode) ->
+ do_open({file, Fd}, Mode);
+open(Name, Mode) when is_list(Name); is_binary(Name) ->
+ do_open(Name, Mode).
+
+do_open(Name, Mode) when is_list(Mode) ->
+ case open_mode(Mode) of
+ {ok, Access, Raw, Opts} ->
+ open1(Name, Access, Raw, Opts);
+ {error, Reason} ->
+ {error, {Name, Reason}}
+ end.
-format_error(bad_header) -> "Bad directory header";
-format_error(eof) -> "Unexpected end of file";
-format_error(symbolic_link_too_long) -> "Symbolic link too long";
-format_error({Name,Reason}) ->
- lists:flatten(io_lib:format("~ts: ~ts", [Name,format_error(Reason)]));
-format_error(Atom) when is_atom(Atom) ->
- file:format_error(Atom);
-format_error(Term) ->
- lists:flatten(io_lib:format("~tp", [Term])).
+open1({binary,Bin}, read, _Raw, Opts) when is_binary(Bin) ->
+ case file:open(Bin, [ram,binary,read]) of
+ {ok,File} ->
+ _ = [ram_file:uncompress(File) || Opts =:= [compressed]],
+ {ok, #reader{handle=File,access=read,func=fun file_op/2}};
+ Error ->
+ Error
+ end;
+open1({file, Fd}, read, _Raw, _Opts) ->
+ Reader = #reader{handle=Fd,access=read,func=fun file_op/2},
+ case do_position(Reader, {cur, 0}) of
+ {ok, Pos, Reader2} ->
+ {ok, Reader2#reader{pos=Pos}};
+ {error, _} = Err ->
+ Err
+ end;
+open1(Name, Access, Raw, Opts) when is_list(Name) or is_binary(Name) ->
+ case file:open(Name, Raw ++ [binary, Access|Opts]) of
+ {ok, File} ->
+ {ok, #reader{handle=File,access=Access,func=fun file_op/2}};
+ {error, Reason} ->
+ {error, {Name, Reason}}
+ end.
+open_mode(Mode) ->
+ open_mode(Mode, false, [raw], []).
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%
-%%% Useful definitions (also start of implementation).
-%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-%% Offset for fields in the tar header.
-%% Note that these offsets are ZERO-based as in the POSIX standard
-%% document, while binaries use ONE-base offset. Caveat Programmer.
-
--define(th_name, 0).
--define(th_mode, 100).
--define(th_uid, 108).
--define(th_gid, 116).
--define(th_size, 124).
--define(th_mtime, 136).
--define(th_chksum, 148).
--define(th_typeflag, 156).
--define(th_linkname, 157).
--define(th_magic, 257).
--define(th_version, 263).
--define(th_prefix, 345).
-
-%% Length of these fields.
-
--define(th_name_len, 100).
--define(th_mode_len, 8).
--define(th_uid_len, 8).
--define(th_gid_len, 8).
--define(th_size_len, 12).
--define(th_mtime_len, 12).
--define(th_chksum_len, 8).
--define(th_linkname_len, 100).
--define(th_magic_len, 6).
--define(th_version_len, 2).
--define(th_prefix_len, 167).
-
--record(tar_header,
- {name, % Name of file.
- mode, % Mode bits.
- uid, % User id.
- gid, % Group id.
- size, % Size of file
- mtime, % Last modified (seconds since
- % Jan 1, 1970).
- chksum, % Checksum of header.
- typeflag = [], % Type of file.
- linkname = [], % Name of link.
- filler = [],
- prefix}). % Filename prefix.
-
--define(record_size, 512).
--define(block_size, (512*20)).
-
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%
-%%% Adding members to a tar archive.
-%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-add1(TarFile, Bin, NameInArchive, Opts) when is_binary(Bin) ->
- Now = calendar:now_to_local_time(erlang:timestamp()),
- Info = #file_info{size = byte_size(Bin),
- type = regular,
- access = read_write,
- atime = Now,
- mtime = Now,
- ctime = Now,
- mode = 8#100644,
- links = 1,
- major_device = 0,
- minor_device = 0,
- inode = 0,
- uid = 0,
- gid = 0},
- Header = create_header(NameInArchive, Info),
- add1(TarFile, NameInArchive, Header, Bin, Opts);
-add1(TarFile, Name, NameInArchive, Opts) ->
- case read_file_and_info(Name, Opts) of
- {ok, Bin, Info} when Info#file_info.type =:= regular ->
- Header = create_header(NameInArchive, Info),
- add1(TarFile, Name, Header, Bin, Opts);
- {ok, PointsTo, Info} when Info#file_info.type =:= symlink ->
- if
- length(PointsTo) > 100 ->
- {error,{PointsTo,symbolic_link_too_long}};
- true ->
- Info2 = Info#file_info{size=0},
- Header = create_header(NameInArchive, Info2, PointsTo),
- add1(TarFile, Name, Header, list_to_binary([]), Opts)
- end;
- {ok, _, Info} when Info#file_info.type =:= directory ->
- add_directory(TarFile, Name, NameInArchive, Info, Opts);
- {ok, _, #file_info{type=Type}} ->
- {error, {bad_file_type, Name, Type}};
- {error, Reason} ->
- {error, {Name, Reason}}
+open_mode(read, _, Raw, _) ->
+ {ok, read, Raw, []};
+open_mode(write, _, Raw, _) ->
+ {ok, write, Raw, []};
+open_mode([read|Rest], false, Raw, Opts) ->
+ open_mode(Rest, read, Raw, Opts);
+open_mode([write|Rest], false, Raw, Opts) ->
+ open_mode(Rest, write, Raw, Opts);
+open_mode([compressed|Rest], Access, Raw, Opts) ->
+ open_mode(Rest, Access, Raw, [compressed|Opts]);
+open_mode([cooked|Rest], Access, _Raw, Opts) ->
+ open_mode(Rest, Access, [], Opts);
+open_mode([], Access, Raw, Opts) ->
+ {ok, Access, Raw, Opts};
+open_mode(_, _, _, _) ->
+ {error, einval}.
+
+file_op(write, {Fd, Data}) ->
+ file:write(Fd, Data);
+file_op(position, {Fd, Pos}) ->
+ file:position(Fd, Pos);
+file_op(read2, {Fd, Size}) ->
+ file:read(Fd, Size);
+file_op(close, Fd) ->
+ file:close(Fd).
+
+%% Closes a tar archive.
+-spec close(reader()) -> ok | {error, term()}.
+close(#reader{access=read}=Reader) ->
+ ok = do_close(Reader);
+close(#reader{access=write}=Reader) ->
+ {ok, Reader2} = pad_file(Reader),
+ ok = do_close(Reader2),
+ ok;
+close(_) ->
+ {error, einval}.
+
+pad_file(#reader{pos=Pos}=Reader) ->
+ %% There must be at least two zero blocks at the end.
+ PadCurrent = skip_padding(Pos+?BLOCK_SIZE),
+ Padding = <<0:PadCurrent/unit:8>>,
+ do_write(Reader, [Padding, ?ZERO_BLOCK, ?ZERO_BLOCK]).
+
+
+%%%================================================================
+%% Creation/modification of tar archives
+
+%% Creates a tar file Name containing the given files.
+-spec create(file:filename(), filelist()) -> ok | {error, {string(), term()}}.
+create(Name, FileList) when is_list(Name); is_binary(Name) ->
+ create(Name, FileList, []).
+
+%% Creates a tar archive Name containing the given files.
+%% Accepted options: verbose, compressed, cooked
+-spec create(file:filename(), filelist(), [create_opt()]) ->
+ ok | {error, term()} | {error, {string(), term()}}.
+create(Name, FileList, Options) when is_list(Name); is_binary(Name) ->
+ Mode = lists:filter(fun(X) -> (X=:=compressed) or (X=:=cooked)
+ end, Options),
+ case open(Name, [write|Mode]) of
+ {ok, TarFile} ->
+ do_create(TarFile, FileList, Options);
+ {error, _} = Err ->
+ Err
end.
-add1(Tar, Name, Header, chunked, Options) ->
- add_verbose(Options, "a ~ts [chunked ", [Name]),
- try
- ok = do_write(Tar, Header),
- {ok,D} = file:open(Name, [read,binary]),
- {ok,NumBytes} = add_read_write_chunks(D, Tar, Options#add_opts.chunk_size, 0, Options),
- _ = file:close(D),
- ok = do_write(Tar, padding(NumBytes,?record_size))
- of
- ok ->
- add_verbose(Options, "~n", []),
- ok
- catch
- error:{badmatch,{error,Error}} ->
- add_verbose(Options, "~n", []),
- {error,{Name,Error}}
+do_create(TarFile, [], _Opts) ->
+ close(TarFile);
+do_create(TarFile, [{NameInArchive, NameOrBin}|Rest], Opts) ->
+ case add(TarFile, NameOrBin, NameInArchive, Opts) of
+ ok ->
+ do_create(TarFile, Rest, Opts);
+ {error, _} = Err ->
+ _ = close(TarFile),
+ Err
end;
-add1(Tar, Name, Header, Bin, Options) ->
- add_verbose(Options, "a ~ts~n", [Name]),
- do_write(Tar, [Header, Bin, padding(byte_size(Bin), ?record_size)]).
-
-add_read_write_chunks(D, Tar, ChunkSize, SumNumBytes, Options) ->
- case file:read(D, ChunkSize) of
- {ok,Bin} ->
- ok = do_write(Tar, Bin),
- add_verbose(Options, ".", []),
- add_read_write_chunks(D, Tar, ChunkSize, SumNumBytes+byte_size(Bin), Options);
- eof ->
- add_verbose(Options, "]", []),
- {ok,SumNumBytes};
- Other ->
- Other
+do_create(TarFile, [Name|Rest], Opts) ->
+ case add(TarFile, Name, Name, Opts) of
+ ok ->
+ do_create(TarFile, Rest, Opts);
+ {error, _} = Err ->
+ _ = close(TarFile),
+ Err
end.
-add_directory(TarFile, DirName, NameInArchive, Info, Options) ->
+%% Adds a file to a tape archive.
+-type add_type() :: string()
+ | {string(), string()}
+ | {string(), binary()}.
+-spec add(reader(), add_type(), [add_opt()]) -> ok | {error, term()}.
+add(Reader, {NameInArchive, Name}, Opts)
+ when is_list(NameInArchive), is_list(Name) ->
+ do_add(Reader, Name, NameInArchive, Opts);
+add(Reader, {NameInArchive, Bin}, Opts)
+ when is_list(NameInArchive), is_binary(Bin) ->
+ do_add(Reader, Bin, NameInArchive, Opts);
+add(Reader, Name, Opts) when is_list(Name) ->
+ do_add(Reader, Name, Name, Opts).
+
+
+-spec add(reader(), string() | binary(), string(), [add_opt()]) ->
+ ok | {error, term()}.
+add(Reader, NameOrBin, NameInArchive, Options)
+ when is_list(NameOrBin); is_binary(NameOrBin),
+ is_list(NameInArchive), is_list(Options) ->
+ do_add(Reader, NameOrBin, NameInArchive, Options).
+
+do_add(#reader{access=write}=Reader, Name, NameInArchive, Options)
+ when is_list(NameInArchive), is_list(Options) ->
+ Opts = #add_opts{read_info=fun(F) -> file:read_link_info(F) end},
+ add1(Reader, Name, NameInArchive, add_opts(Options, Opts));
+do_add(#reader{access=read},_,_,_) ->
+ {error, eacces};
+do_add(Reader,_,_,_) ->
+ {error, {badarg, Reader}}.
+
+add_opts([dereference|T], Opts) ->
+ add_opts(T, Opts#add_opts{read_info=fun(F) -> file:read_file_info(F) end});
+add_opts([verbose|T], Opts) ->
+ add_opts(T, Opts#add_opts{verbose=true});
+add_opts([{chunks,N}|T], Opts) ->
+ add_opts(T, Opts#add_opts{chunk_size=N});
+add_opts([_|T], Opts) ->
+ add_opts(T, Opts);
+add_opts([], Opts) ->
+ Opts.
+
+add1(#reader{}=Reader, Name, NameInArchive, #add_opts{read_info=ReadInfo}=Opts)
+ when is_list(Name) ->
+ Res = case ReadInfo(Name) of
+ {error, Reason0} ->
+ {error, {Name, Reason0}};
+ {ok, #file_info{type=symlink}=Fi} ->
+ add_verbose(Opts, "a ~ts~n", [NameInArchive]),
+ {ok, Linkname} = file:read_link(Name),
+ Header = fileinfo_to_header(NameInArchive, Fi, Linkname),
+ add_header(Reader, Header, Opts);
+ {ok, #file_info{type=regular}=Fi} ->
+ add_verbose(Opts, "a ~ts~n", [NameInArchive]),
+ Header = fileinfo_to_header(NameInArchive, Fi, false),
+ {ok, Reader2} = add_header(Reader, Header, Opts),
+ FileSize = Header#tar_header.size,
+ {ok, FileSize, Reader3} = do_copy(Reader2, Name, Opts),
+ Padding = skip_padding(FileSize),
+ Pad = <<0:Padding/unit:8>>,
+ do_write(Reader3, Pad);
+ {ok, #file_info{type=directory}=Fi} ->
+ add_directory(Reader, Name, NameInArchive, Fi, Opts);
+ {ok, #file_info{}=Fi} ->
+ add_verbose(Opts, "a ~ts~n", [NameInArchive]),
+ Header = fileinfo_to_header(NameInArchive, Fi, false),
+ add_header(Reader, Header, Opts)
+ end,
+ case Res of
+ ok -> ok;
+ {ok, _Reader} -> ok;
+ {error, _Reason} = Err -> Err
+ end;
+add1(Reader, Bin, NameInArchive, Opts) when is_binary(Bin) ->
+ add_verbose(Opts, "a ~ts~n", [NameInArchive]),
+ Now = calendar:now_to_local_time(erlang:timestamp()),
+ Header = #tar_header{
+ name = NameInArchive,
+ size = byte_size(Bin),
+ typeflag = ?TYPE_REGULAR,
+ atime = Now,
+ mtime = Now,
+ ctime = Now,
+ mode = 8#100644},
+ {ok, Reader2} = add_header(Reader, Header, Opts),
+ Padding = skip_padding(byte_size(Bin)),
+ Data = [Bin, <<0:Padding/unit:8>>],
+ case do_write(Reader2, Data) of
+ {ok, _Reader3} -> ok;
+ {error, Reason} -> {error, {NameInArchive, Reason}}
+ end.
+
+add_directory(Reader, DirName, NameInArchive, Info, Opts) ->
case file:list_dir(DirName) of
- {ok, []} ->
- add_verbose(Options, "a ~ts~n", [DirName]),
- Header = create_header(NameInArchive, Info),
- do_write(TarFile, Header);
- {ok, Files} ->
- Add = fun (File) ->
- add1(TarFile,
- filename:join(DirName, File),
- filename:join(NameInArchive, File),
- Options) end,
- foreach_while_ok(Add, Files);
- {error, Reason} ->
- {error, {DirName, Reason}}
+ {ok, []} ->
+ add_verbose(Opts, "a ~ts~n", [NameInArchive]),
+ Header = fileinfo_to_header(NameInArchive, Info, false),
+ add_header(Reader, Header, Opts);
+ {ok, Files} ->
+ add_verbose(Opts, "a ~ts~n", [NameInArchive]),
+ try add_files(Reader, Files, DirName, NameInArchive, Opts) of
+ ok -> ok;
+ {error, _} = Err -> Err
+ catch
+ throw:{error, {_Name, _Reason}} = Err -> Err;
+ throw:{error, Reason} -> {error, {DirName, Reason}}
+ end;
+ {error, Reason} ->
+ {error, {DirName, Reason}}
end.
-
-%% Creates a header for file in a tar file.
-
-create_header(Name, Info) ->
- create_header(Name, Info, []).
-create_header(Name, #file_info {mode=Mode, uid=Uid, gid=Gid,
- size=Size, mtime=Mtime0, type=Type}, Linkname) ->
- Mtime = posix_time(erlang:localtime_to_universaltime(Mtime0)),
- {Prefix,Suffix} = split_filename(Name),
- H0 = [to_string(Suffix, 100),
- to_octal(Mode, 8),
- to_octal(Uid, 8),
- to_octal(Gid, 8),
- to_octal(Size, ?th_size_len),
- to_octal(Mtime, ?th_mtime_len),
- <<" ">>,
- file_type(Type),
- to_string(Linkname, ?th_linkname_len),
- "ustar",0,
- "00",
- zeroes(?th_prefix-?th_version-?th_version_len),
- to_string(Prefix, ?th_prefix_len)],
- H = list_to_binary(H0),
- 512 = byte_size(H), %Assertion.
- ChksumString = to_octal(checksum(H), 6, [0,$\s]),
- <<Before:?th_chksum/binary,_:?th_chksum_len/binary,After/binary>> = H,
- [Before,ChksumString,After].
-
-file_type(regular) -> $0;
-file_type(symlink) -> $2;
-file_type(directory) -> $5.
-
-to_octal(Int, Count) when Count > 1 ->
- to_octal(Int, Count-1, [0]).
-
-to_octal(_, 0, Result) -> Result;
-to_octal(Int, Count, Result) ->
- to_octal(Int div 8, Count-1, [Int rem 8 + $0|Result]).
-
-to_string(Str0, Count) ->
- Str = case file:native_name_encoding() of
- utf8 ->
- unicode:characters_to_binary(Str0);
- latin1 ->
- list_to_binary(Str0)
- end,
- case byte_size(Str) of
- Size when Size < Count ->
- [Str|zeroes(Count-Size)];
- _ -> Str
+
+add_files(_Reader, [], _Dir, _DirInArchive, _Opts) ->
+ ok;
+add_files(Reader, [Name|Rest], Dir, DirInArchive, #add_opts{read_info=Info}=Opts) ->
+ FullName = filename:join(Dir, Name),
+ NameInArchive = filename:join(DirInArchive, Name),
+ Res = case Info(FullName) of
+ {error, Reason} ->
+ {error, {FullName, Reason}};
+ {ok, #file_info{type=directory}=Fi} ->
+ add_directory(Reader, FullName, NameInArchive, Fi, Opts);
+ {ok, #file_info{type=symlink}=Fi} ->
+ add_verbose(Opts, "a ~ts~n", [NameInArchive]),
+ {ok, Linkname} = file:read_link(FullName),
+ Header = fileinfo_to_header(NameInArchive, Fi, Linkname),
+ add_header(Reader, Header, Opts);
+ {ok, #file_info{type=regular}=Fi} ->
+ add_verbose(Opts, "a ~ts~n", [NameInArchive]),
+ Header = fileinfo_to_header(NameInArchive, Fi, false),
+ {ok, Reader2} = add_header(Reader, Header, Opts),
+ FileSize = Header#tar_header.size,
+ {ok, FileSize, Reader3} = do_copy(Reader2, FullName, Opts),
+ Padding = skip_padding(FileSize),
+ Pad = <<0:Padding/unit:8>>,
+ do_write(Reader3, Pad);
+ {ok, #file_info{}=Fi} ->
+ add_verbose(Opts, "a ~ts~n", [NameInArchive]),
+ Header = fileinfo_to_header(NameInArchive, Fi, false),
+ add_header(Reader, Header, Opts)
+ end,
+ case Res of
+ ok -> add_files(Reader, Rest, Dir, DirInArchive, Opts);
+ {ok, ReaderNext} -> add_files(ReaderNext, Rest, Dir, DirInArchive, Opts);
+ {error, _} = Err -> Err
end.
-%% Pads out end of file.
-
-pad_file(File) ->
- {ok,Position} = do_position(File, {cur,0}),
- %% There must be at least two zero records at the end.
- Fill = case ?block_size - (Position rem ?block_size) of
- Fill0 when Fill0 < 2*?record_size ->
- %% We need to another block here to ensure that there
- %% are at least two zero records at the end.
- Fill0 + ?block_size;
- Fill0 ->
- %% Large enough.
- Fill0
- end,
- do_write(File, zeroes(Fill)).
-
-split_filename(Name) when length(Name) =< ?th_name_len ->
- {"", Name};
-split_filename(Name0) ->
- split_filename(lists:reverse(filename:split(Name0)), [], [], 0).
-
-split_filename([Comp|Rest], Prefix, Suffix, Len)
- when Len+length(Comp) < ?th_name_len ->
- split_filename(Rest, Prefix, [Comp|Suffix], Len+length(Comp)+1);
-split_filename([Comp|Rest], Prefix, Suffix, Len) ->
- split_filename(Rest, [Comp|Prefix], Suffix, Len+length(Comp)+1);
-split_filename([], Prefix, Suffix, _) ->
- {filename:join(Prefix),filename:join(Suffix)}.
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%
-%%% Retrieving files from a tape archive.
-%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-%% Options used when reading a tar archive.
-
--record(read_opts,
- {cwd :: string(), % Current working directory.
- keep_old_files = false :: boolean(), % Owerwrite or not.
- files = all, % Set of files to extract
- % (or all).
- output = file :: 'file' | 'memory',
- open_mode = [], % Open mode options.
- verbose = false :: boolean()}). % Verbose on/off.
+format_string(String, Size) when length(String) > Size ->
+ throw({error, {write_string, field_too_long}});
+format_string(String, Size) ->
+ Ascii = to_ascii(String),
+ if byte_size(Ascii) < Size ->
+ [Ascii, 0];
+ true ->
+ Ascii
+ end.
-extract_opts(List) ->
- extract_opts(List, default_options()).
+format_octal(Octal) ->
+ iolist_to_binary(io_lib:fwrite("~.8B", [Octal])).
+
+add_header(#reader{}=Reader, #tar_header{}=Header, Opts) ->
+ {ok, Iodata} = build_header(Header, Opts),
+ do_write(Reader, Iodata).
+
+write_to_block(Block, IoData, Start) when is_list(IoData) ->
+ write_to_block(Block, iolist_to_binary(IoData), Start);
+write_to_block(Block, Bin, Start) when is_binary(Bin) ->
+ Size = byte_size(Bin),
+ <<Head:Start/unit:8, _:Size/unit:8, Rest/binary>> = Block,
+ <<Head:Start/unit:8, Bin/binary, Rest/binary>>.
+
+build_header(#tar_header{}=Header, Opts) ->
+ #tar_header{
+ name=Name,
+ mode=Mode,
+ uid=Uid,
+ gid=Gid,
+ size=Size,
+ typeflag=Type,
+ linkname=Linkname,
+ uname=Uname,
+ gname=Gname,
+ devmajor=Devmaj,
+ devminor=Devmin
+ } = Header,
+ Mtime = datetime_to_posix(Header#tar_header.mtime),
+
+ Block0 = ?ZERO_BLOCK,
+ {Block1, Pax0} = write_string(Block0, ?V7_NAME, ?V7_NAME_LEN, Name, ?PAX_PATH, #{}),
+ Block2 = write_octal(Block1, ?V7_MODE, ?V7_MODE_LEN, Mode),
+ {Block3, Pax1} = write_numeric(Block2, ?V7_UID, ?V7_UID_LEN, Uid, ?PAX_UID, Pax0),
+ {Block4, Pax2} = write_numeric(Block3, ?V7_GID, ?V7_GID_LEN, Gid, ?PAX_GID, Pax1),
+ {Block5, Pax3} = write_numeric(Block4, ?V7_SIZE, ?V7_SIZE_LEN, Size, ?PAX_SIZE, Pax2),
+ {Block6, Pax4} = write_numeric(Block5, ?V7_MTIME, ?V7_MTIME_LEN, Mtime, ?PAX_NONE, Pax3),
+ {Block7, Pax5} = write_string(Block6, ?V7_TYPE, ?V7_TYPE_LEN, <<Type>>, ?PAX_NONE, Pax4),
+ {Block8, Pax6} = write_string(Block7, ?V7_LINKNAME, ?V7_LINKNAME_LEN,
+ Linkname, ?PAX_LINKPATH, Pax5),
+ {Block9, Pax7} = write_string(Block8, ?USTAR_UNAME, ?USTAR_UNAME_LEN,
+ Uname, ?PAX_UNAME, Pax6),
+ {Block10, Pax8} = write_string(Block9, ?USTAR_GNAME, ?USTAR_GNAME_LEN,
+ Gname, ?PAX_GNAME, Pax7),
+ {Block11, Pax9} = write_numeric(Block10, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN,
+ Devmaj, ?PAX_NONE, Pax8),
+ {Block12, Pax10} = write_numeric(Block11, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN,
+ Devmin, ?PAX_NONE, Pax9),
+ {Block13, Pax11} = set_path(Block12, Pax10),
+ PaxEntry = case maps:size(Pax11) of
+ 0 -> [];
+ _ -> build_pax_entry(Header, Pax11, Opts)
+ end,
+ Block14 = set_format(Block13, ?FORMAT_USTAR),
+ Block15 = set_checksum(Block14),
+ {ok, [PaxEntry, Block15]}.
+
+set_path(Block0, Pax) ->
+ %% only use ustar header when name is too long
+ case maps:get(?PAX_PATH, Pax, nil) of
+ nil ->
+ {Block0, Pax};
+ PaxPath ->
+ case split_ustar_path(PaxPath) of
+ {ok, UstarName, UstarPrefix} ->
+ {Block1, _} = write_string(Block0, ?V7_NAME, ?V7_NAME_LEN,
+ UstarName, ?PAX_NONE, #{}),
+ {Block2, _} = write_string(Block1, ?USTAR_PREFIX, ?USTAR_PREFIX_LEN,
+ UstarPrefix, ?PAX_NONE, #{}),
+ {Block2, maps:remove(?PAX_PATH, Pax)};
+ false ->
+ {Block0, Pax}
+ end
+ end.
-table_opts(List) ->
- read_opts(List, default_options()).
+set_format(Block0, Format)
+ when Format =:= ?FORMAT_USTAR; Format =:= ?FORMAT_PAX ->
+ Block1 = write_to_block(Block0, ?MAGIC_USTAR, ?USTAR_MAGIC),
+ write_to_block(Block1, ?VERSION_USTAR, ?USTAR_VERSION);
+set_format(_Block, Format) ->
+ throw({error, {invalid_format, Format}}).
+
+set_checksum(Block) ->
+ Checksum = compute_checksum(Block),
+ write_octal(Block, ?V7_CHKSUM, ?V7_CHKSUM_LEN, Checksum).
+
+build_pax_entry(Header, PaxAttrs, Opts) ->
+ Path = Header#tar_header.name,
+ Filename = filename:basename(Path),
+ Dir = filename:dirname(Path),
+ Path2 = filename:join([Dir, "PaxHeaders.0", Filename]),
+ AsciiPath = to_ascii(Path2),
+ Path3 = if byte_size(AsciiPath) > ?V7_NAME_LEN ->
+ binary_part(AsciiPath, 0, ?V7_NAME_LEN - 1);
+ true ->
+ AsciiPath
+ end,
+ Keys = maps:keys(PaxAttrs),
+ SortedKeys = lists:sort(Keys),
+ PaxFile = build_pax_file(SortedKeys, PaxAttrs),
+ Size = byte_size(PaxFile),
+ Padding = (?BLOCK_SIZE -
+ (byte_size(PaxFile) rem ?BLOCK_SIZE)) rem ?BLOCK_SIZE,
+ Pad = <<0:Padding/unit:8>>,
+ PaxHeader = #tar_header{
+ name=unicode:characters_to_list(Path3),
+ size=Size,
+ mtime=Header#tar_header.mtime,
+ atime=Header#tar_header.atime,
+ ctime=Header#tar_header.ctime,
+ typeflag=?TYPE_X_HEADER
+ },
+ {ok, PaxHeaderData} = build_header(PaxHeader, Opts),
+ [PaxHeaderData, PaxFile, Pad].
+
+build_pax_file(Keys, PaxAttrs) ->
+ build_pax_file(Keys, PaxAttrs, []).
+build_pax_file([], _, Acc) ->
+ unicode:characters_to_binary(Acc);
+build_pax_file([K|Rest], Attrs, Acc) ->
+ V = maps:get(K, Attrs),
+ Size = sizeof(K) + sizeof(V) + 3,
+ Size2 = sizeof(Size) + Size,
+ Key = to_string(K),
+ Value = to_string(V),
+ Record = unicode:characters_to_binary(io_lib:format("~B ~ts=~ts\n", [Size2, Key, Value])),
+ if byte_size(Record) =/= Size2 ->
+ Size3 = byte_size(Record),
+ Record2 = io_lib:format("~B ~ts=~ts\n", [Size3, Key, Value]),
+ build_pax_file(Rest, Attrs, [Acc, Record2]);
+ true ->
+ build_pax_file(Rest, Attrs, [Acc, Record])
+ end.
-default_options() ->
- {ok, Cwd} = file:get_cwd(),
- #read_opts{cwd=Cwd}.
+sizeof(Bin) when is_binary(Bin) ->
+ byte_size(Bin);
+sizeof(List) when is_list(List) ->
+ length(List);
+sizeof(N) when is_integer(N) ->
+ byte_size(integer_to_binary(N));
+sizeof(N) when is_float(N) ->
+ byte_size(float_to_binary(N)).
+
+to_string(Bin) when is_binary(Bin) ->
+ unicode:characters_to_list(Bin);
+to_string(List) when is_list(List) ->
+ List;
+to_string(N) when is_integer(N) ->
+ integer_to_list(N);
+to_string(N) when is_float(N) ->
+ float_to_list(N).
+
+split_ustar_path(Path) ->
+ Len = length(Path),
+ NotAscii = not is_ascii(Path),
+ if Len =< ?V7_NAME_LEN; NotAscii ->
+ false;
+ true ->
+ PathBin = binary:list_to_bin(Path),
+ case binary:split(PathBin, [<<$/>>], [global, trim_all]) of
+ [Part] when byte_size(Part) >= ?V7_NAME_LEN ->
+ false;
+ Parts ->
+ case lists:last(Parts) of
+ Name when byte_size(Name) >= ?V7_NAME_LEN ->
+ false;
+ Name ->
+ Parts2 = lists:sublist(Parts, length(Parts) - 1),
+ join_split_ustar_path(Parts2, {ok, Name, nil})
+ end
+ end
+ end.
-%% Parse options for extract.
+join_split_ustar_path([], Acc) ->
+ Acc;
+join_split_ustar_path([Part|_], {ok, _, nil})
+ when byte_size(Part) > ?USTAR_PREFIX_LEN ->
+ false;
+join_split_ustar_path([Part|_], {ok, _Name, Acc})
+ when (byte_size(Part)+byte_size(Acc)) > ?USTAR_PREFIX_LEN ->
+ false;
+join_split_ustar_path([Part|Rest], {ok, Name, nil}) ->
+ join_split_ustar_path(Rest, {ok, Name, Part});
+join_split_ustar_path([Part|Rest], {ok, Name, Acc}) ->
+ join_split_ustar_path(Rest, {ok, Name, <<Acc/binary,$/,Part/binary>>}).
+
+datetime_to_posix(DateTime) ->
+ Epoch = calendar:datetime_to_gregorian_seconds(?EPOCH),
+ Secs = calendar:datetime_to_gregorian_seconds(DateTime),
+ case Secs - Epoch of
+ N when N < 0 -> 0;
+ N -> N
+ end.
-extract_opts([keep_old_files|Rest], Opts) ->
- extract_opts(Rest, Opts#read_opts{keep_old_files=true});
-extract_opts([{cwd, Cwd}|Rest], Opts) ->
- extract_opts(Rest, Opts#read_opts{cwd=Cwd});
-extract_opts([{files, Files}|Rest], Opts) ->
- Set = ordsets:from_list(Files),
- extract_opts(Rest, Opts#read_opts{files=Set});
-extract_opts([memory|Rest], Opts) ->
- extract_opts(Rest, Opts#read_opts{output=memory});
-extract_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
- extract_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]});
-extract_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
- extract_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]});
-extract_opts([verbose|Rest], Opts) ->
- extract_opts(Rest, Opts#read_opts{verbose=true});
-extract_opts([Other|Rest], Opts) ->
- extract_opts(Rest, read_opts([Other], Opts));
-extract_opts([], Opts) ->
- Opts.
+write_octal(Block, Pos, Size, X) ->
+ Octal = zero_pad(format_octal(X), Size-1),
+ if byte_size(Octal) < Size ->
+ write_to_block(Block, Octal, Pos);
+ true ->
+ throw({error, {write_failed, octal_field_too_long}})
+ end.
-%% Common options for all read operations.
+write_string(Block, Pos, Size, Str, PaxAttr, Pax0) ->
+ NotAscii = not is_ascii(Str),
+ if PaxAttr =/= ?PAX_NONE andalso (length(Str) > Size orelse NotAscii) ->
+ Pax1 = maps:put(PaxAttr, Str, Pax0),
+ {Block, Pax1};
+ true ->
+ Formatted = format_string(Str, Size),
+ {write_to_block(Block, Formatted, Pos), Pax0}
+ end.
+write_numeric(Block, Pos, Size, X, PaxAttr, Pax0) ->
+ %% attempt octal
+ Octal = zero_pad(format_octal(X), Size-1),
+ if byte_size(Octal) < Size ->
+ {write_to_block(Block, [Octal, 0], Pos), Pax0};
+ PaxAttr =/= ?PAX_NONE ->
+ Pax1 = maps:put(PaxAttr, X, Pax0),
+ {Block, Pax1};
+ true ->
+ throw({error, {write_failed, numeric_field_too_long}})
+ end.
-read_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
- read_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]});
-read_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
- read_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]});
-read_opts([verbose|Rest], Opts) ->
- read_opts(Rest, Opts#read_opts{verbose=true});
-read_opts([_|Rest], Opts) ->
- read_opts(Rest, Opts);
-read_opts([], Opts) ->
- Opts.
+zero_pad(Str, Size) when byte_size(Str) >= Size ->
+ Str;
+zero_pad(Str, Size) ->
+ Padding = Size - byte_size(Str),
+ Pad = binary:copy(<<$0>>, Padding),
+ <<Pad/binary, Str/binary>>.
-foldl_read({AccessMode,TD={tar_descriptor,_UsrHandle,_AccessFun}}, Fun, Accu, Opts) ->
- case AccessMode of
- read ->
- foldl_read0(TD, Fun, Accu, Opts);
- _ ->
- {error,{read_mode_expected,AccessMode}}
- end;
-foldl_read(TarName, Fun, Accu, Opts) ->
- case open(TarName, [read|Opts#read_opts.open_mode]) of
- {ok, {read, File}} ->
- Result = foldl_read0(File, Fun, Accu, Opts),
- ok = do_close(File),
- Result;
- Error ->
- Error
+
+%%%================================================================
+%% Functions for creating or modifying tar archives
+
+read_block(Reader) ->
+ case do_read(Reader, ?BLOCK_SIZE) of
+ eof ->
+ throw({error, eof});
+ %% Two zero blocks mark the end of the archive
+ {ok, ?ZERO_BLOCK, Reader1} ->
+ case do_read(Reader1, ?BLOCK_SIZE) of
+ eof ->
+ % This is technically a malformed end-of-archive marker,
+ % as two ZERO_BLOCKs are expected as the marker,
+ % but if we've already made it this far, we should just ignore it
+ eof;
+ {ok, ?ZERO_BLOCK, _Reader2} ->
+ eof;
+ {ok, _Block, _Reader2} ->
+ throw({error, invalid_end_of_archive});
+ {error,_} = Err ->
+ throw(Err)
+ end;
+ {ok, Block, Reader1} when is_binary(Block) ->
+ {ok, Block, Reader1};
+ {error, _} = Err ->
+ throw(Err)
end.
-foldl_read0(File, Fun, Accu, Opts) ->
- case catch foldl_read1(Fun, Accu, File, Opts) of
- {'EXIT', Reason} ->
- exit(Reason);
- {error, {Reason, Format, Args}} ->
- read_verbose(Opts, Format, Args),
- {error, Reason};
- {error, Reason} ->
- {error, Reason};
- Ok ->
- Ok
+get_header(#reader{}=Reader) ->
+ case read_block(Reader) of
+ eof ->
+ eof;
+ {ok, Block, Reader1} ->
+ convert_header(Block, Reader1)
end.
-foldl_read1(Fun, Accu0, File, Opts) ->
- case get_header(File) of
- eof ->
- Fun(eof, File, Opts, Accu0);
- Header ->
- {ok, NewAccu} = Fun(Header, File, Opts, Accu0),
- foldl_read1(Fun, NewAccu, File, Opts)
+%% Converts the tar header to a record.
+to_v7(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
+ #header_v7{
+ name=binary_part(Bin, ?V7_NAME, ?V7_NAME_LEN),
+ mode=binary_part(Bin, ?V7_MODE, ?V7_MODE_LEN),
+ uid=binary_part(Bin, ?V7_UID, ?V7_UID_LEN),
+ gid=binary_part(Bin, ?V7_GID, ?V7_GID_LEN),
+ size=binary_part(Bin, ?V7_SIZE, ?V7_SIZE_LEN),
+ mtime=binary_part(Bin, ?V7_MTIME, ?V7_MTIME_LEN),
+ checksum=binary_part(Bin, ?V7_CHKSUM, ?V7_CHKSUM_LEN),
+ typeflag=binary:at(Bin, ?V7_TYPE),
+ linkname=binary_part(Bin, ?V7_LINKNAME, ?V7_LINKNAME_LEN)
+ };
+to_v7(_) ->
+ {error, header_block_too_small}.
+
+to_gnu(#header_v7{}=V7, Bin)
+ when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
+ #header_gnu{
+ header_v7=V7,
+ magic=binary_part(Bin, ?GNU_MAGIC, ?GNU_MAGIC_LEN),
+ version=binary_part(Bin, ?GNU_VERSION, ?GNU_VERSION_LEN),
+ uname=binary_part(Bin, 265, 32),
+ gname=binary_part(Bin, 297, 32),
+ devmajor=binary_part(Bin, 329, 8),
+ devminor=binary_part(Bin, 337, 8),
+ atime=binary_part(Bin, 345, 12),
+ ctime=binary_part(Bin, 357, 12),
+ sparse=to_sparse_array(binary_part(Bin, 386, 24*4+1)),
+ real_size=binary_part(Bin, 483, 12)
+ }.
+
+to_star(#header_v7{}=V7, Bin)
+ when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
+ #header_star{
+ header_v7=V7,
+ magic=binary_part(Bin, ?USTAR_MAGIC, ?USTAR_MAGIC_LEN),
+ version=binary_part(Bin, ?USTAR_VERSION, ?USTAR_VERSION_LEN),
+ uname=binary_part(Bin, ?USTAR_UNAME, ?USTAR_UNAME_LEN),
+ gname=binary_part(Bin, ?USTAR_GNAME, ?USTAR_GNAME_LEN),
+ devmajor=binary_part(Bin, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN),
+ devminor=binary_part(Bin, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN),
+ prefix=binary_part(Bin, 345, 131),
+ atime=binary_part(Bin, 476, 12),
+ ctime=binary_part(Bin, 488, 12),
+ trailer=binary_part(Bin, ?STAR_TRAILER, ?STAR_TRAILER_LEN)
+ }.
+
+to_ustar(#header_v7{}=V7, Bin)
+ when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
+ #header_ustar{
+ header_v7=V7,
+ magic=binary_part(Bin, ?USTAR_MAGIC, ?USTAR_MAGIC_LEN),
+ version=binary_part(Bin, ?USTAR_VERSION, ?USTAR_VERSION_LEN),
+ uname=binary_part(Bin, ?USTAR_UNAME, ?USTAR_UNAME_LEN),
+ gname=binary_part(Bin, ?USTAR_GNAME, ?USTAR_GNAME_LEN),
+ devmajor=binary_part(Bin, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN),
+ devminor=binary_part(Bin, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN),
+ prefix=binary_part(Bin, 345, 155)
+ }.
+
+to_sparse_array(Bin) when is_binary(Bin) ->
+ MaxEntries = byte_size(Bin) div 24,
+ IsExtended = 1 =:= binary:at(Bin, 24*MaxEntries),
+ Entries = parse_sparse_entries(Bin, MaxEntries-1, []),
+ #sparse_array{
+ entries=Entries,
+ max_entries=MaxEntries,
+ is_extended=IsExtended
+ }.
+
+parse_sparse_entries(<<>>, _, Acc) ->
+ Acc;
+parse_sparse_entries(_, -1, Acc) ->
+ Acc;
+parse_sparse_entries(Bin, N, Acc) ->
+ case to_sparse_entry(binary_part(Bin, N*24, 24)) of
+ nil ->
+ parse_sparse_entries(Bin, N-1, Acc);
+ Entry = #sparse_entry{} ->
+ parse_sparse_entries(Bin, N-1, [Entry|Acc])
end.
-table1(eof, _, _, Result) ->
- {ok, lists:reverse(Result)};
-table1(Header = #tar_header{}, File, #read_opts{verbose=true}, Result) ->
- #tar_header{name=Name, size=Size, mtime=Mtime, typeflag=Type,
- mode=Mode, uid=Uid, gid=Gid} = Header,
- skip(File, Size),
- {ok, [{Name, Type, Size, posix_to_erlang_time(Mtime), Mode, Uid, Gid}|Result]};
-table1(#tar_header{name=Name, size=Size}, File, _, Result) ->
- skip(File, Size),
- {ok, [Name|Result]}.
-
-extract1(eof, _, _, Acc) ->
- if
- is_list(Acc) ->
- {ok, lists:reverse(Acc)};
- true ->
- Acc
- end;
-extract1(Header, File, Opts, Acc) ->
- Name = Header#tar_header.name,
- case check_extract(Name, Opts) of
- true ->
- {ok, Bin} = get_element(File, Header),
- case write_extracted_element(Header, Bin, Opts) of
- ok ->
- {ok, Acc};
- {ok, NameBin} when is_list(Acc) ->
- {ok, [NameBin | Acc]};
- {ok, NameBin} when Acc =:= ok ->
- {ok, [NameBin]}
- end;
- false ->
- ok = skip(File, Header#tar_header.size),
- {ok, Acc}
+-define(EMPTY_ENTRY, <<0,0,0,0,0,0,0,0,0,0,0,0>>).
+to_sparse_entry(Bin) when is_binary(Bin), byte_size(Bin) =:= 24 ->
+ OffsetBin = binary_part(Bin, 0, 12),
+ NumBytesBin = binary_part(Bin, 12, 12),
+ case {OffsetBin, NumBytesBin} of
+ {?EMPTY_ENTRY, ?EMPTY_ENTRY} ->
+ nil;
+ _ ->
+ #sparse_entry{
+ offset=parse_numeric(OffsetBin),
+ num_bytes=parse_numeric(NumBytesBin)}
end.
-%% Checks if the file Name should be extracted.
+-spec get_format(binary()) -> {ok, pos_integer(), header_v7()}
+ | ?FORMAT_UNKNOWN
+ | {error, term()}.
+get_format(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
+ do_get_format(to_v7(Bin), Bin).
+
+do_get_format({error, _} = Err, _Bin) ->
+ Err;
+do_get_format(#header_v7{}=V7, Bin)
+ when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
+ Checksum = parse_octal(V7#header_v7.checksum),
+ Chk1 = compute_checksum(Bin),
+ Chk2 = compute_signed_checksum(Bin),
+ if Checksum =/= Chk1 andalso Checksum =/= Chk2 ->
+ ?FORMAT_UNKNOWN;
+ true ->
+ %% guess magic
+ Ustar = to_ustar(V7, Bin),
+ Star = to_star(V7, Bin),
+ Magic = Ustar#header_ustar.magic,
+ Version = Ustar#header_ustar.version,
+ Trailer = Star#header_star.trailer,
+ Format = if
+ Magic =:= ?MAGIC_USTAR, Trailer =:= ?TRAILER_STAR ->
+ ?FORMAT_STAR;
+ Magic =:= ?MAGIC_USTAR ->
+ ?FORMAT_USTAR;
+ Magic =:= ?MAGIC_GNU, Version =:= ?VERSION_GNU ->
+ ?FORMAT_GNU;
+ true ->
+ ?FORMAT_V7
+ end,
+ {ok, Format, V7}
+ end.
-check_extract(_, #read_opts{files=all}) ->
+unpack_format(Format, #header_v7{}=V7, Bin, Reader)
+ when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
+ Mtime = posix_to_erlang_time(parse_numeric(V7#header_v7.mtime)),
+ Header0 = #tar_header{
+ name=parse_string(V7#header_v7.name),
+ mode=parse_numeric(V7#header_v7.mode),
+ uid=parse_numeric(V7#header_v7.uid),
+ gid=parse_numeric(V7#header_v7.gid),
+ size=parse_numeric(V7#header_v7.size),
+ mtime=Mtime,
+ atime=Mtime,
+ ctime=Mtime,
+ typeflag=V7#header_v7.typeflag,
+ linkname=parse_string(V7#header_v7.linkname)
+ },
+ Typeflag = Header0#tar_header.typeflag,
+ Header1 = if Format > ?FORMAT_V7 ->
+ unpack_modern(Format, V7, Bin, Header0);
+ true ->
+ Name = Header0#tar_header.name,
+ Header0#tar_header{name=safe_join_path("", Name)}
+ end,
+ HeaderOnly = is_header_only_type(Typeflag),
+ Header2 = if HeaderOnly ->
+ Header1#tar_header{size=0};
+ true ->
+ Header1
+ end,
+ if Typeflag =:= ?TYPE_GNU_SPARSE ->
+ Gnu = to_gnu(V7, Bin),
+ RealSize = parse_numeric(Gnu#header_gnu.real_size),
+ {Sparsemap, Reader2} = parse_sparse_map(Gnu, Reader),
+ Header3 = Header2#tar_header{size=RealSize},
+ {Header3, new_sparse_file_reader(Reader2, Sparsemap, RealSize)};
+ true ->
+ FileReader = #reg_file_reader{
+ handle=Reader,
+ num_bytes=Header2#tar_header.size,
+ size=Header2#tar_header.size,
+ pos = 0
+ },
+ {Header2, FileReader}
+ end.
+
+unpack_modern(Format, #header_v7{}=V7, Bin, #tar_header{}=Header0)
+ when is_binary(Bin) ->
+ Typeflag = Header0#tar_header.typeflag,
+ Ustar = to_ustar(V7, Bin),
+ H0 = Header0#tar_header{
+ uname=parse_string(Ustar#header_ustar.uname),
+ gname=parse_string(Ustar#header_ustar.gname)},
+ H1 = if Typeflag =:= ?TYPE_CHAR
+ orelse Typeflag =:= ?TYPE_BLOCK ->
+ Ma = parse_numeric(Ustar#header_ustar.devmajor),
+ Mi = parse_numeric(Ustar#header_ustar.devminor),
+ H0#tar_header{
+ devmajor=Ma,
+ devminor=Mi
+ };
+ true ->
+ H0
+ end,
+ {Prefix, H2} = case Format of
+ ?FORMAT_USTAR ->
+ {parse_string(Ustar#header_ustar.prefix), H1};
+ ?FORMAT_STAR ->
+ Star = to_star(V7, Bin),
+ Prefix0 = parse_string(Star#header_star.prefix),
+ Atime0 = Star#header_star.atime,
+ Atime = posix_to_erlang_time(parse_numeric(Atime0)),
+ Ctime0 = Star#header_star.ctime,
+ Ctime = posix_to_erlang_time(parse_numeric(Ctime0)),
+ {Prefix0, H1#tar_header{
+ atime=Atime,
+ ctime=Ctime
+ }};
+ _ ->
+ {"", H1}
+ end,
+ Name = H2#tar_header.name,
+ H2#tar_header{name=safe_join_path(Prefix, Name)}.
+
+
+safe_join_path([], Name) ->
+ strip_slashes(Name, both);
+safe_join_path(Prefix, []) ->
+ strip_slashes(Prefix, right);
+safe_join_path(Prefix, Name) ->
+ filename:join(strip_slashes(Prefix, right), strip_slashes(Name, both)).
+
+strip_slashes(Str, Direction) ->
+ string:strip(Str, Direction, $/).
+
+new_sparse_file_reader(Reader, Sparsemap, RealSize) ->
+ true = validate_sparse_entries(Sparsemap, RealSize),
+ #sparse_file_reader{
+ handle = Reader,
+ num_bytes = RealSize,
+ pos = 0,
+ size = RealSize,
+ sparse_map = Sparsemap}.
+
+validate_sparse_entries(Entries, RealSize) ->
+ validate_sparse_entries(Entries, RealSize, 0, 0).
+validate_sparse_entries([], _RealSize, _I, _LastOffset) ->
true;
-check_extract(Name, #read_opts{files=Files}) ->
- ordsets:is_element(Name, Files).
+validate_sparse_entries([#sparse_entry{}=Entry|Rest], RealSize, I, LastOffset) ->
+ Offset = Entry#sparse_entry.offset,
+ NumBytes = Entry#sparse_entry.num_bytes,
+ if
+ Offset > ?MAX_INT64-NumBytes ->
+ throw({error, {invalid_sparse_map_entry, offset_too_large}});
+ Offset+NumBytes > RealSize ->
+ throw({error, {invalid_sparse_map_entry, offset_too_large}});
+ I > 0 andalso LastOffset > Offset ->
+ throw({error, {invalid_sparse_map_entry, overlapping_offsets}});
+ true ->
+ ok
+ end,
+ validate_sparse_entries(Rest, RealSize, I+1, Offset+NumBytes).
+
+
+-spec parse_sparse_map(header_gnu(), reader_type()) ->
+ {[sparse_entry()], reader_type()}.
+parse_sparse_map(#header_gnu{sparse=Sparse}, Reader)
+ when Sparse#sparse_array.is_extended ->
+ parse_sparse_map(Sparse, Reader, []);
+parse_sparse_map(#header_gnu{sparse=Sparse}, Reader) ->
+ {Sparse#sparse_array.entries, Reader}.
+parse_sparse_map(#sparse_array{is_extended=true,entries=Entries}, Reader, Acc) ->
+ case read_block(Reader) of
+ eof ->
+ throw({error, eof});
+ {ok, Block, Reader2} ->
+ Sparse2 = to_sparse_array(Block),
+ parse_sparse_map(Sparse2, Reader2, Entries++Acc)
+ end;
+parse_sparse_map(#sparse_array{entries=Entries}, Reader, Acc) ->
+ Sorted = lists:sort(fun (#sparse_entry{offset=A},#sparse_entry{offset=B}) ->
+ A =< B
+ end, Entries++Acc),
+ {Sorted, Reader}.
+
+%% Defined by taking the sum of the unsigned byte values of the
+%% entire header record, treating the checksum bytes to as ASCII spaces
+compute_checksum(<<H1:?V7_CHKSUM/binary,
+ H2:?V7_CHKSUM_LEN/binary,
+ Rest:(?BLOCK_SIZE - ?V7_CHKSUM - ?V7_CHKSUM_LEN)/binary,
+ _/binary>>) ->
+ C0 = checksum(H1) + (byte_size(H2) * $\s),
+ C1 = checksum(Rest),
+ C0 + C1.
+
+compute_signed_checksum(<<H1:?V7_CHKSUM/binary,
+ H2:?V7_CHKSUM_LEN/binary,
+ Rest:(?BLOCK_SIZE - ?V7_CHKSUM - ?V7_CHKSUM_LEN)/binary,
+ _/binary>>) ->
+ C0 = signed_checksum(H1) + (byte_size(H2) * $\s),
+ C1 = signed_checksum(Rest),
+ C0 + C1.
-get_header(File) ->
- case do_read(File, ?record_size) of
- eof ->
- throw({error,eof});
- {ok, Bin} when is_binary(Bin) ->
- convert_header(Bin);
- {ok, List} ->
- convert_header(list_to_binary(List));
- {error, Reason} ->
- throw({error, Reason})
- end.
+%% Returns the checksum of a binary.
+checksum(Bin) -> checksum(Bin, 0).
+checksum(<<A/unsigned,Rest/binary>>, Sum) ->
+ checksum(Rest, Sum+A);
+checksum(<<>>, Sum) -> Sum.
-%% Converts the tar header to a record.
+signed_checksum(Bin) -> signed_checksum(Bin, 0).
+signed_checksum(<<A/signed,Rest/binary>>, Sum) ->
+ signed_checksum(Rest, Sum+A);
+signed_checksum(<<>>, Sum) -> Sum.
+
+-spec parse_numeric(binary()) -> non_neg_integer().
+parse_numeric(<<>>) ->
+ 0;
+parse_numeric(<<First, _/binary>> = Bin) ->
+ %% check for base-256 format first
+ %% if the bit is set, then all following bits constitute a two's
+ %% complement encoded number in big-endian byte order
+ if
+ First band 16#80 =/= 0 ->
+ %% Handling negative numbers relies on the following identity:
+ %% -a-1 == ^a
+ %% If the number is negative, we use an inversion mask to invert
+ %% the data bytes and treat the value as an unsigned number
+ Inv = if First band 16#40 =/= 0 -> 16#00; true -> 16#FF end,
+ Bytes = binary:bin_to_list(Bin),
+ Reducer = fun (C, {I, X}) ->
+ C1 = C bxor Inv,
+ C2 = if I =:= 0 -> C1 band 16#7F; true -> C1 end,
+ if (X bsr 56) > 0 ->
+ throw({error,integer_overflow});
+ true ->
+ {I+1, (X bsl 8) bor C2}
+ end
+ end,
+ {_, N} = lists:foldl(Reducer, {0,0}, Bytes),
+ if (N bsr 63) > 0 ->
+ throw({error, integer_overflow});
+ true ->
+ if Inv =:= 16#FF ->
+ -1 bxor N;
+ true ->
+ N
+ end
+ end;
+ true ->
+ %% normal case is an octal number
+ parse_octal(Bin)
+ end.
-convert_header(Bin) when byte_size(Bin) =:= ?record_size ->
- case verify_checksum(Bin) of
- ok ->
- Hd = #tar_header{name=get_name(Bin),
- mode=from_octal(Bin, ?th_mode, ?th_mode_len),
- uid=from_octal(Bin, ?th_uid, ?th_uid_len),
- gid=from_octal(Bin, ?th_gid, ?th_gid_len),
- size=from_octal(Bin, ?th_size, ?th_size_len),
- mtime=from_octal(Bin, ?th_mtime, ?th_mtime_len),
- linkname=from_string(Bin,
- ?th_linkname, ?th_linkname_len),
- typeflag=typeflag(Bin)},
- convert_header1(Hd);
- eof ->
- eof
+parse_octal(Bin) when is_binary(Bin) ->
+ %% skip leading/trailing zero bytes and spaces
+ do_parse_octal(Bin, <<>>).
+do_parse_octal(<<>>, <<>>) ->
+ 0;
+do_parse_octal(<<>>, Acc) ->
+ case io_lib:fread("~8u", binary:bin_to_list(Acc)) of
+ {error, _} -> throw({error, invalid_tar_checksum});
+ {ok, [Octal], []} -> Octal;
+ {ok, _, _} -> throw({error, invalid_tar_checksum})
end;
-convert_header(Bin) when byte_size(Bin) =:= 0 ->
+do_parse_octal(<<$\s,Rest/binary>>, Acc) ->
+ do_parse_octal(Rest, Acc);
+do_parse_octal(<<0, Rest/binary>>, Acc) ->
+ do_parse_octal(Rest, Acc);
+do_parse_octal(<<C, Rest/binary>>, Acc) ->
+ do_parse_octal(Rest, <<Acc/binary, C>>).
+
+parse_string(Bin) when is_binary(Bin) ->
+ do_parse_string(Bin, <<>>).
+do_parse_string(<<>>, Acc) ->
+ case unicode:characters_to_list(Acc) of
+ Str when is_list(Str) ->
+ Str;
+ {incomplete, _Str, _Rest} ->
+ binary:bin_to_list(Acc);
+ {error, _Str, _Rest} ->
+ throw({error, {bad_header, invalid_string}})
+ end;
+do_parse_string(<<0, _/binary>>, Acc) ->
+ do_parse_string(<<>>, Acc);
+do_parse_string(<<C, Rest/binary>>, Acc) ->
+ do_parse_string(Rest, <<Acc/binary, C>>).
+
+convert_header(Bin, #reader{pos=Pos}=Reader)
+ when byte_size(Bin) =:= ?BLOCK_SIZE, (Pos rem ?BLOCK_SIZE) =:= 0 ->
+ case get_format(Bin) of
+ ?FORMAT_UNKNOWN ->
+ throw({error, bad_header});
+ {ok, Format, V7} ->
+ unpack_format(Format, V7, Bin, Reader);
+ {error, Reason} ->
+ throw({error, {bad_header, Reason}})
+ end;
+convert_header(Bin, #reader{pos=Pos}) when byte_size(Bin) =:= ?BLOCK_SIZE ->
+ throw({error, misaligned_read, Pos});
+convert_header(Bin, _Reader) when byte_size(Bin) =:= 0 ->
eof;
-convert_header(_Bin) ->
+convert_header(_Bin, _Reader) ->
throw({error, eof}).
-%% Basic sanity. Better set the element size to zero here if the type
-%% always is of zero length.
-
-convert_header1(H) when H#tar_header.typeflag =:= symlink, H#tar_header.size =/= 0 ->
- convert_header1(H#tar_header{size=0});
-convert_header1(H) when H#tar_header.typeflag =:= directory, H#tar_header.size =/= 0 ->
- convert_header1(H#tar_header{size=0});
-convert_header1(Header) ->
- Header.
-
-typeflag(Bin) ->
- [T] = binary_to_list(Bin, ?th_typeflag+1, ?th_typeflag+1),
- case T of
- 0 -> regular;
- $0 -> regular;
- $1 -> link;
- $2 -> symlink;
- $3 -> char;
- $4 -> block;
- $5 -> directory;
- $6 -> fifo;
- $7 -> regular;
- _ -> unknown
+%% Creates a partially-populated header record based
+%% on the provided file_info record. If the file is
+%% a symlink, then `link` is used as the link target.
+%% If the file is a directory, a slash is appended to the name.
+fileinfo_to_header(Name, #file_info{}=Fi, Link) when is_list(Name) ->
+ BaseHeader = #tar_header{name=Name,
+ mtime=Fi#file_info.mtime,
+ atime=Fi#file_info.atime,
+ ctime=Fi#file_info.ctime,
+ mode=Fi#file_info.mode,
+ uid=Fi#file_info.uid,
+ gid=Fi#file_info.gid,
+ typeflag=?TYPE_REGULAR},
+ do_fileinfo_to_header(BaseHeader, Fi, Link).
+
+do_fileinfo_to_header(Header, #file_info{size=Size,type=regular}, _Link) ->
+ Header#tar_header{size=Size,typeflag=?TYPE_REGULAR};
+do_fileinfo_to_header(#tar_header{name=Name}=Header,
+ #file_info{type=directory}, _Link) ->
+ Header#tar_header{name=Name++"/",typeflag=?TYPE_DIR};
+do_fileinfo_to_header(Header, #file_info{type=symlink}, Link) ->
+ Header#tar_header{typeflag=?TYPE_SYMLINK,linkname=Link};
+do_fileinfo_to_header(Header, #file_info{type=device,mode=Mode}=Fi, _Link)
+ when (Mode band ?S_IFMT) =:= ?S_IFCHR ->
+ Header#tar_header{typeflag=?TYPE_CHAR,
+ devmajor=Fi#file_info.major_device,
+ devminor=Fi#file_info.minor_device};
+do_fileinfo_to_header(Header, #file_info{type=device,mode=Mode}=Fi, _Link)
+ when (Mode band ?S_IFMT) =:= ?S_IFBLK ->
+ Header#tar_header{typeflag=?TYPE_BLOCK,
+ devmajor=Fi#file_info.major_device,
+ devminor=Fi#file_info.minor_device};
+do_fileinfo_to_header(Header, #file_info{type=other,mode=Mode}, _Link)
+ when (Mode band ?S_IFMT) =:= ?S_FIFO ->
+ Header#tar_header{typeflag=?TYPE_FIFO};
+do_fileinfo_to_header(Header, Fi, _Link) ->
+ {error, {invalid_file_type, Header#tar_header.name, Fi}}.
+
+is_ascii(Str) when is_list(Str) ->
+ not lists:any(fun (Char) -> Char >= 16#80 end, Str);
+is_ascii(Bin) when is_binary(Bin) ->
+ is_ascii1(Bin).
+
+is_ascii1(<<>>) ->
+ true;
+is_ascii1(<<C,_Rest/binary>>) when C >= 16#80 ->
+ false;
+is_ascii1(<<_, Rest/binary>>) ->
+ is_ascii1(Rest).
+
+to_ascii(Str) when is_list(Str) ->
+ case is_ascii(Str) of
+ true ->
+ unicode:characters_to_binary(Str);
+ false ->
+ Chars = lists:filter(fun (Char) -> Char < 16#80 end, Str),
+ unicode:characters_to_binary(Chars)
+ end;
+to_ascii(Bin) when is_binary(Bin) ->
+ to_ascii(Bin, <<>>).
+to_ascii(<<>>, Acc) ->
+ Acc;
+to_ascii(<<C, Rest/binary>>, Acc) when C < 16#80 ->
+ to_ascii(Rest, <<Acc/binary,C>>);
+to_ascii(<<_, Rest/binary>>, Acc) ->
+ to_ascii(Rest, Acc).
+
+is_header_only_type(?TYPE_SYMLINK) -> true;
+is_header_only_type(?TYPE_LINK) -> true;
+is_header_only_type(?TYPE_DIR) -> true;
+is_header_only_type(_) -> false.
+
+posix_to_erlang_time(Sec) ->
+ OneMillion = 1000000,
+ Time = calendar:now_to_datetime({Sec div OneMillion, Sec rem OneMillion, 0}),
+ erlang:universaltime_to_localtime(Time).
+
+foldl_read(#reader{access=read}=Reader, Fun, Accu, #read_opts{}=Opts)
+ when is_function(Fun,4) ->
+ case foldl_read0(Reader, Fun, Accu, Opts) of
+ {ok, Result, _Reader2} ->
+ Result;
+ {error, _} = Err ->
+ Err
+ end;
+foldl_read(#reader{access=Access}, _Fun, _Accu, _Opts) ->
+ {error, {read_mode_expected, Access}};
+foldl_read(TarName, Fun, Accu, #read_opts{}=Opts)
+ when is_function(Fun,4) ->
+ try open(TarName, [read|Opts#read_opts.open_mode]) of
+ {ok, #reader{access=read}=Reader} ->
+ foldl_read(Reader, Fun, Accu, Opts);
+ {error, _} = Err ->
+ Err
+ catch
+ throw:Err ->
+ Err
end.
-%% Get the name of the file from the prefix and name fields of the
-%% tar header.
-
-get_name(Bin0) ->
- List0 = get_name_raw(Bin0),
- case file:native_name_encoding() of
- utf8 ->
- Bin = list_to_binary(List0),
- case unicode:characters_to_list(Bin) of
- {error,_,_} ->
- List0;
- List when is_list(List) ->
- List
- end;
- latin1 ->
- List0
+foldl_read0(Reader, Fun, Accu, Opts) ->
+ try foldl_read1(Fun, Accu, Reader, Opts, #{}) of
+ {ok,_,_} = Ok ->
+ Ok
+ catch
+ throw:{error, {Reason, Format, Args}} ->
+ read_verbose(Opts, Format, Args),
+ {error, Reason};
+ throw:Err ->
+ Err
end.
-get_name_raw(Bin) ->
- Name = from_string(Bin, ?th_name, ?th_name_len),
- case binary_to_list(Bin, ?th_prefix+1, ?th_prefix+1) of
- [0] ->
- Name;
- [_] ->
- Prefix = binary_to_list(Bin, ?th_prefix+1, byte_size(Bin)),
- lists:reverse(remove_nulls(Prefix), [$/|Name])
+foldl_read1(Fun, Accu0, Reader0, Opts, ExtraHeaders) ->
+ {ok, Reader1} = skip_unread(Reader0),
+ case get_header(Reader1) of
+ eof ->
+ Fun(eof, Reader1, Opts, Accu0);
+ {Header, Reader2} ->
+ case Header#tar_header.typeflag of
+ ?TYPE_X_HEADER ->
+ {ExtraHeaders2, Reader3} = parse_pax(Reader2),
+ ExtraHeaders3 = maps:merge(ExtraHeaders, ExtraHeaders2),
+ foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders3);
+ ?TYPE_GNU_LONGNAME ->
+ {RealName, Reader3} = get_real_name(Reader2),
+ ExtraHeaders2 = maps:put(?PAX_PATH,
+ parse_string(RealName), ExtraHeaders),
+ foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders2);
+ ?TYPE_GNU_LONGLINK ->
+ {RealName, Reader3} = get_real_name(Reader2),
+ ExtraHeaders2 = maps:put(?PAX_LINKPATH,
+ parse_string(RealName), ExtraHeaders),
+ foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders2);
+ _ ->
+ Header1 = merge_pax(Header, ExtraHeaders),
+ {ok, NewAccu, Reader3} = Fun(Header1, Reader2, Opts, Accu0),
+ foldl_read1(Fun, NewAccu, Reader3, Opts, #{})
+ end
end.
-from_string(Bin, Pos, Len) ->
- lists:reverse(remove_nulls(binary_to_list(Bin, Pos+1, Pos+Len))).
-
-%% Returns all characters up to (but not including) the first null
-%% character, in REVERSE order.
-
-remove_nulls(List) ->
- remove_nulls(List, []).
-
-remove_nulls([0|_], Result) ->
- remove_nulls([], Result);
-remove_nulls([C|Rest], Result) ->
- remove_nulls(Rest, [C|Result]);
-remove_nulls([], Result) ->
- Result.
-
-from_octal(Bin, Pos, Len) ->
- from_octal(binary_to_list(Bin, Pos+1, Pos+Len)).
-
-from_octal([$\s|Rest]) ->
- from_octal(Rest);
-from_octal([Digit|Rest]) when $0 =< Digit, Digit =< $7 ->
- from_octal(Rest, Digit-$0);
-from_octal(Bin) when is_binary(Bin) ->
- from_octal(binary_to_list(Bin));
-from_octal(Other) ->
- throw({error, {bad_header, "Bad octal number: ~p", [Other]}}).
-
-from_octal([Digit|Rest], Result) when $0 =< Digit, Digit =< $7 ->
- from_octal(Rest, Result*8+Digit-$0);
-from_octal([$\s|_], Result) ->
- Result;
-from_octal([0|_], Result) ->
- Result;
-from_octal(Other, _) ->
- throw({error, {bad_header, "Bad contents in octal field: ~p", [Other]}}).
-
-%% Retrieves the next element from the archive.
-%% Returns {ok, Bin} | eof | {error, Reason}
-
-get_element(File, #tar_header{size = 0}) ->
- skip_to_next(File),
- {ok,<<>>};
-get_element(File, #tar_header{size = Size}) ->
- case do_read(File, Size) of
- {ok,Bin}=Res when byte_size(Bin) =:= Size ->
- skip_to_next(File),
- Res;
- {ok,List} when length(List) =:= Size ->
- skip_to_next(File),
- {ok,list_to_binary(List)};
- {ok,_} -> throw({error,eof});
- {error, Reason} -> throw({error, Reason});
- eof -> throw({error,eof})
+%% Applies all known PAX attributes to the current tar header
+-spec merge_pax(tar_header(), #{binary() => binary()}) -> tar_header().
+merge_pax(Header, ExtraHeaders) when is_map(ExtraHeaders) ->
+ do_merge_pax(Header, maps:to_list(ExtraHeaders)).
+
+do_merge_pax(Header, []) ->
+ Header;
+do_merge_pax(Header, [{?PAX_PATH, Path}|Rest]) ->
+ do_merge_pax(Header#tar_header{name=unicode:characters_to_list(Path)}, Rest);
+do_merge_pax(Header, [{?PAX_LINKPATH, LinkPath}|Rest]) ->
+ do_merge_pax(Header#tar_header{linkname=unicode:characters_to_list(LinkPath)}, Rest);
+do_merge_pax(Header, [{?PAX_GNAME, Gname}|Rest]) ->
+ do_merge_pax(Header#tar_header{gname=unicode:characters_to_list(Gname)}, Rest);
+do_merge_pax(Header, [{?PAX_UNAME, Uname}|Rest]) ->
+ do_merge_pax(Header#tar_header{uname=unicode:characters_to_list(Uname)}, Rest);
+do_merge_pax(Header, [{?PAX_UID, Uid}|Rest]) ->
+ Uid2 = binary_to_integer(Uid),
+ do_merge_pax(Header#tar_header{uid=Uid2}, Rest);
+do_merge_pax(Header, [{?PAX_GID, Gid}|Rest]) ->
+ Gid2 = binary_to_integer(Gid),
+ do_merge_pax(Header#tar_header{gid=Gid2}, Rest);
+do_merge_pax(Header, [{?PAX_ATIME, Atime}|Rest]) ->
+ Atime2 = parse_pax_time(Atime),
+ do_merge_pax(Header#tar_header{atime=Atime2}, Rest);
+do_merge_pax(Header, [{?PAX_MTIME, Mtime}|Rest]) ->
+ Mtime2 = parse_pax_time(Mtime),
+ do_merge_pax(Header#tar_header{mtime=Mtime2}, Rest);
+do_merge_pax(Header, [{?PAX_CTIME, Ctime}|Rest]) ->
+ Ctime2 = parse_pax_time(Ctime),
+ do_merge_pax(Header#tar_header{ctime=Ctime2}, Rest);
+do_merge_pax(Header, [{?PAX_SIZE, Size}|Rest]) ->
+ Size2 = binary_to_integer(Size),
+ do_merge_pax(Header#tar_header{size=Size2}, Rest);
+do_merge_pax(Header, [{<<?PAX_XATTR_STR, _Key/binary>>, _Value}|Rest]) ->
+ do_merge_pax(Header, Rest);
+do_merge_pax(Header, [_Ignore|Rest]) ->
+ do_merge_pax(Header, Rest).
+
+%% Returns the time since UNIX epoch as a datetime
+-spec parse_pax_time(binary()) -> calendar:datetime().
+parse_pax_time(Bin) when is_binary(Bin) ->
+ TotalNano = case binary:split(Bin, [<<$.>>]) of
+ [SecondsStr, NanoStr0] ->
+ Seconds = binary_to_integer(SecondsStr),
+ if byte_size(NanoStr0) < ?MAX_NANO_INT_SIZE ->
+ %% right pad
+ PaddingN = ?MAX_NANO_INT_SIZE-byte_size(NanoStr0),
+ Padding = binary:copy(<<$0>>, PaddingN),
+ NanoStr1 = <<NanoStr0/binary,Padding/binary>>,
+ Nano = binary_to_integer(NanoStr1),
+ (Seconds*?BILLION)+Nano;
+ byte_size(NanoStr0) > ?MAX_NANO_INT_SIZE ->
+ %% right truncate
+ NanoStr1 = binary_part(NanoStr0, 0, ?MAX_NANO_INT_SIZE),
+ Nano = binary_to_integer(NanoStr1),
+ (Seconds*?BILLION)+Nano;
+ true ->
+ (Seconds*?BILLION)+binary_to_integer(NanoStr0)
+ end;
+ [SecondsStr] ->
+ binary_to_integer(SecondsStr)*?BILLION
+ end,
+ %% truncate to microseconds
+ Micro = TotalNano div 1000,
+ Mega = Micro div 1000000000000,
+ Secs = Micro div 1000000 - (Mega*1000000),
+ Micro2 = Micro rem 1000000,
+ calendar:now_to_datetime({Mega, Secs, Micro2}).
+
+%% Given a regular file reader, reads the whole file and
+%% parses all extended attributes it contains.
+parse_pax(#reg_file_reader{handle=Handle,num_bytes=0}) ->
+ {#{}, Handle};
+parse_pax(#reg_file_reader{handle=Handle0,num_bytes=NumBytes}) ->
+ case do_read(Handle0, NumBytes) of
+ {ok, Bytes, Handle1} ->
+ do_parse_pax(Handle1, Bytes, #{});
+ {error, _} = Err ->
+ throw(Err)
end.
-%% Verify the checksum in the header. First try an unsigned addition
-%% of all bytes in the header (as it should be according to Posix).
-
-verify_checksum(Bin) ->
- <<H1:?th_chksum/binary,CheckStr:?th_chksum_len/binary,H2/binary>> = Bin,
- case checksum(H1) + checksum(H2) of
- 0 -> eof;
- Checksum0 ->
- Csum = from_octal(CheckStr),
- CsumInit = ?th_chksum_len * $\s,
- case Checksum0 + CsumInit of
- Csum -> ok;
- Unsigned ->
- verify_checksum(H1, H2, CsumInit, Csum, Unsigned)
- end
+do_parse_pax(Reader, <<>>, Headers) ->
+ {Headers, Reader};
+do_parse_pax(Reader, Bin, Headers) ->
+ {Key, Value, Residual} = parse_pax_record(Bin),
+ NewHeaders = maps:put(Key, Value, Headers),
+ do_parse_pax(Reader, Residual, NewHeaders).
+
+%% Parse an extended attribute
+parse_pax_record(Bin) when is_binary(Bin) ->
+ case binary:split(Bin, [<<$\n>>]) of
+ [Record, Residual] ->
+ case binary:split(Record, [<<$\s>>], [trim_all]) of
+ [_Len, Record1] ->
+ case binary:split(Record1, [<<$=>>], [trim_all]) of
+ [AttrName, AttrValue] ->
+ {AttrName, AttrValue, Residual};
+ _Other ->
+ throw({error, malformed_pax_record})
+ end;
+ _Other ->
+ throw({error, malformed_pax_record})
+ end;
+ _Other ->
+ throw({error, malformed_pax_record})
end.
-%% The checksums didn't match. Now try a signed addition.
+get_real_name(#reg_file_reader{handle=Handle,num_bytes=0}) ->
+ {"", Handle};
+get_real_name(#reg_file_reader{handle=Handle0,num_bytes=NumBytes}) ->
+ case do_read(Handle0, NumBytes) of
+ {ok, RealName, Handle1} ->
+ {RealName, Handle1};
+ {error, _} = Err ->
+ throw(Err)
+ end;
+get_real_name(#sparse_file_reader{num_bytes=NumBytes}=Reader0) ->
+ case do_read(Reader0, NumBytes) of
+ {ok, RealName, Reader1} ->
+ {RealName, Reader1};
+ {error, _} = Err ->
+ throw(Err)
+ end.
-verify_checksum(H1, H2, Csum, ShouldBe, Unsigned) ->
- case signed_sum(binary_to_list(H1), signed_sum(binary_to_list(H2), Csum)) of
- ShouldBe -> ok;
- Signed ->
- throw({error,
- {bad_header,
- "Incorrect directory checksum ~w (~w), should be ~w",
- [Signed, Unsigned, ShouldBe]}})
+%% Skip the remaining bytes for the current file entry
+skip_file(#reg_file_reader{handle=Handle0,pos=Pos,size=Size}=Reader) ->
+ Padding = skip_padding(Size),
+ AbsPos = Handle0#reader.pos + (Size-Pos) + Padding,
+ case do_position(Handle0, AbsPos) of
+ {ok, _, Handle1} ->
+ Reader#reg_file_reader{handle=Handle1,num_bytes=0,pos=Size};
+ Err ->
+ throw(Err)
+ end;
+skip_file(#sparse_file_reader{pos=Pos,size=Size}=Reader) ->
+ case do_read(Reader, Size-Pos) of
+ {ok, _, Reader2} ->
+ Reader2;
+ Err ->
+ throw(Err)
end.
-signed_sum([C|Rest], Sum) when C < 128 ->
- signed_sum(Rest, Sum+C);
-signed_sum([C|Rest], Sum) ->
- signed_sum(Rest, Sum+C-256);
-signed_sum([], Sum) -> Sum.
-
-write_extracted_element(Header, Bin, Opts)
- when Opts#read_opts.output =:= memory ->
- case Header#tar_header.typeflag of
- regular ->
- {ok, {Header#tar_header.name, Bin}};
- _ ->
- ok
+skip_padding(0) ->
+ 0;
+skip_padding(Size) when (Size rem ?BLOCK_SIZE) =:= 0 ->
+ 0;
+skip_padding(Size) when Size =< ?BLOCK_SIZE ->
+ ?BLOCK_SIZE - Size;
+skip_padding(Size) ->
+ ?BLOCK_SIZE - (Size rem ?BLOCK_SIZE).
+
+skip_unread(#reader{pos=Pos}=Reader0) when (Pos rem ?BLOCK_SIZE) > 0 ->
+ Padding = skip_padding(Pos + ?BLOCK_SIZE),
+ AbsPos = Pos + Padding,
+ case do_position(Reader0, AbsPos) of
+ {ok, _, Reader1} ->
+ {ok, Reader1};
+ Err ->
+ throw(Err)
+ end;
+skip_unread(#reader{}=Reader) ->
+ {ok, Reader};
+skip_unread(#reg_file_reader{handle=Handle,num_bytes=0}) ->
+ skip_unread(Handle);
+skip_unread(#reg_file_reader{}=Reader) ->
+ #reg_file_reader{handle=Handle} = skip_file(Reader),
+ {ok, Handle};
+skip_unread(#sparse_file_reader{handle=Handle,num_bytes=0}) ->
+ skip_unread(Handle);
+skip_unread(#sparse_file_reader{}=Reader) ->
+ #sparse_file_reader{handle=Handle} = skip_file(Reader),
+ {ok, Handle}.
+
+write_extracted_element(#tar_header{name=Name,typeflag=Type},
+ Bin,
+ #read_opts{output=memory}=Opts) ->
+ case typeflag(Type) of
+ regular ->
+ read_verbose(Opts, "x ~ts~n", [Name]),
+ {ok, {Name, Bin}};
+ _ ->
+ ok
end;
-write_extracted_element(Header, Bin, Opts) ->
- Name = filename:absname(Header#tar_header.name, Opts#read_opts.cwd),
- Created =
- case Header#tar_header.typeflag of
- regular ->
- write_extracted_file(Name, Bin, Opts);
- directory ->
- create_extracted_dir(Name, Opts);
- symlink ->
- create_symlink(Name, Header, Opts);
- Other -> % Ignore.
- read_verbose(Opts, "x ~ts - unsupported type ~p~n",
- [Name, Other]),
- not_written
- end,
+write_extracted_element(#tar_header{name=Name0}=Header, Bin, Opts) ->
+ Name1 = filename:absname(Name0, Opts#read_opts.cwd),
+ Created =
+ case typeflag(Header#tar_header.typeflag) of
+ regular ->
+ create_regular(Name1, Name0, Bin, Opts);
+ directory ->
+ read_verbose(Opts, "x ~ts~n", [Name0]),
+ create_extracted_dir(Name1, Opts);
+ symlink ->
+ read_verbose(Opts, "x ~ts~n", [Name0]),
+ create_symlink(Name1, Header#tar_header.linkname, Opts);
+ Device when Device =:= char orelse Device =:= block ->
+ %% char/block devices will be created as empty files
+ %% and then have their major/minor device set later
+ create_regular(Name1, Name0, <<>>, Opts);
+ fifo ->
+ %% fifo devices will be created as empty files
+ create_regular(Name1, Name0, <<>>, Opts);
+ Other -> % Ignore.
+ read_verbose(Opts, "x ~ts - unsupported type ~p~n",
+ [Name0, Other]),
+ not_written
+ end,
case Created of
- ok -> set_extracted_file_info(Name, Header);
- not_written -> ok
+ ok -> set_extracted_file_info(Name1, Header);
+ not_written -> ok
+ end.
+
+create_regular(Name, NameInArchive, Bin, Opts) ->
+ case write_extracted_file(Name, Bin, Opts) of
+ not_written ->
+ read_verbose(Opts, "x ~ts - exists, not created~n", [NameInArchive]),
+ not_written;
+ Ok ->
+ read_verbose(Opts, "x ~ts~n", [NameInArchive]),
+ Ok
end.
create_extracted_dir(Name, _Opts) ->
case file:make_dir(Name) of
- ok -> ok;
- {error,enotsup} -> not_written;
- {error,eexist} -> not_written;
- {error,enoent} -> make_dirs(Name, dir);
- {error,Reason} -> throw({error, Reason})
+ ok -> ok;
+ {error,enotsup} -> not_written;
+ {error,eexist} -> not_written;
+ {error,enoent} -> make_dirs(Name, dir);
+ {error,Reason} -> throw({error, Reason})
end.
-create_symlink(Name, #tar_header{linkname=Linkname}=Header, Opts) ->
+create_symlink(Name, Linkname, Opts) ->
case file:make_symlink(Linkname, Name) of
- ok -> ok;
- {error,enoent} ->
- ok = make_dirs(Name, file),
- create_symlink(Name, Header, Opts);
- {error,eexist} -> not_written;
- {error,enotsup} ->
- read_verbose(Opts, "x ~ts - symbolic links not supported~n", [Name]),
- not_written;
- {error,Reason} -> throw({error, Reason})
+ ok -> ok;
+ {error,enoent} ->
+ ok = make_dirs(Name, file),
+ create_symlink(Name, Linkname, Opts);
+ {error,eexist} -> not_written;
+ {error,enotsup} ->
+ read_verbose(Opts, "x ~ts - symbolic links not supported~n", [Name]),
+ not_written;
+ {error,Reason} -> throw({error, Reason})
end.
write_extracted_file(Name, Bin, Opts) ->
Write =
- case Opts#read_opts.keep_old_files of
- true ->
- case file:read_file_info(Name) of
- {ok, _} -> false;
- _ -> true
- end;
- false -> true
- end,
+ case Opts#read_opts.keep_old_files of
+ true ->
+ case file:read_file_info(Name) of
+ {ok, _} -> false;
+ _ -> true
+ end;
+ false -> true
+ end,
case Write of
- true ->
- read_verbose(Opts, "x ~ts~n", [Name]),
- write_file(Name, Bin);
- false ->
- read_verbose(Opts, "x ~ts - exists, not created~n", [Name]),
- not_written
+ true -> write_file(Name, Bin);
+ false -> not_written
end.
write_file(Name, Bin) ->
case file:write_file(Name, Bin) of
- ok -> ok;
- {error,enoent} ->
- ok = make_dirs(Name, file),
- write_file(Name, Bin);
- {error,Reason} ->
- throw({error, Reason})
+ ok -> ok;
+ {error,enoent} ->
+ ok = make_dirs(Name, file),
+ write_file(Name, Bin);
+ {error,Reason} ->
+ throw({error, Reason})
end.
-set_extracted_file_info(_, #tar_header{typeflag = symlink}) -> ok;
-set_extracted_file_info(Name, #tar_header{mode=Mode, mtime=Mtime}) ->
- Info = #file_info{mode=Mode, mtime=posix_to_erlang_time(Mtime)},
+set_extracted_file_info(_, #tar_header{typeflag = ?TYPE_SYMLINK}) -> ok;
+set_extracted_file_info(_, #tar_header{typeflag = ?TYPE_LINK}) -> ok;
+set_extracted_file_info(Name, #tar_header{typeflag = ?TYPE_CHAR}=Header) ->
+ set_device_info(Name, Header);
+set_extracted_file_info(Name, #tar_header{typeflag = ?TYPE_BLOCK}=Header) ->
+ set_device_info(Name, Header);
+set_extracted_file_info(Name, #tar_header{mtime=Mtime,mode=Mode}) ->
+ Info = #file_info{mode=Mode, mtime=Mtime},
+ file:write_file_info(Name, Info).
+
+set_device_info(Name, #tar_header{}=Header) ->
+ Mtime = Header#tar_header.mtime,
+ Mode = Header#tar_header.mode,
+ Devmajor = Header#tar_header.devmajor,
+ Devminor = Header#tar_header.devminor,
+ Info = #file_info{
+ mode=Mode,
+ mtime=Mtime,
+ major_device=Devmajor,
+ minor_device=Devminor
+ },
file:write_file_info(Name, Info).
%% Makes all directories leading up to the file.
make_dirs(Name, file) ->
- filelib:ensure_dir(Name);
+ filelib:ensure_dir(Name);
make_dirs(Name, dir) ->
- filelib:ensure_dir(filename:join(Name,"*")).
+ filelib:ensure_dir(filename:join(Name,"*")).
%% Prints the message on if the verbose option is given (for reading).
-
read_verbose(#read_opts{verbose=true}, Format, Args) ->
- io:format(Format, Args),
- io:nl();
+ io:format(Format, Args);
read_verbose(_, _, _) ->
ok.
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%
-%%% Utility functions.
-%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-%% Returns the checksum of a binary.
-
-checksum(Bin) -> checksum(Bin, 0).
-
-checksum(<<A,B,C,D,E,F,G,H,T/binary>>, Sum) ->
- checksum(T, Sum+A+B+C+D+E+F+G+H);
-checksum(<<A,T/binary>>, Sum) ->
- checksum(T, Sum+A);
-checksum(<<>>, Sum) -> Sum.
-
-%% Returns a list of zeroes to pad out to the given block size.
-
-padding(Size, BlockSize) ->
- zeroes(pad_size(Size, BlockSize)).
-
-pad_size(Size, BlockSize) ->
- case Size rem BlockSize of
- 0 -> 0;
- Rem -> BlockSize-Rem
- end.
-
-zeroes(0) -> [];
-zeroes(1) -> [0];
-zeroes(2) -> [0,0];
-zeroes(Number) ->
- Half = zeroes(Number div 2),
- case Number rem 2 of
- 0 -> [Half|Half];
- 1 -> [Half|[0|Half]]
- end.
-
-%% Skips the given number of bytes rounded up to an even record.
-
-skip(File, Size) ->
- %% Note: There is no point in handling failure to get the current position
- %% in the file. If it doesn't work, something serious is wrong.
- Amount = ((Size + ?record_size - 1) div ?record_size) * ?record_size,
- {ok,_} = do_position(File, {cur, Amount}),
- ok.
-
-%% Skips to the next record in the file.
-
-skip_to_next(File) ->
- %% Note: There is no point in handling failure to get the current position
- %% in the file. If it doesn't work, something serious is wrong.
- {ok, Position} = do_position(File, {cur, 0}),
- NewPosition = ((Position + ?record_size - 1) div ?record_size) * ?record_size,
- {ok,NewPosition} = do_position(File, NewPosition),
- ok.
-
%% Prints the message on if the verbose option is given.
-
add_verbose(#add_opts{verbose=true}, Format, Args) ->
io:format(Format, Args);
add_verbose(_, _, _) ->
ok.
-%% Converts a tuple containing the time to a Posix time (seconds
-%% since Jan 1, 1970).
+%%%%%%%%%%%%%%%%%%
+%% I/O primitives
+%%%%%%%%%%%%%%%%%%
+
+do_write(#reader{handle=Handle,func=Fun}=Reader0, Data)
+ when is_function(Fun,2) ->
+ case Fun(write,{Handle,Data}) of
+ ok ->
+ {ok, Pos, Reader1} = do_position(Reader0, {cur,0}),
+ {ok, Reader1#reader{pos=Pos}};
+ {error, _} = Err ->
+ Err
+ end.
-posix_time(Time) ->
- EpochStart = {{1970,1,1},{0,0,0}},
- {Days,{Hour,Min,Sec}} = calendar:time_difference(EpochStart, Time),
- 86400*Days + 3600*Hour + 60*Min + Sec.
+do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=0}=Opts)
+ when is_function(Fun, 2) ->
+ do_copy(Reader, Source, Opts#add_opts{chunk_size=65536});
+do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=ChunkSize})
+ when is_function(Fun, 2) ->
+ case file:open(Source, [read, binary]) of
+ {ok, SourceFd} ->
+ case copy_chunked(Reader, SourceFd, ChunkSize, 0) of
+ {ok, _Copied, _Reader2} = Ok->
+ _ = file:close(SourceFd),
+ Ok;
+ Err ->
+ _ = file:close(SourceFd),
+ throw(Err)
+ end;
+ Err ->
+ throw(Err)
+ end.
-posix_to_erlang_time(Sec) ->
- OneMillion = 1000000,
- Time = calendar:now_to_datetime({Sec div OneMillion, Sec rem OneMillion, 0}),
- erlang:universaltime_to_localtime(Time).
+copy_chunked(#reader{}=Reader, Source, ChunkSize, Copied) ->
+ case file:read(Source, ChunkSize) of
+ {ok, Bin} ->
+ {ok, Reader2} = do_write(Reader, Bin),
+ copy_chunked(Reader2, Source, ChunkSize, Copied+byte_size(Bin));
+ eof ->
+ {ok, Copied, Reader};
+ Other ->
+ Other
+ end.
-read_file_and_info(Name, Opts) ->
- ReadInfo = Opts#add_opts.read_info,
- case ReadInfo(Name) of
- {ok,Info} when Info#file_info.type =:= regular,
- Opts#add_opts.chunk_size>0 ->
- {ok,chunked,Info};
- {ok,Info} when Info#file_info.type =:= regular ->
- case file:read_file(Name) of
- {ok,Bin} ->
- {ok,Bin,Info};
- Error ->
- Error
- end;
- {ok,Info} when Info#file_info.type =:= symlink ->
- case file:read_link(Name) of
- {ok,PointsTo} ->
- {ok,PointsTo,Info};
- Error ->
- Error
- end;
- {ok, Info} ->
- {ok,[],Info};
- Error ->
- Error
+
+do_position(#reader{handle=Handle,func=Fun}=Reader, Pos)
+ when is_function(Fun,2)->
+ case Fun(position, {Handle,Pos}) of
+ {ok, NewPos} ->
+ %% since Pos may not always be an absolute seek,
+ %% make sure we update the reader with the new absolute position
+ {ok, AbsPos} = Fun(position, {Handle, {cur, 0}}),
+ {ok, NewPos, Reader#reader{pos=AbsPos}};
+ Other ->
+ Other
end.
-foreach_while_ok(Fun, [First|Rest]) ->
- case Fun(First) of
- ok -> foreach_while_ok(Fun, Rest);
- Other -> Other
+do_read(#reg_file_reader{handle=Handle,pos=Pos,size=Size}=Reader, Len) ->
+ NumBytes = Size - Pos,
+ ActualLen = if NumBytes - Len < 0 -> NumBytes; true -> Len end,
+ case do_read(Handle, ActualLen) of
+ {ok, Bin, Handle2} ->
+ NewPos = Pos + ActualLen,
+ NumBytes2 = Size - NewPos,
+ Reader1 = Reader#reg_file_reader{
+ handle=Handle2,
+ pos=NewPos,
+ num_bytes=NumBytes2},
+ {ok, Bin, Reader1};
+ Other ->
+ Other
end;
-foreach_while_ok(_, []) -> ok.
-
-open_mode(Mode) ->
- open_mode(Mode, false, [raw], []).
+do_read(#sparse_file_reader{}=Reader, Len) ->
+ do_sparse_read(Reader, Len);
+do_read(#reader{pos=Pos,handle=Handle,func=Fun}=Reader, Len)
+ when is_function(Fun,2)->
+ %% Always convert to binary internally
+ case Fun(read2,{Handle,Len}) of
+ {ok, List} when is_list(List) ->
+ Bin = list_to_binary(List),
+ NewPos = Pos+byte_size(Bin),
+ {ok, Bin, Reader#reader{pos=NewPos}};
+ {ok, Bin} when is_binary(Bin) ->
+ NewPos = Pos+byte_size(Bin),
+ {ok, Bin, Reader#reader{pos=NewPos}};
+ Other ->
+ Other
+ end.
-open_mode(read, _, Raw, _) ->
- {ok, read, Raw, []};
-open_mode(write, _, Raw, _) ->
- {ok, write, Raw, []};
-open_mode([read|Rest], false, Raw, Opts) ->
- open_mode(Rest, read, Raw, Opts);
-open_mode([write|Rest], false, Raw, Opts) ->
- open_mode(Rest, write, Raw, Opts);
-open_mode([compressed|Rest], Access, Raw, Opts) ->
- open_mode(Rest, Access, Raw, [compressed|Opts]);
-open_mode([cooked|Rest], Access, _Raw, Opts) ->
- open_mode(Rest, Access, [], Opts);
-open_mode([], Access, Raw, Opts) ->
- {ok, Access, Raw, Opts};
-open_mode(_, _, _, _) ->
- {error, einval}.
-%%%================================================================
-do_write({tar_descriptor,UsrHandle,Fun}, Data) -> Fun(write,{UsrHandle,Data}).
+do_sparse_read(Reader, Len) ->
+ do_sparse_read(Reader, Len, <<>>).
+
+do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{num_bytes=0}|Entries]
+ }=Reader0, Len, Acc) ->
+ %% skip all empty fragments
+ Reader1 = Reader0#sparse_file_reader{sparse_map=Entries},
+ do_sparse_read(Reader1, Len, Acc);
+do_sparse_read(#sparse_file_reader{sparse_map=[],
+ pos=Pos,size=Size}=Reader0, Len, Acc)
+ when Pos < Size ->
+ %% if there are no more fragments, it is possible that there is one last sparse hole
+ %% this behaviour matches the BSD tar utility
+ %% however, GNU tar stops returning data even if we haven't reached the end
+ {ok, Bin, Reader1} = read_sparse_hole(Reader0, Size, Len),
+ do_sparse_read(Reader1, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>);
+do_sparse_read(#sparse_file_reader{sparse_map=[]}=Reader, _Len, Acc) ->
+ {ok, Acc, Reader};
+do_sparse_read(#sparse_file_reader{}=Reader, 0, Acc) ->
+ {ok, Acc, Reader};
+do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{offset=Offset}|_],
+ pos=Pos}=Reader0, Len, Acc)
+ when Pos < Offset ->
+ {ok, Bin, Reader1} = read_sparse_hole(Reader0, Offset, Offset-Pos),
+ do_sparse_read(Reader1, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>);
+do_sparse_read(#sparse_file_reader{sparse_map=[Entry|Entries],
+ pos=Pos}=Reader0, Len, Acc) ->
+ %% we're in a data fragment, so read from it
+ %% end offset of fragment
+ EndPos = Entry#sparse_entry.offset + Entry#sparse_entry.num_bytes,
+ %% bytes left in fragment
+ NumBytes = EndPos - Pos,
+ ActualLen = if Len > NumBytes -> NumBytes; true -> Len end,
+ case do_read(Reader0#sparse_file_reader.handle, ActualLen) of
+ {ok, Bin, Handle} ->
+ BytesRead = byte_size(Bin),
+ ActualEndPos = Pos+BytesRead,
+ Reader1 = if ActualEndPos =:= EndPos ->
+ Reader0#sparse_file_reader{sparse_map=Entries};
+ true ->
+ Reader0
+ end,
+ Size = Reader1#sparse_file_reader.size,
+ NumBytes2 = Size - ActualEndPos,
+ Reader2 = Reader1#sparse_file_reader{
+ handle=Handle,
+ pos=ActualEndPos,
+ num_bytes=NumBytes2},
+ do_sparse_read(Reader2, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>);
+ Other ->
+ Other
+ end.
+
+%% Reads a sparse hole ending at Offset
+read_sparse_hole(#sparse_file_reader{pos=Pos}=Reader, Offset, Len) ->
+ N = Offset - Pos,
+ N2 = if N > Len ->
+ Len;
+ true ->
+ N
+ end,
+ Bin = <<0:N2/unit:8>>,
+ NumBytes = Reader#sparse_file_reader.size - (Pos+N2),
+ {ok, Bin, Reader#sparse_file_reader{
+ num_bytes=NumBytes,
+ pos=Pos+N2}}.
+
+-spec do_close(reader()) -> ok | {error, term()}.
+do_close(#reader{handle=Handle,func=Fun}) when is_function(Fun,2) ->
+ Fun(close,Handle).
+
+%%%%%%%%%%%%%%%%%%
+%% Option parsing
+%%%%%%%%%%%%%%%%%%
-do_position({tar_descriptor,UsrHandle,Fun}, Pos) -> Fun(position,{UsrHandle,Pos}).
+extract_opts(List) ->
+ extract_opts(List, default_options()).
-do_read({tar_descriptor,UsrHandle,Fun}, Len) -> Fun(read2,{UsrHandle,Len}).
+table_opts(List) ->
+ read_opts(List, default_options()).
+
+default_options() ->
+ {ok, Cwd} = file:get_cwd(),
+ #read_opts{cwd=Cwd}.
-do_close({tar_descriptor,UsrHandle,Fun}) -> Fun(close,UsrHandle).
+extract_opts([keep_old_files|Rest], Opts) ->
+ extract_opts(Rest, Opts#read_opts{keep_old_files=true});
+extract_opts([{cwd, Cwd}|Rest], Opts) ->
+ extract_opts(Rest, Opts#read_opts{cwd=Cwd});
+extract_opts([{files, Files}|Rest], Opts) ->
+ Set = ordsets:from_list(Files),
+ extract_opts(Rest, Opts#read_opts{files=Set});
+extract_opts([memory|Rest], Opts) ->
+ extract_opts(Rest, Opts#read_opts{output=memory});
+extract_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
+ extract_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]});
+extract_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
+ extract_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]});
+extract_opts([verbose|Rest], Opts) ->
+ extract_opts(Rest, Opts#read_opts{verbose=true});
+extract_opts([Other|Rest], Opts) ->
+ extract_opts(Rest, read_opts([Other], Opts));
+extract_opts([], Opts) ->
+ Opts.
+
+read_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
+ read_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]});
+read_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
+ read_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]});
+read_opts([verbose|Rest], Opts) ->
+ read_opts(Rest, Opts#read_opts{verbose=true});
+read_opts([_|Rest], Opts) ->
+ read_opts(Rest, Opts);
+read_opts([], Opts) ->
+ Opts.
diff --git a/lib/stdlib/src/erl_tar.hrl b/lib/stdlib/src/erl_tar.hrl
new file mode 100644
index 0000000000..d646d02989
--- /dev/null
+++ b/lib/stdlib/src/erl_tar.hrl
@@ -0,0 +1,394 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2017. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+
+%% Options used when adding files to a tar archive.
+-record(add_opts, {
+ read_info, %% Fun to use for read file/link info.
+ chunk_size = 0, %% For file reading when sending to sftp. 0=do not chunk
+ verbose = false}). %% Verbose on/off.
+-type add_opts() :: #add_opts{}.
+
+%% Options used when reading a tar archive.
+-record(read_opts, {
+ cwd :: string(), %% Current working directory.
+ keep_old_files = false :: boolean(), %% Owerwrite or not.
+ files = all, %% Set of files to extract (or all)
+ output = file :: 'file' | 'memory',
+ open_mode = [], %% Open mode options.
+ verbose = false :: boolean()}). %% Verbose on/off.
+-type read_opts() :: #read_opts{}.
+
+-type add_opt() :: dereference |
+ verbose |
+ {chunks, pos_integer()}.
+
+-type extract_opt() :: {cwd, string()} |
+ {files, [string()]} |
+ compressed |
+ cooked |
+ memory |
+ keep_old_files |
+ verbose.
+
+-type create_opt() :: compressed |
+ cooked |
+ dereference |
+ verbose.
+
+-type filelist() :: [file:filename() |
+ {string(), binary()} |
+ {string(), file:filename()}].
+
+%% The tar header, once fully parsed.
+-record(tar_header, {
+ name = "" :: string(), %% name of header file entry
+ mode = 8#100644 :: non_neg_integer(), %% permission and mode bits
+ uid = 0 :: non_neg_integer(), %% user id of owner
+ gid = 0 :: non_neg_integer(), %% group id of owner
+ size = 0 :: non_neg_integer(), %% length in bytes
+ mtime :: calendar:datetime(), %% modified time
+ typeflag :: char(), %% type of header entry
+ linkname = "" :: string(), %% target name of link
+ uname = "" :: string(), %% user name of owner
+ gname = "" :: string(), %% group name of owner
+ devmajor = 0 :: non_neg_integer(), %% major number of character or block device
+ devminor = 0 :: non_neg_integer(), %% minor number of character or block device
+ atime :: calendar:datetime(), %% access time
+ ctime :: calendar:datetime() %% status change time
+ }).
+-type tar_header() :: #tar_header{}.
+
+%% Metadata for a sparse file fragment
+-record(sparse_entry, {
+ offset = 0 :: non_neg_integer(),
+ num_bytes = 0 :: non_neg_integer()}).
+-type sparse_entry() :: #sparse_entry{}.
+%% Contains metadata about fragments of a sparse file
+-record(sparse_array, {
+ entries = [] :: [sparse_entry()],
+ is_extended = false :: boolean(),
+ max_entries = 0 :: non_neg_integer()}).
+-type sparse_array() :: #sparse_array{}.
+%% A subset of tar header fields common to all tar implementations
+-record(header_v7, {
+ name :: binary(),
+ mode :: binary(), %% octal
+ uid :: binary(), %% integer
+ gid :: binary(), %% integer
+ size :: binary(), %% integer
+ mtime :: binary(), %% integer
+ checksum :: binary(), %% integer
+ typeflag :: byte(), %% char
+ linkname :: binary()}).
+-type header_v7() :: #header_v7{}.
+%% The set of fields specific to GNU tar formatted archives
+-record(header_gnu, {
+ header_v7 :: header_v7(),
+ magic :: binary(),
+ version :: binary(),
+ uname :: binary(),
+ gname :: binary(),
+ devmajor :: binary(), %% integer
+ devminor :: binary(), %% integer
+ atime :: binary(), %% integer
+ ctime :: binary(), %% integer
+ sparse :: sparse_array(),
+ real_size :: binary()}). %% integer
+-type header_gnu() :: #header_gnu{}.
+%% The set of fields specific to STAR-formatted archives
+-record(header_star, {
+ header_v7 :: header_v7(),
+ magic :: binary(),
+ version :: binary(),
+ uname :: binary(),
+ gname :: binary(),
+ devmajor :: binary(), %% integer
+ devminor :: binary(), %% integer
+ prefix :: binary(),
+ atime :: binary(), %% integer
+ ctime :: binary(), %% integer
+ trailer :: binary()}).
+-type header_star() :: #header_star{}.
+%% The set of fields specific to USTAR-formatted archives
+-record(header_ustar, {
+ header_v7 :: header_v7(),
+ magic :: binary(),
+ version :: binary(),
+ uname :: binary(),
+ gname :: binary(),
+ devmajor :: binary(), %% integer
+ devminor :: binary(), %% integer
+ prefix :: binary()}).
+-type header_ustar() :: #header_ustar{}.
+
+-type header_fields() :: header_v7() |
+ header_gnu() |
+ header_star() |
+ header_ustar().
+
+%% The overall tar reader, it holds the low-level file handle,
+%% its access, position, and the I/O primitives wrapper.
+-record(reader, {
+ handle :: file:io_device() | term(),
+ access :: read | write | ram,
+ pos = 0 :: non_neg_integer(),
+ func :: file_op()
+ }).
+-type reader() :: #reader{}.
+%% A reader for a regular file within the tar archive,
+%% It tracks its current state relative to that file.
+-record(reg_file_reader, {
+ handle :: reader(),
+ num_bytes = 0,
+ pos = 0,
+ size = 0
+ }).
+-type reg_file_reader() :: #reg_file_reader{}.
+%% A reader for a sparse file within the tar archive,
+%% It tracks its current state relative to that file.
+-record(sparse_file_reader, {
+ handle :: reader(),
+ num_bytes = 0, %% bytes remaining
+ pos = 0, %% pos
+ size = 0, %% total size of file
+ sparse_map = #sparse_array{}
+ }).
+-type sparse_file_reader() :: #sparse_file_reader{}.
+
+%% Types for the readers
+-type reader_type() :: reader() | reg_file_reader() | sparse_file_reader().
+-type handle() :: file:io_device() | term().
+
+%% Type for the I/O primitive wrapper function
+-type file_op() :: fun((write | close | read2 | position,
+ {handle(), iodata()} | handle() | {handle(), non_neg_integer()}
+ | {handle(), non_neg_integer()}) ->
+ ok | eof | {ok, string() | binary()} | {ok, non_neg_integer()}
+ | {error, term()}).
+
+%% These constants (except S_IFMT) are
+%% used to determine what type of device
+%% a file is. Namely, `S_IFMT band file_info.mode`
+%% will equal one of these contants, and tells us
+%% which type it is. The stdlib file_info record
+%% does not differentiate between device types, and
+%% will not allow us to differentiate between sockets
+%% and named pipes. These constants are pulled from libc.
+-define(S_IFMT, 61440).
+-define(S_IFSOCK, 49152). %% socket
+-define(S_FIFO, 4096). %% fifo/named pipe
+-define(S_IFBLK, 24576). %% block device
+-define(S_IFCHR, 8192). %% character device
+
+%% Typeflag constants for the tar header
+-define(TYPE_REGULAR, $0). %% regular file
+-define(TYPE_REGULAR_A, 0). %% regular file
+-define(TYPE_LINK, $1). %% hard link
+-define(TYPE_SYMLINK, $2). %% symbolic link
+-define(TYPE_CHAR, $3). %% character device node
+-define(TYPE_BLOCK, $4). %% block device node
+-define(TYPE_DIR, $5). %% directory
+-define(TYPE_FIFO, $6). %% fifo node
+-define(TYPE_CONT, $7). %% reserved
+-define(TYPE_X_HEADER, $x). %% extended header
+-define(TYPE_X_GLOBAL_HEADER, $g). %% global extended header
+-define(TYPE_GNU_LONGNAME, $L). %% next file has a long name
+-define(TYPE_GNU_LONGLINK, $K). %% next file symlinks to a file with a long name
+-define(TYPE_GNU_SPARSE, $S). %% sparse file
+
+%% Mode constants from tar spec
+-define(MODE_ISUID, 4000). %% set uid
+-define(MODE_ISGID, 2000). %% set gid
+-define(MODE_ISVTX, 1000). %% save text (sticky bit)
+-define(MODE_ISDIR, 40000). %% directory
+-define(MODE_ISFIFO, 10000). %% fifo
+-define(MODE_ISREG, 100000). %% regular file
+-define(MODE_ISLNK, 120000). %% symbolic link
+-define(MODE_ISBLK, 60000). %% block special file
+-define(MODE_ISCHR, 20000). %% character special file
+-define(MODE_ISSOCK, 140000). %% socket
+
+%% Keywords for PAX extended header
+-define(PAX_ATIME, <<"atime">>).
+-define(PAX_CHARSET, <<"charset">>).
+-define(PAX_COMMENT, <<"comment">>).
+-define(PAX_CTIME, <<"ctime">>). %% ctime is not a valid pax header
+-define(PAX_GID, <<"gid">>).
+-define(PAX_GNAME, <<"gname">>).
+-define(PAX_LINKPATH, <<"linkpath">>).
+-define(PAX_MTIME, <<"mtime">>).
+-define(PAX_PATH, <<"path">>).
+-define(PAX_SIZE, <<"size">>).
+-define(PAX_UID, <<"uid">>).
+-define(PAX_UNAME, <<"uname">>).
+-define(PAX_XATTR, <<"SCHILY.xattr.">>).
+-define(PAX_XATTR_STR, "SCHILY.xattr.").
+-define(PAX_NONE, <<"">>).
+
+%% Tar format constants
+%% Unknown format
+-define(FORMAT_UNKNOWN, 0).
+%% The format of the original Unix V7 tar tool prior to standardization
+-define(FORMAT_V7, 1).
+%% The old and new GNU formats, incompatible with USTAR.
+%% This covers the old GNU sparse extension, but it does
+%% not cover the GNU sparse extensions using PAX headers,
+%% versions 0.0, 0.1, and 1.0; these fall under the PAX format.
+-define(FORMAT_GNU, 2).
+%% Schily's tar format, which is incompatible with USTAR.
+%% This does not cover STAR extensions to the PAX format; these
+%% fall under the PAX format.
+-define(FORMAT_STAR, 3).
+%% USTAR is the former standardization of tar defined in POSIX.1-1988,
+%% it is incompatible with the GNU and STAR formats.
+-define(FORMAT_USTAR, 4).
+%% PAX is the latest standardization of tar defined in POSIX.1-2001.
+%% This is an extension of USTAR and is "backwards compatible" with it.
+%%
+%% Some newer formats add their own extensions to PAX, such as GNU sparse
+%% files and SCHILY extended attributes. Since they are backwards compatible
+%% with PAX, they will be labelled as "PAX".
+-define(FORMAT_PAX, 5).
+
+%% Magic constants
+-define(MAGIC_GNU, <<"ustar ">>).
+-define(VERSION_GNU, <<" \x00">>).
+-define(MAGIC_USTAR, <<"ustar\x00">>).
+-define(VERSION_USTAR, <<"00">>).
+-define(TRAILER_STAR, <<"tar\x00">>).
+
+%% Size constants
+-define(BLOCK_SIZE, 512). %% size of each block in a tar stream
+-define(NAME_SIZE, 100). %% max length of the name field in USTAR format
+-define(PREFIX_SIZE, 155). %% max length of the prefix field in USTAR format
+
+%% Maximum size of a nanosecond value as an integer
+-define(MAX_NANO_INT_SIZE, 9).
+%% Maximum size of a 64-bit signed integer
+-define(MAX_INT64, (1 bsl 63 - 1)).
+
+-define(PAX_GNU_SPARSE_NUMBLOCKS, <<"GNU.sparse.numblocks">>).
+-define(PAX_GNU_SPARSE_OFFSET, <<"GNU.sparse.offset">>).
+-define(PAX_GNU_SPARSE_NUMBYTES, <<"GNU.sparse.numbytes">>).
+-define(PAX_GNU_SPARSE_MAP, <<"GNU.sparse.map">>).
+-define(PAX_GNU_SPARSE_NAME, <<"GNU.sparse.name">>).
+-define(PAX_GNU_SPARSE_MAJOR, <<"GNU.sparse.major">>).
+-define(PAX_GNU_SPARSE_MINOR, <<"GNU.sparse.minor">>).
+-define(PAX_GNU_SPARSE_SIZE, <<"GNU.sparse.size">>).
+-define(PAX_GNU_SPARSE_REALSIZE, <<"GNU.sparse.realsize">>).
+
+-define(V7_NAME, 0).
+-define(V7_NAME_LEN, 100).
+-define(V7_MODE, 100).
+-define(V7_MODE_LEN, 8).
+-define(V7_UID, 108).
+-define(V7_UID_LEN, 8).
+-define(V7_GID, 116).
+-define(V7_GID_LEN, 8).
+-define(V7_SIZE, 124).
+-define(V7_SIZE_LEN, 12).
+-define(V7_MTIME, 136).
+-define(V7_MTIME_LEN, 12).
+-define(V7_CHKSUM, 148).
+-define(V7_CHKSUM_LEN, 8).
+-define(V7_TYPE, 156).
+-define(V7_TYPE_LEN, 1).
+-define(V7_LINKNAME, 157).
+-define(V7_LINKNAME_LEN, 100).
+
+-define(STAR_TRAILER, 508).
+-define(STAR_TRAILER_LEN, 4).
+
+-define(USTAR_MAGIC, 257).
+-define(USTAR_MAGIC_LEN, 6).
+-define(USTAR_VERSION, 263).
+-define(USTAR_VERSION_LEN, 2).
+-define(USTAR_UNAME, 265).
+-define(USTAR_UNAME_LEN, 32).
+-define(USTAR_GNAME, 297).
+-define(USTAR_GNAME_LEN, 32).
+-define(USTAR_DEVMAJ, 329).
+-define(USTAR_DEVMAJ_LEN, 8).
+-define(USTAR_DEVMIN, 337).
+-define(USTAR_DEVMIN_LEN, 8).
+-define(USTAR_PREFIX, 345).
+-define(USTAR_PREFIX_LEN, 155).
+
+-define(GNU_MAGIC, 257).
+-define(GNU_MAGIC_LEN, 6).
+-define(GNU_VERSION, 263).
+-define(GNU_VERSION_LEN, 2).
+
+%% ?BLOCK_SIZE of zero-bytes.
+%% Two of these in a row mark the end of an archive.
+-define(ZERO_BLOCK, <<0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0>>).
+
+-define(BILLION, 1000000000).
+
+-define(EPOCH, {{1970,1,1}, {0,0,0}}).
diff --git a/lib/stdlib/test/tar_SUITE.erl b/lib/stdlib/test/tar_SUITE.erl
index 6f3979bb77..d6b6d3f80c 100644
--- a/lib/stdlib/test/tar_SUITE.erl
+++ b/lib/stdlib/test/tar_SUITE.erl
@@ -22,9 +22,10 @@
-export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1,
init_per_group/2,end_per_group/2, borderline/1, atomic/1, long_names/1,
create_long_names/1, bad_tar/1, errors/1, extract_from_binary/1,
- extract_from_binary_compressed/1,
+ extract_from_binary_compressed/1, extract_filtered/1,
extract_from_open_file/1, symlinks/1, open_add_close/1, cooked_compressed/1,
- memory/1,unicode/1]).
+ memory/1,unicode/1,read_other_implementations/1,
+ sparse/1, init/1]).
-include_lib("common_test/include/ct.hrl").
-include_lib("kernel/include/file.hrl").
@@ -35,7 +36,10 @@ all() ->
[borderline, atomic, long_names, create_long_names,
bad_tar, errors, extract_from_binary,
extract_from_binary_compressed, extract_from_open_file,
- symlinks, open_add_close, cooked_compressed, memory, unicode].
+ extract_filtered,
+ symlinks, open_add_close, cooked_compressed, memory, unicode,
+ read_other_implementations,
+ sparse,init].
groups() ->
[].
@@ -84,17 +88,30 @@ borderline(Config) when is_list(Config) ->
ok.
borderline_test(Size, TempDir) ->
- Archive = filename:join(TempDir, "ar_"++integer_to_list(Size)++".tar"),
- Name = filename:join(TempDir, "file_"++integer_to_list(Size)),
io:format("Testing size ~p", [Size]),
+ borderline_test(Size, TempDir, true),
+ borderline_test(Size, TempDir, false),
+ ok.
+
+borderline_test(Size, TempDir, IsUstar) ->
+ Prefix = case IsUstar of
+ true ->
+ "file_";
+ false ->
+ lists:duplicate(100, $f) ++ "ile_"
+ end,
+ SizeList = integer_to_list(Size),
+ Archive = filename:join(TempDir, "ar_"++ SizeList ++".tar"),
+ Name = filename:join(TempDir, Prefix++SizeList),
%% Create a file and archive it.
X0 = erlang:monotonic_time(),
- file:write_file(Name, random_byte_list(X0, Size)),
+ ok = file:write_file(Name, random_byte_list(X0, Size)),
ok = erl_tar:create(Archive, [Name]),
ok = file:delete(Name),
%% Verify listing and extracting.
+ IsUstar = is_ustar(Archive),
{ok, [Name]} = erl_tar:table(Archive),
ok = erl_tar:extract(Archive, [verbose]),
@@ -103,7 +120,12 @@ borderline_test(Size, TempDir) ->
true = match_byte_list(X0, binary_to_list(Bin)),
%% Verify that Unix tar can read it.
- tar_tf(Archive, Name),
+ case IsUstar of
+ true ->
+ tar_tf(Archive, Name);
+ false ->
+ ok
+ end,
ok.
@@ -336,6 +358,7 @@ create_long_names() ->
ok = erl_tar:tt(TarName),
%% Extract and verify.
+ true = is_ustar(TarName),
ExtractDir = "extract_dir",
ok = file:make_dir(ExtractDir),
ok = erl_tar:extract(TarName, [{cwd,ExtractDir}]),
@@ -357,7 +380,7 @@ make_dirs([], Dir) ->
%% Try erl_tar:table/2 and erl_tar:extract/2 on some corrupted tar files.
bad_tar(Config) when is_list(Config) ->
try_bad("bad_checksum", bad_header, Config),
- try_bad("bad_octal", bad_header, Config),
+ try_bad("bad_octal", invalid_tar_checksum, Config),
try_bad("bad_too_short", eof, Config),
try_bad("bad_even_shorter", eof, Config),
ok.
@@ -370,8 +393,10 @@ try_bad(Name0, Reason, Config) ->
Name = Name0 ++ ".tar",
io:format("~nTrying ~s", [Name]),
Full = filename:join(DataDir, Name),
- Opts = [verbose, {cwd, PrivDir}],
+ Dest = filename:join(PrivDir, Name0),
+ Opts = [verbose, {cwd, Dest}],
Expected = {error, Reason},
+ io:fwrite("Expected: ~p\n", [Expected]),
case {erl_tar:table(Full, Opts), erl_tar:extract(Full, Opts)} of
{Expected, Expected} ->
io:format("Result: ~p", [Expected]),
@@ -493,6 +518,27 @@ extract_from_binary_compressed(Config) when is_list(Config) ->
ok.
+%% Test extracting a tar archive from a binary.
+extract_filtered(Config) when is_list(Config) ->
+ DataDir = proplists:get_value(data_dir, Config),
+ PrivDir = proplists:get_value(priv_dir, Config),
+ Long = filename:join(DataDir, "no_fancy_stuff.tar"),
+ ExtractDir = filename:join(PrivDir, "extract_from_binary"),
+ ok = file:make_dir(ExtractDir),
+
+ ok = erl_tar:extract(Long, [{cwd,ExtractDir},{files,["no_fancy_stuff/EPLICENCE"]}]),
+
+ %% Verify.
+ Dir = filename:join(ExtractDir, "no_fancy_stuff"),
+ true = filelib:is_dir(Dir),
+ false = filelib:is_file(filename:join(Dir, "a_dir_list")),
+ true = filelib:is_file(filename:join(Dir, "EPLICENCE")),
+
+ %% Clean up.
+ delete_files([ExtractDir]),
+
+ ok.
+
%% Test extracting a tar archive from an open file.
extract_from_open_file(Config) when is_list(Config) ->
DataDir = proplists:get_value(data_dir, Config),
@@ -573,6 +619,7 @@ symlinks(Dir, BadSymlink, PointsTo) ->
ok = file:write_file(AFile, ALine),
ok = file:make_symlink(AFile, GoodSymlink),
ok = erl_tar:create(Tar, [BadSymlink, GoodSymlink, AFile], [verbose]),
+ true = is_ustar(Tar),
%% List contents of tar file.
@@ -581,6 +628,7 @@ symlinks(Dir, BadSymlink, PointsTo) ->
%% Also create another archive with the dereference flag.
ok = erl_tar:create(DerefTar, [AFile, GoodSymlink], [dereference, verbose]),
+ true = is_ustar(DerefTar),
%% Extract files to a new directory.
@@ -619,13 +667,50 @@ long_symlink(Dir) ->
ok = file:set_cwd(Dir),
AFile = "long_symlink",
- FarTooLong = "/tmp/aarrghh/this/path/is/far/longer/than/one/hundred/characters/which/is/the/maximum/number/of/characters/allowed",
- ok = file:make_symlink(FarTooLong, AFile),
- {error,Error} = erl_tar:create(Tar, [AFile], [verbose]),
- io:format("Error: ~s\n", [erl_tar:format_error(Error)]),
- {FarTooLong,symbolic_link_too_long} = Error,
+ RequiresPAX = "/tmp/aarrghh/this/path/is/far/longer/than/one/hundred/characters/which/is/the/maximum/number/of/characters/allowed",
+ ok = file:make_symlink(RequiresPAX, AFile),
+ ok = erl_tar:create(Tar, [AFile], [verbose]),
+ false = is_ustar(Tar),
+ NewDir = filename:join(Dir, "extracted"),
+ _ = file:make_dir(NewDir),
+ ok = erl_tar:extract(Tar, [{cwd, NewDir}, verbose]),
+ ok = file:set_cwd(NewDir),
+ {ok, #file_info{type=symlink}} = file:read_link_info(AFile),
+ {ok, RequiresPAX} = file:read_link(AFile),
+ ok.
+
+init(Config) when is_list(Config) ->
+ PrivDir = proplists:get_value(priv_dir, Config),
+ ok = file:set_cwd(PrivDir),
+ Dir = filename:join(PrivDir, "init"),
+ ok = file:make_dir(Dir),
+
+ [{FileOne,_,_}|_] = oac_files(),
+ TarOne = filename:join(Dir, "archive1.tar"),
+ {ok,Fd} = file:open(TarOne, [write]),
+
+ %% If the arity of the fun is wrong, badarg should be returned
+ {error, badarg} = erl_tar:init(Fd, write, fun file_op_bad/1),
+
+ %% Otherwise we should be good to go
+ {ok, Tar} = erl_tar:init(Fd, write, fun file_op/2),
+ ok = erl_tar:add(Tar, FileOne, []),
+ ok = erl_tar:close(Tar),
+ {ok, [FileOne]} = erl_tar:table(TarOne),
ok.
+file_op_bad(_) ->
+ throw({error, should_never_be_called}).
+
+file_op(write, {Fd, Data}) ->
+ file:write(Fd, Data);
+file_op(position, {Fd, Pos}) ->
+ file:position(Fd, Pos);
+file_op(read2, {Fd, Size}) ->
+ file:read(Fd, Size);
+file_op(close, Fd) ->
+ file:close(Fd).
+
open_add_close(Config) when is_list(Config) ->
PrivDir = proplists:get_value(priv_dir, Config),
ok = file:set_cwd(PrivDir),
@@ -643,17 +728,26 @@ open_add_close(Config) when is_list(Config) ->
TarOne = filename:join(Dir, "archive1.tar"),
{ok,AD} = erl_tar:open(TarOne, [write]),
ok = erl_tar:add(AD, FileOne, []),
- ok = erl_tar:add(AD, FileTwo, "second file", []),
- ok = erl_tar:add(AD, FileThree, [verbose]),
+
+ %% Add with {NameInArchive,Name}
+ ok = erl_tar:add(AD, {"second file", FileTwo}, []),
+
+ %% Add with {binary, Bin}
+ {ok,FileThreeBin} = file:read_file(FileThree),
+ ok = erl_tar:add(AD, {FileThree, FileThreeBin}, [verbose]),
+
+ %% Add with Name
ok = erl_tar:add(AD, FileThree, "chunked", [{chunks,11411},verbose]),
ok = erl_tar:add(AD, ADir, [verbose]),
ok = erl_tar:add(AD, AnotherDir, [verbose]),
ok = erl_tar:close(AD),
+ true = is_ustar(TarOne),
ok = erl_tar:t(TarOne),
ok = erl_tar:tt(TarOne),
- {ok,[FileOne,"second file",FileThree,"chunked",ADir,SomeContent]} = erl_tar:table(TarOne),
+ Expected = {ok,[FileOne,"second file",FileThree,"chunked",ADir,SomeContent]},
+ Expected = erl_tar:table(TarOne),
delete_files(["oac_file","oac_small","oac_big",Dir,AnotherDir,ADir]),
@@ -718,6 +812,41 @@ memory(Config) when is_list(Config) ->
ok = delete_files([Name1,Name2]),
ok.
+read_other_implementations(Config) when is_list(Config) ->
+ DataDir = proplists:get_value(data_dir, Config),
+ Files = ["v7.tar", "gnu.tar", "bsd.tar",
+ "star.tar", "pax_mtime.tar"],
+ do_read_other_implementations(Files, DataDir).
+
+do_read_other_implementations([], _DataDir) ->
+ ok;
+do_read_other_implementations([File|Rest], DataDir) ->
+ io:format("~nTrying ~s", [File]),
+ Full = filename:join(DataDir, File),
+ {ok, _} = erl_tar:table(Full),
+ {ok, _} = erl_tar:extract(Full, [memory]),
+ do_read_other_implementations(Rest, DataDir).
+
+
+%% Test handling of sparse files
+sparse(Config) when is_list(Config) ->
+ DataDir = proplists:get_value(data_dir, Config),
+ PrivDir = proplists:get_value(priv_dir, Config),
+ Sparse01Empty = "sparse01_empty.tar",
+ Sparse01 = "sparse01.tar",
+ Sparse10Empty = "sparse10_empty.tar",
+ Sparse10 = "sparse10.tar",
+ do_sparse([Sparse01Empty, Sparse01, Sparse10Empty, Sparse10], DataDir, PrivDir).
+
+do_sparse([], _DataDir, _PrivDir) ->
+ ok;
+do_sparse([Name|Rest], DataDir, PrivDir) ->
+ io:format("~nTrying sparse file ~s", [Name]),
+ Full = filename:join(DataDir, Name),
+ {ok, [_]} = erl_tar:table(Full),
+ {ok, _} = erl_tar:extract(Full, [memory]),
+ do_sparse(Rest, DataDir, PrivDir).
+
%% Test filenames with characters outside the US ASCII range.
unicode(Config) when is_list(Config) ->
run_unicode_node(Config, "+fnu"),
@@ -753,6 +882,9 @@ do_unicode(PrivDir) ->
Names = lists:sort(unicode_create_files()),
Tar = "unicöde.tar",
ok = erl_tar:create(Tar, ["unicöde"], []),
+
+ %% Unicode filenames require PAX format.
+ false = is_ustar(Tar),
{ok,Names0} = erl_tar:table(Tar, []),
Names = lists:sort(Names0),
_ = [ok = file:delete(Name) || Name <- Names],
@@ -850,3 +982,15 @@ start_node(Name, Args) ->
ct:log("Node ~p started~n", [Node]),
Node
end.
+
+%% Test that the given tar file is a plain USTAR archive,
+%% without any PAX extensions.
+is_ustar(File) ->
+ {ok,Bin} = file:read_file(File),
+ <<_:257/binary,"ustar",0,_/binary>> = Bin,
+ <<_:156/binary,Type:8,_/binary>> = Bin,
+ case Type of
+ $x -> false;
+ $g -> false;
+ _ -> true
+ end.
diff --git a/lib/stdlib/test/tar_SUITE_data/bsd.tar b/lib/stdlib/test/tar_SUITE_data/bsd.tar
new file mode 100644
index 0000000000..8c31864be0
--- /dev/null
+++ b/lib/stdlib/test/tar_SUITE_data/bsd.tar
Binary files differ
diff --git a/lib/stdlib/test/tar_SUITE_data/gnu.tar b/lib/stdlib/test/tar_SUITE_data/gnu.tar
new file mode 100644
index 0000000000..60268065c1
--- /dev/null
+++ b/lib/stdlib/test/tar_SUITE_data/gnu.tar
Binary files differ
diff --git a/lib/stdlib/test/tar_SUITE_data/pax_mtime.tar b/lib/stdlib/test/tar_SUITE_data/pax_mtime.tar
new file mode 100644
index 0000000000..1b6e80ffac
--- /dev/null
+++ b/lib/stdlib/test/tar_SUITE_data/pax_mtime.tar
Binary files differ
diff --git a/lib/stdlib/test/tar_SUITE_data/sparse00.tar b/lib/stdlib/test/tar_SUITE_data/sparse00.tar
new file mode 100644
index 0000000000..61a04de90b
--- /dev/null
+++ b/lib/stdlib/test/tar_SUITE_data/sparse00.tar
Binary files differ
diff --git a/lib/stdlib/test/tar_SUITE_data/sparse01.tar b/lib/stdlib/test/tar_SUITE_data/sparse01.tar
new file mode 100644
index 0000000000..61a04de90b
--- /dev/null
+++ b/lib/stdlib/test/tar_SUITE_data/sparse01.tar
Binary files differ
diff --git a/lib/stdlib/test/tar_SUITE_data/sparse01_empty.tar b/lib/stdlib/test/tar_SUITE_data/sparse01_empty.tar
new file mode 100644
index 0000000000..efa6d060f4
--- /dev/null
+++ b/lib/stdlib/test/tar_SUITE_data/sparse01_empty.tar
Binary files differ
diff --git a/lib/stdlib/test/tar_SUITE_data/sparse10.tar b/lib/stdlib/test/tar_SUITE_data/sparse10.tar
new file mode 100644
index 0000000000..61a04de90b
--- /dev/null
+++ b/lib/stdlib/test/tar_SUITE_data/sparse10.tar
Binary files differ
diff --git a/lib/stdlib/test/tar_SUITE_data/sparse10_empty.tar b/lib/stdlib/test/tar_SUITE_data/sparse10_empty.tar
new file mode 100644
index 0000000000..efa6d060f4
--- /dev/null
+++ b/lib/stdlib/test/tar_SUITE_data/sparse10_empty.tar
Binary files differ
diff --git a/lib/stdlib/test/tar_SUITE_data/star.tar b/lib/stdlib/test/tar_SUITE_data/star.tar
new file mode 100644
index 0000000000..b0631e3b13
--- /dev/null
+++ b/lib/stdlib/test/tar_SUITE_data/star.tar
Binary files differ
diff --git a/lib/stdlib/test/tar_SUITE_data/v7.tar b/lib/stdlib/test/tar_SUITE_data/v7.tar
new file mode 100644
index 0000000000..9918e006bb
--- /dev/null
+++ b/lib/stdlib/test/tar_SUITE_data/v7.tar
Binary files differ