aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/stdlib/doc/src/erl_tar.xml110
-rw-r--r--lib/stdlib/src/erl_tar.erl93
-rw-r--r--lib/stdlib/test/tar_SUITE.erl3
3 files changed, 187 insertions, 19 deletions
diff --git a/lib/stdlib/doc/src/erl_tar.xml b/lib/stdlib/doc/src/erl_tar.xml
index 7f25f5b7bc..95eefb8f9b 100644
--- a/lib/stdlib/doc/src/erl_tar.xml
+++ b/lib/stdlib/doc/src/erl_tar.xml
@@ -80,6 +80,12 @@
</section>
<section>
+ <title>OTHER STORAGE MEDIA</title>
+ <p>The <c>erl_tar</c> module normally accesses the tar-file on disk using the <seealso marker="kernel:file">file module</seealso>. When other needs arise, there is a way to define your own low-level Erlang functions to perform the writing and reading on the storage media. See <seealso marker="#init/3">init/3</seealso> for usage.</p>
+ <p>An example of this is the sftp support in <seealso marker="ssh:ssh_sftp#open_tar/3">ssh_sftp:open_tar/3</seealso>. That function opens a tar file on a remote machine using an sftp channel.</p>
+ </section>
+
+ <section>
<title>LIMITATIONS</title>
<p>For maximum compatibility, it is safe to archive files with names
up to 100 characters in length. Such tar files can generally be
@@ -99,7 +105,8 @@
<v>TarDescriptor = term()</v>
<v>Filename = filename()</v>
<v>Options = [Option]</v>
- <v>Option = dereference|verbose</v>
+ <v>Option = dereference|verbose|{chunks,ChunkSize}</v>
+ <v>ChunkSize = positive_integer()</v>
<v>RetValue = ok|{error,{Filename,Reason}}</v>
<v>Reason = term()</v>
</type>
@@ -119,6 +126,12 @@
<item>
<p>Print an informational message about the file being added.</p>
</item>
+ <tag><c>{chunks,ChunkSize}</c></tag>
+ <item>
+ <p>Read data in parts from the file. This is intended for memory-limited
+ machines that, for example, build a tar file on a remote machine over
+ <seealso marker="ssh:ssh_sftp#open_tar/3">sftp</seealso>.</p>
+ </item>
</taglist>
</desc>
</func>
@@ -389,6 +402,101 @@
</warning>
</desc>
</func>
+
+ <func>
+ <name>init(UserPrivate, AccessMode, Fun) -> {ok,TarDescriptor} | {error,Reason}
+</name>
+ <fsummary>Creates a TarDescriptor used in subsequent tar operations when
+ defining own low-level storage access functions
+ </fsummary>
+ <type>
+ <v>UserPrivate = term()</v>
+ <v>AccessMode = write | read</v>
+ <v>Fun when AccessMode is write = fun(write, {UserPrivate,DataToWrite})->...;
+ (position,{UserPrivate,Position})->...;
+ (close, UserPrivate)->...
+ end
+ </v>
+ <v>Fun when AccessMode is read = fun(read2, {UserPrivate,Size})->...;
+ (position,{UserPrivate,Position})->...;
+ (close, UserPrivate)->...
+ end
+ </v>
+ <v>TarDescriptor = term()</v>
+ <v>Reason = term()</v>
+ </type>
+ <desc>
+ <p>The <c>Fun</c> is the definition of what to do when the different
+ storage operations functions are to be called from the higher tar
+ handling functions (<c>add/3</c>, <c>add/4</c>, <c>close/1</c>...).
+ </p>
+ <p>The <c>Fun</c> will be called when the tar function wants to do
+ a low-level operation, like writing a block to a file. The Fun is called
+ as <c>Fun(Op,{UserPrivate,Parameters...})</c> where <c>Op</c> is the operation name,
+ <c>UserPrivate</c> is the term passed as the first argument to <c>init/3</c> and
+ <c>Parameters...</c> are the data added by the tar function to be passed down to
+ the storage handling function.
+ </p>
+ <p>The parameter <c>UserPrivate</c> is typically the result of opening a low level
+ structure like a file descriptor, an sftp channel id or similar. The different <c>Fun</c>
+ clauses operate on that very term.
+ </p>
+ <p>The fun clauses parameter lists are:
+ <taglist>
+ <tag><c>(write, {UserPrivate,DataToWrite})</c></tag>
+ <item>Write the term <c>DataToWrite</c> using <c>UserPrivate</c></item>
+ <tag><c>(close, UserPrivate)</c></tag>
+ <item>Close the access.</item>
+ <tag><c>(read2, {UserPrivate,Size})</c></tag>
+ <item>Read using <c>UserPrivate</c> but only <c>Size</c> bytes. Note that there is
+ only an arity-2 read function, not an arity-1 one.
+ </item>
+ <tag><c> (position,{UserPrivate,Position})</c></tag>
+ <item>Sets the position of <c>UserPrivate</c> as defined for files in <seealso marker="kernel:file#position-2">file:position/2</seealso></item>
+ <tag><c></c></tag>
+ <item></item>
+ </taglist>
+ </p>
+ <p>A complete <c>Fun</c> parameter for reading and writing on files using the
+ <seealso marker="kernel:file">file module</seealso> could be:
+ </p>
+ <code type="none">
+ ExampleFun =
+ fun(write, {Fd,Data}) -> file:write(Fd, Data);
+ (position, {Fd,Pos}) -> file:position(Fd, Pos);
+ (read2, {Fd,Size}) -> file:read(Fd,Size);
+ (close, Fd) -> file:close(Fd)
+ end
+ </code>
+ <p>where <c>Fd</c> was given to the <c>init/3</c> function as:</p>
+ <code>
+ {ok,Fd} = file:open(Name,...),
+ {ok,TarDesc} = erl_tar:init(Fd, write, ExampleFun).
+ </code>
+ <p>The <c>TarDesc</c> is then used:</p>
+ <code>
+ erl_tar:add(TarDesc, SomeValueIwantToAdd, FileNameInTarFile),
+ ....,
+ erl_tar:close(TarDesc)
+ </code>
+ <p>When the erl_tar core wants to e.g. write a piece of Data, it would call
+ <c>ExampleFun(write,{UserPrivate,Data})</c>.
+ </p>
+ <note>
+ <p>The example above with <c>file</c> module operations is not necessary to
+ use directly since that is what the <seealso marker="#open">open</seealso> function
+ in principle does.
+ </p>
+ </note>
+ <warning>
+ <p>The <c>TarDescriptor</c> term is not a file descriptor.
+ You should not rely on the specific contents of the <c>TarDescriptor</c>
+ term, as it may change in future versions as more features are added
+ to the <c>erl_tar</c> module.</p>
+ </warning>
+ </desc>
+ </func>
+
<func>
<name>table(Name) -> RetValue</name>
<fsummary>Retrieve the name of all files in a tar file</fsummary>
diff --git a/lib/stdlib/src/erl_tar.erl b/lib/stdlib/src/erl_tar.erl
index acf7a5cd40..ab6223c0fe 100644
--- a/lib/stdlib/src/erl_tar.erl
+++ b/lib/stdlib/src/erl_tar.erl
@@ -22,7 +22,7 @@
%% Purpose: Unix tar (tape archive) utility.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
--export([create/2, create/3, extract/1, extract/2, table/1, table/2,
+-export([init/3, create/2, create/3, extract/1, extract/2, table/1, table/2,
open/2, close/1, add/3, add/4,
t/1, tt/1, format_error/1]).
@@ -30,10 +30,16 @@
-record(add_opts,
{read_info, % Fun to use for read file/link info.
+ chunk_size = 0, % For file reading when sending to sftp. 0=do not chunk
verbose = false :: boolean()}). % Verbose on/off.
%% Opens a tar archive.
+%% Creates a tar descriptor on top of caller-supplied storage callbacks.
+%%   UsrHandle  - opaque term handed back to Fun on every storage call.
+%%   AccessMode - read | write. The one-element list forms [read] and
+%%                [write] are also accepted and unwrapped, because
+%%                close/1 and add/4 match on the bare atom.
+%%   Fun        - arity-2 fun implementing the storage operations
+%%                (write, position, read2, close).
+%% Returns {ok, {AccessMode, {UsrHandle, Fun}}}.
+init(UsrHandle, [AccessMode], Fun) when AccessMode =:= read; AccessMode =:= write ->
+    init(UsrHandle, AccessMode, Fun);
+init(UsrHandle, AccessMode, Fun) when is_function(Fun,2) ->
+    {ok, {AccessMode,{UsrHandle,Fun}}}.
+
+%%%================================================================
+%%% The open function with friends is to keep the file and binary api of this module
open(Name, Mode) ->
case open_mode(Mode) of
{ok, Access, Raw, Opts} ->
@@ -46,27 +52,37 @@ open1({binary,Bin}, read, _Raw, Opts) ->
case file:open(Bin, [ram,binary,read]) of
{ok,File} ->
_ = [ram_file:uncompress(File) || Opts =:= [compressed]],
- {ok,{read,File}};
+ init(File,read,file_fun());
Error ->
Error
end;
open1({file, Fd}, read, _Raw, _Opts) ->
- {ok, {read, Fd}};
+ init(Fd, read, file_fun());
open1(Name, Access, Raw, Opts) ->
case file:open(Name, Raw ++ [binary, Access|Opts]) of
{ok, File} ->
- {ok, {Access, File}};
+ init(File, Access, file_fun());
{error, Reason} ->
{error, {Name, Reason}}
end.
+%% Returns the storage callback fun used for archives that are accessed
+%% through the file module. The user handle is the file descriptor, and
+%% every operation is forwarded to the matching file function.
+file_fun() ->
+    fun(read2, {IoDev,NumBytes}) -> file:read(IoDev, NumBytes);
+       (write, {IoDev,Bytes}) -> file:write(IoDev, Bytes);
+       (position, {IoDev,Location}) -> file:position(IoDev, Location);
+       (close, IoDev) -> file:close(IoDev)
+    end.
+
+%%% End of file and binary api (except for open_mode/1 downwards)
+%%%================================================================
+
%% Closes a tar archive.
close({read, File}) ->
- ok = file:close(File);
+ ok = do_close(File);
close({write, File}) ->
PadResult = pad_file(File),
- ok = file:close(File),
+ ok = do_close(File),
PadResult;
close(_) ->
{error, einval}.
@@ -75,7 +91,6 @@ close(_) ->
add(File, Name, Options) ->
add(File, Name, Name, Options).
-
add({write, File}, Name, NameInArchive, Options) ->
Opts = #add_opts{read_info=fun(F) -> file:read_link_info(F) end},
add1(File, Name, NameInArchive, add_opts(Options, Opts));
@@ -88,6 +103,8 @@ add_opts([dereference|T], Opts) ->
add_opts(T, Opts#add_opts{read_info=fun(F) -> file:read_file_info(F) end});
add_opts([verbose|T], Opts) ->
add_opts(T, Opts#add_opts{verbose=true});
+add_opts([{chunks,N}|T], Opts) ->
+ add_opts(T, Opts#add_opts{chunk_size=N});
add_opts([_|T], Opts) ->
add_opts(T, Opts);
add_opts([], Opts) ->
@@ -321,16 +338,46 @@ add1(TarFile, Name, NameInArchive, Opts) ->
{error, {Name, Reason}}
end.
+%% Adds the regular file Name to the archive Tar without reading it into
+%% memory in one piece: the header is written first, then the file body
+%% is copied in chunks of Options#add_opts.chunk_size bytes, and finally
+%% the body is padded out to a multiple of the record size.
+%% Returns ok, or {error,{Name,Reason}} when any step yields {error,Reason}.
+add1(Tar, Name, Header, chunked, Options) ->
+    add_verbose(Options, "a ~ts [chunked ", [Name]),
+    try
+        %% Open the source before touching the archive, so that a file
+        %% that cannot be opened does not leave a dangling header behind.
+        {ok,D} = file:open(Name, [read,binary]),
+        try
+            ok = do_write(Tar, Header),
+            {ok,NumBytes} = add_read_write_chunks(D, Tar, Options#add_opts.chunk_size, 0, Options),
+            ok = do_write(Tar, padding(NumBytes,?record_size))
+        after
+            %% Always release the source descriptor, also when a read
+            %% or write in the body above fails.
+            _ = file:close(D)
+        end
+    of
+        ok ->
+            add_verbose(Options, "~n", []),
+            ok
+    catch
+        error:{badmatch,{error,Error}} ->
+            add_verbose(Options, "~n", []),
+            {error,{Name,Error}}
+    end;
add1(Tar, Name, Header, Bin, Options) ->
add_verbose(Options, "a ~ts~n", [Name]),
- file:write(Tar, [Header, Bin, padding(byte_size(Bin), ?record_size)]).
+ do_write(Tar, [Header, Bin, padding(byte_size(Bin), ?record_size)]).
+
+%% Copies the rest of the open file D into the archive Tar, reading at
+%% most ChunkSize bytes at a time. SumNumBytes is the number of bytes
+%% copied so far. Returns {ok,TotalBytes} at end of file, or whatever
+%% non-ok value file:read/2 returned. A failed archive write raises a
+%% badmatch error.
+add_read_write_chunks(D, Tar, ChunkSize, SumNumBytes, Options) ->
+    add_chunk_step(file:read(D, ChunkSize), D, Tar, ChunkSize, SumNumBytes, Options).
+
+%% Handles the outcome of one chunk read.
+add_chunk_step({ok,Chunk}, D, Tar, ChunkSize, SumNumBytes, Options) ->
+    ok = do_write(Tar, Chunk),
+    add_verbose(Options, ".", []),
+    add_read_write_chunks(D, Tar, ChunkSize, SumNumBytes+byte_size(Chunk), Options);
+add_chunk_step(eof, _D, _Tar, _ChunkSize, SumNumBytes, Options) ->
+    add_verbose(Options, "]", []),
+    {ok,SumNumBytes};
+add_chunk_step(ReadFailure, _D, _Tar, _ChunkSize, _SumNumBytes, _Options) ->
+    ReadFailure.
add_directory(TarFile, DirName, NameInArchive, Info, Options) ->
case file:list_dir(DirName) of
{ok, []} ->
add_verbose(Options, "a ~ts~n", [DirName]),
Header = create_header(NameInArchive, Info),
- file:write(TarFile, Header);
+ do_write(TarFile, Header);
{ok, Files} ->
Add = fun (File) ->
add1(TarFile,
@@ -396,7 +443,7 @@ to_string(Str0, Count) ->
%% Pads out end of file.
pad_file(File) ->
- {ok,Position} = file:position(File, {cur,0}),
+ {ok,Position} = do_position(File, {cur,0}),
%% There must be at least two zero records at the end.
Fill = case ?block_size - (Position rem ?block_size) of
Fill0 when Fill0 < 2*?record_size ->
@@ -407,7 +454,7 @@ pad_file(File) ->
%% Large enough.
Fill0
end,
- file:write(File, zeroes(Fill)).
+ do_write(File, zeroes(Fill)).
split_filename(Name) when length(Name) =< ?th_name_len ->
{"", Name};
@@ -500,7 +547,7 @@ foldl_read(TarName, Fun, Accu, Opts) ->
Ok ->
Ok
end,
- ok = file:close(File),
+ ok = do_close(File),
Result;
Error ->
Error
@@ -559,7 +606,7 @@ check_extract(Name, #read_opts{files=Files}) ->
ordsets:is_element(Name, Files).
get_header(File) ->
- case file:read(File, ?record_size) of
+ case do_read(File, ?record_size) of
eof ->
throw({error,eof});
{ok, Bin} when is_binary(Bin) ->
@@ -690,7 +737,7 @@ get_element(File, #tar_header{size = 0}) ->
skip_to_next(File),
{ok,<<>>};
get_element(File, #tar_header{size = Size}) ->
- case file:read(File, Size) of
+ case do_read(File, Size) of
{ok,Bin}=Res when byte_size(Bin) =:= Size ->
skip_to_next(File),
Res;
@@ -880,7 +927,7 @@ skip(File, Size) ->
%% Note: There is no point in handling failure to get the current position
%% in the file. If it doesn't work, something serious is wrong.
Amount = ((Size + ?record_size - 1) div ?record_size) * ?record_size,
- {ok,_} = file:position(File, {cur, Amount}),
+ {ok,_} = do_position(File, {cur, Amount}),
ok.
%% Skips to the next record in the file.
@@ -888,9 +935,9 @@ skip(File, Size) ->
skip_to_next(File) ->
%% Note: There is no point in handling failure to get the current position
%% in the file. If it doesn't work, something serious is wrong.
- {ok, Position} = file:position(File, {cur, 0}),
+ {ok, Position} = do_position(File, {cur, 0}),
NewPosition = ((Position + ?record_size - 1) div ?record_size) * ?record_size,
- {ok,NewPosition} = file:position(File, NewPosition),
+ {ok,NewPosition} = do_position(File, NewPosition),
ok.
%% Prints the message on if the verbose option is given.
@@ -916,6 +963,9 @@ posix_to_erlang_time(Sec) ->
read_file_and_info(Name, Opts) ->
ReadInfo = Opts#add_opts.read_info,
case ReadInfo(Name) of
+ {ok,Info} when Info#file_info.type =:= regular,
+ Opts#add_opts.chunk_size>0 ->
+ {ok,chunked,Info};
{ok,Info} when Info#file_info.type =:= regular ->
case file:read_file(Name) of
{ok,Bin} ->
@@ -962,3 +1012,12 @@ open_mode([], Access, Raw, Opts) ->
{ok, Access, Raw, Opts};
open_mode(_, _, _, _) ->
{error, einval}.
+
+%%%================================================================
+%% Low-level storage operations. Each one unpacks the {Handle,Fun} pair
+%% kept in the tar descriptor and forwards the request to Fun, tagged
+%% with the operation name.
+
+%% Writes Bytes through the storage fun.
+do_write({Handle,StorageFun}, Bytes) ->
+    StorageFun(write, {Handle,Bytes}).
+
+%% Moves to (or queries) the position given by Location.
+do_position({Handle,StorageFun}, Location) ->
+    StorageFun(position, {Handle,Location}).
+
+%% Reads NumBytes bytes through the storage fun.
+do_read({Handle,StorageFun}, NumBytes) ->
+    StorageFun(read2, {Handle,NumBytes}).
+
+%% Closes the underlying storage.
+do_close({Handle,StorageFun}) ->
+    StorageFun(close, Handle).
diff --git a/lib/stdlib/test/tar_SUITE.erl b/lib/stdlib/test/tar_SUITE.erl
index 6349139925..9b6d65011e 100644
--- a/lib/stdlib/test/tar_SUITE.erl
+++ b/lib/stdlib/test/tar_SUITE.erl
@@ -654,6 +654,7 @@ open_add_close(Config) when is_list(Config) ->
?line ok = erl_tar:add(AD, FileOne, []),
?line ok = erl_tar:add(AD, FileTwo, "second file", []),
?line ok = erl_tar:add(AD, FileThree, [verbose]),
+ ?line ok = erl_tar:add(AD, FileThree, "chunked", [{chunks,11411},verbose]),
?line ok = erl_tar:add(AD, ADir, [verbose]),
?line ok = erl_tar:add(AD, AnotherDir, [verbose]),
?line ok = erl_tar:close(AD),
@@ -661,7 +662,7 @@ open_add_close(Config) when is_list(Config) ->
?line ok = erl_tar:t(TarOne),
?line ok = erl_tar:tt(TarOne),
- ?line {ok,[FileOne,"second file",FileThree,ADir,SomeContent]} = erl_tar:table(TarOne),
+ ?line {ok,[FileOne,"second file",FileThree,"chunked",ADir,SomeContent]} = erl_tar:table(TarOne),
?line delete_files(["oac_file","oac_small","oac_big",Dir,AnotherDir,ADir]),