diff options
-rw-r--r-- | lib/stdlib/doc/src/erl_tar.xml | 110 | ||||
-rw-r--r-- | lib/stdlib/src/erl_tar.erl | 93 | ||||
-rw-r--r-- | lib/stdlib/test/tar_SUITE.erl | 3 |
3 files changed, 187 insertions, 19 deletions
diff --git a/lib/stdlib/doc/src/erl_tar.xml b/lib/stdlib/doc/src/erl_tar.xml index 7f25f5b7bc..95eefb8f9b 100644 --- a/lib/stdlib/doc/src/erl_tar.xml +++ b/lib/stdlib/doc/src/erl_tar.xml @@ -80,6 +80,12 @@ </section> <section> + <title>OTHER STORAGE MEDIA</title> + <p>The <c>erl_ftp</c> module normally accesses the tar-file on disk using the <seealso marker="kernel:file">file module</seealso>. When other needs arise, there is a way to define your own low-level Erlang functions to perform the writing and reading on the storage media. See <seealso marker="#init/3">init/3</seealso> for usage.</p> + <p>An example of this is the sftp support in <seealso marker="ssh:ssh_sftp#open_tar/3">ssh_sftp:open_tar/3</seealso>. That function opens a tar file on a remote machine using an sftp channel.</p> + </section> + + <section> <title>LIMITATIONS</title> <p>For maximum compatibility, it is safe to archive files with names up to 100 characters in length. Such tar files can generally be @@ -99,7 +105,8 @@ <v>TarDescriptor = term()</v> <v>Filename = filename()</v> <v>Options = [Option]</v> - <v>Option = dereference|verbose</v> + <v>Option = dereference|verbose|{chunks,ChunkSize}</v> + <v>ChunkSize = positive_integer()</v> <v>RetValue = ok|{error,{Filename,Reason}}</v> <v>Reason = term()</v> </type> @@ -119,6 +126,12 @@ <item> <p>Print an informational message about the file being added.</p> </item> + <tag><c>{chunks,ChunkSize}</c></tag> + <item> + <p>Read data in parts from the file. This is intended for memory-limited + machines that for example builds a tar file on a remote machine over + <seealso marker="ssh:ssh_sftp#open_tar/3">sftp</seealso>.</p> + </item> </taglist> </desc> </func> @@ -389,6 +402,101 @@ </warning> </desc> </func> + + <func> + <name>init(UserPrivate, AccessMode, Fun) -> {ok,TarDescriptor} | {error,Reason} +</name> + <fsummary>Creates a TarDescriptor used in subsequent tar operations when + defining own low-level storage access functions + </fsummary> + <type> + <v>UserPrivate = term()</v> + <v>AccessMode = [write] | [read]</v> + <v>Fun when AccessMode is [write] = fun(write, {UserPrivate,DataToWrite})->...; + (position,{UserPrivate,Position})->...; + (close, UserPrivate)->... + end + </v> + <v>Fun when AccessMode is [read] = fun(read2, {UserPrivate,Size})->...; + (position,{UserPrivate,Position})->...; + (close, UserPrivate)->... + end + </v> + <v>TarDescriptor = term()</v> + <v>Reason = term()</v> + </type> + <desc> + <p>The <c>Fun</c> is the definition of what to do when the different + storage operations functions are to be called from the higher tar + handling functions (<c>add/3</c>, <c>add/4</c>, <c>close/1</c>...). + </p> + <p>The <c>Fun</c> will be called when the tar function wants to do + a low-level operation, like writing a block to a file. The Fun is called + as <c>Fun(Op,{UserPrivate,Parameters...})</c> where <c>Op</c> is the operation name, + <c>UserPrivate</c> is the term passed as the first argument to <c>init/1</c> and + <c>Parameters...</c> are the data added by the tar function to be passed down to + the storage handling function. + </p> + <p>The parameter <c>UserPrivate</c> is typically the result of opening a low level + structure like a file descriptor, a sftp channel id or such. The different <c>Fun</c> + clauses operates on that very term. + </p> + <p>The fun clauses parameter lists are: + <taglist> + <tag><c>(write, {UserPrivate,DataToWrite})</c></tag> + <item>Write the term <c>DataToWrite</c> using <c>UserPrivate</c></item> + <tag><c>(close, UserPrivate)</c></tag> + <item>Close the access.</item> + <tag><c>(read2, {UserPrivate,Size})</c></tag> + <item>Read using <c>UserPrivate</c> but only <c>Size</c> bytes. Note that there is + only an arity-2 read function, not an arity-1 + </item> + <tag><c> (position,{UserPrivate,Position})</c></tag> + <item>Sets the position of <c>UserPrivate</c> as defined for files in <seealso marker="kernel:file#position-2">file:position/2</seealso></item> + <tag><c></c></tag> + <item></item> + </taglist> + </p> + <p>A complete <c>Fun</c> parameter for reading and writing on files using the + <seealso marker="kernel:file">file module</seealso> could be: + </p> + <code type="none"> + ExampleFun = + fun(write, {Fd,Data}) -> file:write(Fd, Data); + (position, {Fd,Pos}) -> file:position(Fd, Pos); + (read2, {Fd,Size}) -> file:read(Fd,Size); + (close, Fd) -> file:close(Fd) + end + </code> + <p>where <c>Fd</c> was given to the <c>init/3</c> function as:</p> + <code> + {ok,Fd} = file:open(Name,...). + {ok,TarDesc} = erl_tar:init(Fd, [write], ExampleFun), + </code> + <p>The <c>TarDesc</c> is then used:</p> + <code> + erl_tar:add(TarDesc, SomeValueIwantToAdd, FileNameInTarFile), + ...., + erl_tar:close(TarDesc) + </code> + <p>When the erl_tar core wants to e.g. write a piece of Data, it would call + <c>ExampleFun(write,{UserPrivate,Data})</c>. + </p> + <note> + <p>The example above with <c>file</c> module operations is not necessary to + use directly since that is what the <seealso marker="#open">open</seealso> function + in principle does. + </p> + </note> + <warning> + <p>The <c>TarDescriptor</c> term is not a file descriptor. + You should not rely on the specific contents of the <c>TarDescriptor</c> + term, as it may change in future versions as more features are added + to the <c>erl_tar</c> module.</p> + </warning> + </desc> + </func> + <func> <name>table(Name) -> RetValue</name> <fsummary>Retrieve the name of all files in a tar file</fsummary> diff --git a/lib/stdlib/src/erl_tar.erl b/lib/stdlib/src/erl_tar.erl index acf7a5cd40..ab6223c0fe 100644 --- a/lib/stdlib/src/erl_tar.erl +++ b/lib/stdlib/src/erl_tar.erl @@ -22,7 +22,7 @@ %% Purpose: Unix tar (tape archive) utility. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% --export([create/2, create/3, extract/1, extract/2, table/1, table/2, +-export([init/3, create/2, create/3, extract/1, extract/2, table/1, table/2, open/2, close/1, add/3, add/4, t/1, tt/1, format_error/1]). @@ -30,10 +30,16 @@ -record(add_opts, {read_info, % Fun to use for read file/link info. + chunk_size = 0, % For file reading when sending to sftp. 0=do not chunk verbose = false :: boolean()}). % Verbose on/off. %% Opens a tar archive. +init(UsrHandle, AccessMode, Fun) when is_function(Fun,2) -> + {ok, {AccessMode,{UsrHandle,Fun}}}. + +%%%================================================================ +%%% The open function with friends is to keep the file and binary api of this module open(Name, Mode) -> case open_mode(Mode) of {ok, Access, Raw, Opts} -> @@ -46,27 +52,37 @@ open1({binary,Bin}, read, _Raw, Opts) -> case file:open(Bin, [ram,binary,read]) of {ok,File} -> _ = [ram_file:uncompress(File) || Opts =:= [compressed]], - {ok,{read,File}}; + init(File,read,file_fun()); Error -> Error end; open1({file, Fd}, read, _Raw, _Opts) -> - {ok, {read, Fd}}; + init(Fd, read, file_fun()); open1(Name, Access, Raw, Opts) -> case file:open(Name, Raw ++ [binary, Access|Opts]) of {ok, File} -> - {ok, {Access, File}}; + init(File, Access, file_fun()); {error, Reason} -> {error, {Name, Reason}} end. +file_fun() -> + fun(write, {Fd,Data}) -> file:write(Fd, Data); + (position, {Fd,Pos}) -> file:position(Fd, Pos); + (read2, {Fd,Size}) -> file:read(Fd,Size); + (close, Fd) -> file:close(Fd) + end. + +%%% End of file and binary api (except for open_mode/1 downwards +%%%================================================================ + %% Closes a tar archive. close({read, File}) -> - ok = file:close(File); + ok = do_close(File); close({write, File}) -> PadResult = pad_file(File), - ok = file:close(File), + ok = do_close(File), PadResult; close(_) -> {error, einval}. @@ -75,7 +91,6 @@ close(_) -> add(File, Name, Options) -> add(File, Name, Name, Options). - add({write, File}, Name, NameInArchive, Options) -> Opts = #add_opts{read_info=fun(F) -> file:read_link_info(F) end}, add1(File, Name, NameInArchive, add_opts(Options, Opts)); @@ -88,6 +103,8 @@ add_opts([dereference|T], Opts) -> add_opts(T, Opts#add_opts{read_info=fun(F) -> file:read_file_info(F) end}); add_opts([verbose|T], Opts) -> add_opts(T, Opts#add_opts{verbose=true}); +add_opts([{chunks,N}|T], Opts) -> + add_opts(T, Opts#add_opts{chunk_size=N}); add_opts([_|T], Opts) -> add_opts(T, Opts); add_opts([], Opts) -> @@ -321,16 +338,46 @@ add1(TarFile, Name, NameInArchive, Opts) -> {error, {Name, Reason}} end. +add1(Tar, Name, Header, chunked, Options) -> + add_verbose(Options, "a ~ts [chunked ", [Name]), + try + ok = do_write(Tar, Header), + {ok,D} = file:open(Name, [read,binary]), + {ok,NumBytes} = add_read_write_chunks(D, Tar, Options#add_opts.chunk_size, 0, Options), + _ = file:close(D), + ok = do_write(Tar, padding(NumBytes,?record_size)) + of + ok -> + add_verbose(Options, "~n", []), + ok + catch + error:{badmatch,{error,Error}} -> + add_verbose(Options, "~n", []), + {error,{Name,Error}} + end; add1(Tar, Name, Header, Bin, Options) -> add_verbose(Options, "a ~ts~n", [Name]), - file:write(Tar, [Header, Bin, padding(byte_size(Bin), ?record_size)]). + do_write(Tar, [Header, Bin, padding(byte_size(Bin), ?record_size)]). + +add_read_write_chunks(D, Tar, ChunkSize, SumNumBytes, Options) -> + case file:read(D, ChunkSize) of + {ok,Bin} -> + ok = do_write(Tar, Bin), + add_verbose(Options, ".", []), + add_read_write_chunks(D, Tar, ChunkSize, SumNumBytes+byte_size(Bin), Options); + eof -> + add_verbose(Options, "]", []), + {ok,SumNumBytes}; + Other -> + Other + end. add_directory(TarFile, DirName, NameInArchive, Info, Options) -> case file:list_dir(DirName) of {ok, []} -> add_verbose(Options, "a ~ts~n", [DirName]), Header = create_header(NameInArchive, Info), - file:write(TarFile, Header); + do_write(TarFile, Header); {ok, Files} -> Add = fun (File) -> add1(TarFile, @@ -396,7 +443,7 @@ to_string(Str0, Count) -> %% Pads out end of file. pad_file(File) -> - {ok,Position} = file:position(File, {cur,0}), + {ok,Position} = do_position(File, {cur,0}), %% There must be at least two zero records at the end. Fill = case ?block_size - (Position rem ?block_size) of Fill0 when Fill0 < 2*?record_size -> @@ -407,7 +454,7 @@ pad_file(File) -> %% Large enough. Fill0 end, - file:write(File, zeroes(Fill)). + do_write(File, zeroes(Fill)). split_filename(Name) when length(Name) =< ?th_name_len -> {"", Name}; @@ -500,7 +547,7 @@ foldl_read(TarName, Fun, Accu, Opts) -> Ok -> Ok end, - ok = file:close(File), + ok = do_close(File), Result; Error -> Error @@ -559,7 +606,7 @@ check_extract(Name, #read_opts{files=Files}) -> ordsets:is_element(Name, Files). get_header(File) -> - case file:read(File, ?record_size) of + case do_read(File, ?record_size) of eof -> throw({error,eof}); {ok, Bin} when is_binary(Bin) -> @@ -690,7 +737,7 @@ get_element(File, #tar_header{size = 0}) -> skip_to_next(File), {ok,<<>>}; get_element(File, #tar_header{size = Size}) -> - case file:read(File, Size) of + case do_read(File, Size) of {ok,Bin}=Res when byte_size(Bin) =:= Size -> skip_to_next(File), Res; @@ -880,7 +927,7 @@ skip(File, Size) -> %% Note: There is no point in handling failure to get the current position %% in the file. If it doesn't work, something serious is wrong. Amount = ((Size + ?record_size - 1) div ?record_size) * ?record_size, - {ok,_} = file:position(File, {cur, Amount}), + {ok,_} = do_position(File, {cur, Amount}), ok. %% Skips to the next record in the file. @@ -888,9 +935,9 @@ skip(File, Size) -> skip_to_next(File) -> %% Note: There is no point in handling failure to get the current position %% in the file. If it doesn't work, something serious is wrong. - {ok, Position} = file:position(File, {cur, 0}), + {ok, Position} = do_position(File, {cur, 0}), NewPosition = ((Position + ?record_size - 1) div ?record_size) * ?record_size, - {ok,NewPosition} = file:position(File, NewPosition), + {ok,NewPosition} = do_position(File, NewPosition), ok. %% Prints the message on if the verbose option is given. @@ -916,6 +963,9 @@ posix_to_erlang_time(Sec) -> read_file_and_info(Name, Opts) -> ReadInfo = Opts#add_opts.read_info, case ReadInfo(Name) of + {ok,Info} when Info#file_info.type =:= regular, + Opts#add_opts.chunk_size>0 -> + {ok,chunked,Info}; {ok,Info} when Info#file_info.type =:= regular -> case file:read_file(Name) of {ok,Bin} -> @@ -962,3 +1012,12 @@ open_mode([], Access, Raw, Opts) -> {ok, Access, Raw, Opts}; open_mode(_, _, _, _) -> {error, einval}. + +%%%================================================================ +do_write({UsrHandle,Fun}, Data) -> Fun(write,{UsrHandle,Data}). + +do_position({UsrHandle,Fun}, Pos) -> Fun(position,{UsrHandle,Pos}). + +do_read({UsrHandle,Fun}, Len) -> Fun(read2,{UsrHandle,Len}). + +do_close({UsrHandle,Fun}) -> Fun(close,UsrHandle). diff --git a/lib/stdlib/test/tar_SUITE.erl b/lib/stdlib/test/tar_SUITE.erl index 6349139925..9b6d65011e 100644 --- a/lib/stdlib/test/tar_SUITE.erl +++ b/lib/stdlib/test/tar_SUITE.erl @@ -654,6 +654,7 @@ open_add_close(Config) when is_list(Config) -> ?line ok = erl_tar:add(AD, FileOne, []), ?line ok = erl_tar:add(AD, FileTwo, "second file", []), ?line ok = erl_tar:add(AD, FileThree, [verbose]), + ?line ok = erl_tar:add(AD, FileThree, "chunked", [{chunks,11411},verbose]), ?line ok = erl_tar:add(AD, ADir, [verbose]), ?line ok = erl_tar:add(AD, AnotherDir, [verbose]), ?line ok = erl_tar:close(AD), @@ -661,7 +662,7 @@ open_add_close(Config) when is_list(Config) -> ?line ok = erl_tar:t(TarOne), ?line ok = erl_tar:tt(TarOne), - ?line {ok,[FileOne,"second file",FileThree,ADir,SomeContent]} = erl_tar:table(TarOne), + ?line {ok,[FileOne,"second file",FileThree,"chunked",ADir,SomeContent]} = erl_tar:table(TarOne), ?line delete_files(["oac_file","oac_small","oac_big",Dir,AnotherDir,ADir]), |