From 4432ac28c118622c2994440b5be3bff0bc77cc83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Mon, 22 Aug 2016 17:48:57 +0200 Subject: stdlib: Produce correct warning for erlang:hash/2 --- lib/stdlib/src/otp_internal.erl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/otp_internal.erl b/lib/stdlib/src/otp_internal.erl index f4257fb571..5bf77a5160 100644 --- a/lib/stdlib/src/otp_internal.erl +++ b/lib/stdlib/src/otp_internal.erl @@ -47,9 +47,6 @@ obsolete(Module, Name, Arity) -> obsolete_1(net, _, _) -> {deprecated, "module 'net' obsolete; use 'net_adm'"}; -obsolete_1(erlang, hash, 2) -> - {deprecated, {erlang, phash2, 2}}; - obsolete_1(erlang, now, 0) -> {deprecated, "Deprecated BIF. See the \"Time and Time Correction in Erlang\" " @@ -553,6 +550,13 @@ obsolete_1(overload, _, _) -> obsolete_1(rpc, safe_multi_server_call, A) when A =:= 2; A =:= 3 -> {removed, {rpc, multi_server_call, A}}; +%% Removed in OTP 20. + +obsolete_1(erlang, hash, 2) -> + {removed, {erlang, phash2, 2}, "20.0"}; + +%% not obsolete + obsolete_1(_, _, _) -> no. -- cgit v1.2.3 From 26b59dfe67ef551cd94765557cdd8c79794bcc38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Valim?= Date: Tue, 31 May 2016 14:28:54 +0200 Subject: Add new AtU8 beam chunk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new chunk stores atoms encoded in UTF-8. beam_lib has also been modified to handle the new 'utf8_atoms' attribute while the 'atoms' attribute may be a missing chunk from now on. The binary_to_atom/2 BIF can now encode any utf8 binary with up to 255 characters. The list_to_atom/1 BIF can now accept codepoints higher than 255 with up to 255 characters (thanks to Björn Gustavsson). --- lib/stdlib/src/beam_lib.erl | 54 ++++++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 18 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/beam_lib.erl b/lib/stdlib/src/beam_lib.erl index d7ee5c1f5d..461acf03be 100644 --- a/lib/stdlib/src/beam_lib.erl +++ b/lib/stdlib/src/beam_lib.erl @@ -63,7 +63,7 @@ -type label() :: integer(). -type chunkid() :: nonempty_string(). % approximation of the strings below -%% "Abst" | "Attr" | "CInf" | "ExpT" | "ImpT" | "LocT" | "Atom". +%% "Abst" | "Attr" | "CInf" | "ExpT" | "ImpT" | "LocT" | "Atom" | "AtU8". -type chunkname() :: 'abstract_code' | 'attributes' | 'compile_info' | 'exports' | 'labeled_exports' | 'imports' | 'indexed_imports' @@ -520,6 +520,8 @@ read_chunk_data(File0, ChunkNames0, Options) end. %% -> {ok, list()} | throw(Error) +check_chunks([atoms | Ids], File, IL, L) -> + check_chunks(Ids, File, ["Atom", "AtU8" | IL], [{atom_chunk, atoms} | L]); check_chunks([ChunkName | Ids], File, IL, L) when is_atom(ChunkName) -> ChunkId = chunk_name_to_id(ChunkName, File), check_chunks(Ids, File, [ChunkId | IL], [{ChunkId, ChunkName} | L]); @@ -537,6 +539,10 @@ scan_beam(File, What0, AllowMissingChunks) -> case scan_beam1(File, What0) of {missing, _FD, Mod, Data, What} when AllowMissingChunks -> {ok, Mod, [{Id, missing_chunk} || Id <- What] ++ Data}; + {missing, _FD, Mod, Data, ["Atom"]} -> + {ok, Mod, Data}; + {missing, _FD, Mod, Data, ["AtU8"]} -> + {ok, Mod, Data}; {missing, FD, _Mod, _Data, What} -> error({missing_chunk, filename(FD), hd(What)}); R -> @@ -581,18 +587,23 @@ scan_beam(FD, Pos, What, Mod, Data) -> error({invalid_beam_file, filename(FD), Pos}) end. 
-get_data(Cs, "Atom"=Id, FD, Size, Pos, Pos2, _Mod, Data) -> +get_atom_data(Cs, Id, FD, Size, Pos, Pos2, Data, Encoding) -> NewCs = del_chunk(Id, Cs), {NFD, Chunk} = get_chunk(Id, Pos, Size, FD), <<_Num:32, Chunk2/binary>> = Chunk, - {Module, _} = extract_atom(Chunk2), + {Module, _} = extract_atom(Chunk2, Encoding), C = case Cs of info -> {Id, Pos, Size}; _ -> {Id, Chunk} end, - scan_beam(NFD, Pos2, NewCs, Module, [C | Data]); + scan_beam(NFD, Pos2, NewCs, Module, [C | Data]). + +get_data(Cs, "Atom" = Id, FD, Size, Pos, Pos2, _Mod, Data) -> + get_atom_data(Cs, Id, FD, Size, Pos, Pos2, Data, latin1); +get_data(Cs, "AtU8" = Id, FD, Size, Pos, Pos2, _Mod, Data) -> + get_atom_data(Cs, Id, FD, Size, Pos, Pos2, Data, utf8); get_data(info, Id, FD, Size, Pos, Pos2, Mod, Data) -> scan_beam(FD, Pos2, info, Mod, [{Id, Pos, Size} | Data]); get_data(Chunks, Id, FD, Size, Pos, Pos2, Mod, Data) -> @@ -624,6 +635,9 @@ get_chunk(Id, Pos, Size, FD) -> {NFD, Chunk} end. +chunks_to_data([{atom_chunk, Name} | CNs], Chunks, File, Cs, Module, Atoms, L) -> + {NewAtoms, Ret} = chunk_to_data(Name, <<"">>, File, Cs, Atoms, Module), + chunks_to_data(CNs, Chunks, File, Cs, Module, NewAtoms, [Ret | L]); chunks_to_data([{Id, Name} | CNs], Chunks, File, Cs, Module, Atoms, L) -> {_Id, Chunk} = lists:keyfind(Id, 1, Chunks), {NewAtoms, Ret} = chunk_to_data(Name, Chunk, File, Cs, Atoms, Module), @@ -651,7 +665,7 @@ chunk_to_data(abstract_code=Id, Chunk, File, _Cs, AtomTable, Mod) -> <<>> -> {AtomTable, {Id, no_abstract_code}}; <<0:8,N:8,Mode0:N/binary,Rest/binary>> -> - Mode = list_to_atom(binary_to_list(Mode0)), + Mode = binary_to_atom(Mode0, utf8), decrypt_abst(Mode, Mod, File, Id, AtomTable, Rest); _ -> case catch binary_to_term(Chunk) of @@ -683,7 +697,6 @@ chunk_to_data(ChunkId, Chunk, _File, _Cs, AtomTable, _Module) when is_list(ChunkId) -> {AtomTable, {ChunkId, Chunk}}. % Chunk is a binary -chunk_name_to_id(atoms, _) -> "Atom"; chunk_name_to_id(indexed_imports, _) -> "ImpT"; chunk_name_to_id(imports, _) -> "ImpT"; chunk_name_to_id(exports, _) -> "ExpT"; @@ -738,25 +751,30 @@ atm(AT, N) -> %% AT is updated. ensure_atoms({empty, AT}, Cs) -> - {_Id, AtomChunk} = lists:keyfind("Atom", 1, Cs), - extract_atoms(AtomChunk, AT), + case lists:keyfind("AtU8", 1, Cs) of + {_Id, AtomChunk} when is_binary(AtomChunk) -> + extract_atoms(AtomChunk, AT, utf8); + _ -> + {_Id, AtomChunk} = lists:keyfind("Atom", 1, Cs), + extract_atoms(AtomChunk, AT, latin1) + end, AT; ensure_atoms(AT, _Cs) -> AT. -extract_atoms(<<_Num:32, B/binary>>, AT) -> - extract_atoms(B, 1, AT). +extract_atoms(<<_Num:32, B/binary>>, AT, Encoding) -> + extract_atoms(B, 1, AT, Encoding). -extract_atoms(<<>>, _I, _AT) -> +extract_atoms(<<>>, _I, _AT, _Encoding) -> true; -extract_atoms(B, I, AT) -> - {Atom, B1} = extract_atom(B), +extract_atoms(B, I, AT, Encoding) -> + {Atom, B1} = extract_atom(B, Encoding), true = ets:insert(AT, {I, Atom}), - extract_atoms(B1, I+1, AT). + extract_atoms(B1, I+1, AT, Encoding). -extract_atom(<>) -> +extract_atom(<>, Encoding) -> <> = B, - {list_to_atom(binary_to_list(SB)), Tail}. + {binary_to_atom(SB, Encoding), Tail}. %%% Utils. @@ -856,12 +874,12 @@ significant_chunks() -> %% for a module. They are listed in the order that they should be MD5:ed. md5_chunks() -> - ["Atom", "Code", "StrT", "ImpT", "ExpT", "FunT", "LitT"]. + ["Atom", "AtU8", "Code", "StrT", "ImpT", "ExpT", "FunT", "LitT"]. %% The following chunks are mandatory in every Beam file. mandatory_chunks() -> - ["Code", "ExpT", "ImpT", "StrT", "Atom"]. 
+    ["Code", "ExpT", "ImpT", "StrT"].
 
 %%% ====================================================================
 %%% The rest of the file handles encrypted debug info.
-- 
cgit v1.2.3 

From db442323e9e86528edeb7226d55404e290b088b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?=
Date: Wed, 1 Feb 2017 16:25:47 +0100
Subject: Make "~s" fail for Unicode atoms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

26b59dfe67e introduced support for arbitrary Unicode characters in atoms.
After that commit, it is possible to print any atom with a "~s" format
string:

    1> io:format("~s\n", ['спутник']).
    спутник

Note that the same text as a string will fail:

    2> io:format("~s\n", ["спутник"]).
    ** exception error: bad argument
         in function  io:format/3
            called as io:format(<0.53.0>,"~s\n",
                                [[1089,1087,1091,1090,1085,1080,1082]])

Being more permissive for atoms is probably beneficial for io:format/2.
However, for io_lib:format/2, the new behavior breaks this guarantee in
the documentation for io_lib:format/2:

    If and only if the Unicode translation modifier is used in the format
    string (that is, ~ts or ~tc), the resulting list can contain characters
    beyond the ISO Latin-1 character range (that is, numbers > 255).

The problem is that you can no longer be sure whether io_lib:format/2 will
return an iolist that can be successfully passed to a port or
iolist_to_binary/1.

We see three solutions:

1. Keep the new behavior. That means that you can get non-iolist data when
you use ~s for printing an atom, but a 'badarg' when printing Unicode
strings. That is inconsistent, and it delays error detection if the result
is passed to a port or iolist_to_binary/1.

2. Always allow Unicode characters for ~s. That would be incompatible,
because ~s says that any binary is encoded in latin1, while ~ts says that
any binary is encoded in UTF-8. To implement this solution, we could no
longer support latin1 binaries; all binaries would have to be encoded in
UTF-8.

3. Only allow ~s for atoms where all characters are less than 256. Require
~ts to print atoms such as 'спутник'.

We reject solution 1 because it is slightly incompatible and is
inconsistent. We reject solution 2 because it is too incompatible.
Therefore, this commit implements solution 3.
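A minimal sketch of the behaviour that solution 3 implies (illustrative
only; return values are elided, and 'abc' is just an example atom):

    3> io_lib:format("~s", [abc]).        % atoms with only Latin-1 characters still work with ~s
    4> io_lib:format("~ts", ['спутник']). % atoms containing characters above 255 need the t modifier
    5> io_lib:format("~s", ['спутник']).  % now fails with badarg instead of returning a non-iolist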
--- lib/stdlib/src/io_lib_format.erl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl index c7b75961cb..3113767614 100644 --- a/lib/stdlib/src/io_lib_format.erl +++ b/lib/stdlib/src/io_lib_format.erl @@ -265,7 +265,10 @@ control($W, [A,Depth], F, Adj, P, Pad, _Enc, _Str, _I) when is_integer(Depth) -> term(io_lib:write(A, Depth), F, Adj, P, Pad); control($P, [A,Depth], F, Adj, P, Pad, Enc, Str, I) when is_integer(Depth) -> print(A, Depth, F, Adj, P, Pad, Enc, Str, I); -control($s, [A], F, Adj, P, Pad, _Enc, _Str, _I) when is_atom(A) -> +control($s, [A], F, Adj, P, Pad, latin1, _Str, _I) when is_atom(A) -> + L = iolist_to_chars(atom_to_list(A)), + string(L, F, Adj, P, Pad); +control($s, [A], F, Adj, P, Pad, unicode, _Str, _I) when is_atom(A) -> string(atom_to_list(A), F, Adj, P, Pad); control($s, [L0], F, Adj, P, Pad, latin1, _Str, _I) -> L = iolist_to_chars(L0), -- cgit v1.2.3 From 6fff0463013f87963be707b80664bc209a1c4c16 Mon Sep 17 00:00:00 2001 From: Richard Carlsson Date: Wed, 18 Jan 2017 10:39:19 +0100 Subject: Refactor filename:find_src/1 --- lib/stdlib/src/filename.erl | 88 ++++++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 41 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index c4586171ca..51d5ca711d 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -793,14 +793,7 @@ separators() -> | {'d', atom()}, ErrorReason :: 'non_existing' | 'preloaded' | 'interpreted'. find_src(Mod) -> - Default = [{"", ""}, {"ebin", "src"}, {"ebin", "esrc"}], - Rules = - case application:get_env(kernel, source_search_rules) of - undefined -> Default; - {ok, []} -> Default; - {ok, R} when is_list(R) -> R - end, - find_src(Mod, Rules). + find_src(Mod, []). -spec find_src(Beam, Rules) -> {SourceFile, Options} | {error, {ErrorReason, Module}} when @@ -816,44 +809,46 @@ find_src(Mod) -> ErrorReason :: 'non_existing' | 'preloaded' | 'interpreted'. find_src(Mod, Rules) when is_atom(Mod) -> find_src(atom_to_list(Mod), Rules); -find_src(File0, Rules) when is_list(File0) -> - Mod = list_to_atom(basename(File0, ".erl")), - File = rootname(File0, ".erl"), - case readable_file(File++".erl") of - true -> - try_file(File, Mod, Rules); - false -> - try_file(undefined, Mod, Rules) - end. - -try_file(File, Mod, Rules) -> +find_src(ModOrFile, Rules) when is_list(ModOrFile) -> + Extension = ".erl", + Mod = list_to_atom(basename(ModOrFile, Extension)), case code:which(Mod) of Possibly_Rel_Path when is_list(Possibly_Rel_Path) -> - {ok, Cwd} = file:get_cwd(), - Path = join(Cwd, Possibly_Rel_Path), - try_file(File, Path, Mod, Rules); + {ok, Cwd} = file:get_cwd(), + Dir = dirname(make_abs_path(Cwd, Possibly_Rel_Path)), + find_src_1(ModOrFile, Dir, Mod, Extension, Rules); Ecode when is_atom(Ecode) -> % Ecode :: ecode() {error, {Ecode, Mod}} end. %% At this point, the Mod is known to be valid. %% If the source name is not known, find it. -%% Then get the compilation options. -%% Returns: {SrcFile, Options} +find_src_1(ModOrFile, Dir, Mod, Extension, Rules) -> + %% The documentation says this function must return the found path + %% without extension in all cases. Also, ModOrFile could be given with + %% or without extension. Hence the calls to rootname below. 
+ ModOrFileRoot = rootname(ModOrFile, Extension), + case readable_file(ModOrFileRoot++Extension) of + true -> + find_src_2(ModOrFileRoot, Mod); + false -> + case get_source_file(Dir, atom_to_list(Mod)++Extension, Rules) of + {ok, Src} -> + find_src_2(rootname(Src, Extension), Mod); + Error -> + Error + end + end. -try_file(undefined, ObjFilename, Mod, Rules) -> - case get_source_file(ObjFilename, Mod, Rules) of - {ok, File} -> try_file(File, ObjFilename, Mod, Rules); - Error -> Error - end; -try_file(Src, _ObjFilename, Mod, _Rules) -> +%% Get the compilation options and return {SrcFileRoot, Options} +find_src_2(SrcRoot, Mod) -> List = case Mod:module_info(compile) of none -> []; List0 -> List0 end, Options = proplists:get_value(options, List, []), {ok, Cwd} = file:get_cwd(), - AbsPath = make_abs_path(Cwd, Src), + AbsPath = make_abs_path(Cwd, SrcRoot), {AbsPath, filter_options(dirname(AbsPath), Options, [])}. %% Filters the options. @@ -884,25 +879,36 @@ filter_options(Base, [_|Rest], Result) -> filter_options(_Base, [], Result) -> Result. -%% Gets the source file given path of object code and module name. +%% Gets the source file given the object directory. + +get_source_file(Dir, Filename, []) -> + Rules = + case application:get_env(kernel, source_search_rules) of + undefined -> default_source_search_rules(); + {ok, []} -> default_source_search_rules(); + {ok, R} when is_list(R) -> R + end, + get_source_file(Dir, Filename, Rules); +get_source_file(Dir, Filename, Rules) -> + source_by_rules(Dir, Filename, Rules). -get_source_file(Obj, Mod, Rules) -> - source_by_rules(dirname(Obj), atom_to_list(Mod), Rules). +default_source_search_rules() -> + [{"", ""}, {"ebin", "src"}, {"ebin", "esrc"}]. -source_by_rules(Dir, Base, [{From, To}|Rest]) -> - case try_rule(Dir, Base, From, To) of +source_by_rules(Dir, Filename, [{From, To}|Rest]) -> + case try_rule(Dir, Filename, From, To) of {ok, File} -> {ok, File}; - error -> source_by_rules(Dir, Base, Rest) + error -> source_by_rules(Dir, Filename, Rest) end; -source_by_rules(_Dir, _Base, []) -> +source_by_rules(_Dir, _Filename, []) -> {error, source_file_not_found}. -try_rule(Dir, Base, From, To) -> +try_rule(Dir, Filename, From, To) -> case lists:suffix(From, Dir) of true -> NewDir = lists:sublist(Dir, 1, length(Dir)-length(From))++To, - Src = join(NewDir, Base), - case readable_file(Src++".erl") of + Src = join(NewDir, Filename), + case readable_file(Src) of true -> {ok, Src}; false -> error end; -- cgit v1.2.3 From 57aaf7d0c7c75cfd8c6b55c21d977b695f460022 Mon Sep 17 00:00:00 2001 From: Richard Carlsson Date: Wed, 18 Jan 2017 18:28:47 +0100 Subject: Add filelib:find_file/2/3 and filelib:find_source/1/2/3 This moves, extends and exports functionality that previously existed only internally in filename:find_src/1/2, adding the ability to automatically substitute file suffixes and use different rules for different suffixes. --- lib/stdlib/src/filelib.erl | 122 ++++++++++++++++++++++++++++++++++++++++++++ lib/stdlib/src/filename.erl | 74 ++++++--------------------- 2 files changed, 138 insertions(+), 58 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index 7029389e2f..daa18da9aa 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -24,6 +24,7 @@ -export([fold_files/5, last_modified/1, file_size/1, ensure_dir/1]). -export([wildcard/3, is_dir/2, is_file/2, is_regular/2]). -export([fold_files/6, last_modified/2, file_size/2]). 
+-export([find_file/2, find_file/3, find_source/1, find_source/2, find_source/3]). %% For debugging/testing. -export([compile_wildcard/1]). @@ -517,3 +518,124 @@ eval_list_dir(Dir, erl_prim_loader) -> end; eval_list_dir(Dir, Mod) -> Mod:list_dir(Dir). + +%% Getting the rules to use for file search + +keep_dir_search_rules(Rules) -> + [T || {_,_}=T <- Rules]. + +keep_suffix_search_rules(Rules) -> + [T || {_,_,_}=T <- Rules]. + +get_search_rules() -> + case application:get_env(kernel, source_search_rules) of + undefined -> default_search_rules(); + {ok, []} -> default_search_rules(); + {ok, R} when is_list(R) -> R + end. + +default_search_rules() -> + [%% suffix-speficic rules for source search + {".beam", ".erl", erl_source_search_rules()}, + {".erl", ".yrl", []}, + {"", ".src", erl_source_search_rules()}, + {".so", ".c", c_source_search_rules()}, + {".o", ".c", c_source_search_rules()}, + {"", ".c", c_source_search_rules()}, + {"", ".in", basic_source_search_rules()}, + %% plain old directory rules, backwards compatible + {"", ""}, + {"ebin","src"}, + {"ebin","esrc"} + ]. + +basic_source_search_rules() -> + (erl_source_search_rules() + ++ c_source_search_rules()). + +erl_source_search_rules() -> + [{"ebin","src"}, {"ebin","esrc"}]. + +c_source_search_rules() -> + [{"priv","c_src"}, {"priv","src"}, {"bin","c_src"}, {"bin","src"}, {"", "src"}]. + +%% Looks for a file relative to a given directory + +-type find_file_rule() :: {ObjDirSuffix::string(), SrcDirSuffix::string()}. + +-spec find_file(filename(), filename()) -> + {ok, filename()} | {error, not_found}. +find_file(Filename, Dir) -> + find_file(Filename, Dir, []). + +-spec find_file(filename(), filename(), [find_file_rule()]) -> + {ok, filename()} | {error, not_found}. +find_file(Filename, Dir, []) -> + find_file(Filename, Dir, get_search_rules()); +find_file(Filename, Dir, Rules) -> + try_dir_rules(keep_dir_search_rules(Rules), Filename, Dir). + +%% Looks for a source file relative to the object file name and directory + +-type find_source_rule() :: {ObjExtension::string(), SrcExtension::string(), + [find_file_rule()]}. + +-spec find_source(filename()) -> + {ok, filename()} | {error, not_found}. +find_source(FilePath) -> + find_source(filename:basename(FilePath), filename:dirname(FilePath)). + +-spec find_source(filename(), filename()) -> + {ok, filename()} | {error, not_found}. +find_source(Filename, Dir) -> + find_source(Filename, Dir, []). + +-spec find_source(filename(), filename(), [find_source_rule()]) -> + {ok, filename()} | {error, not_found}. +find_source(Filename, Dir, []) -> + find_source(Filename, Dir, get_search_rules()); +find_source(Filename, Dir, Rules) -> + try_suffix_rules(keep_suffix_search_rules(Rules), Filename, Dir). + +try_suffix_rules(Rules, Filename, Dir) -> + Ext = filename:extension(Filename), + try_suffix_rules(Rules, filename:rootname(Filename, Ext), Dir, Ext). + +try_suffix_rules([{Ext,Src,Rules}|Rest], Root, Dir, Ext) + when is_list(Src), is_list(Rules) -> + case try_dir_rules(add_local_search(Rules), Root ++ Src, Dir) of + {ok, File} -> {ok, File}; + _Other -> + try_suffix_rules(Rest, Root, Dir, Ext) + end; +try_suffix_rules([_|Rest], Root, Dir, Ext) -> + try_suffix_rules(Rest, Root, Dir, Ext); +try_suffix_rules([], _Root, _Dir, _Ext) -> + {error, not_found}. + +%% ensuring we check the directory of the object file before any other directory +add_local_search(Rules) -> + Local = {"",""}, + [Local] ++ lists:filter(fun (X) -> X =/= Local end, Rules). 
+ +try_dir_rules([{From, To}|Rest], Filename, Dir) + when is_list(From), is_list(To) -> + case try_dir_rule(Dir, Filename, From, To) of + {ok, File} -> {ok, File}; + error -> try_dir_rules(Rest, Filename, Dir) + end; +try_dir_rules([], _Filename, _Dir) -> + {error, not_found}. + +try_dir_rule(Dir, Filename, From, To) -> + case lists:suffix(From, Dir) of + true -> + NewDir = lists:sublist(Dir, 1, length(Dir)-length(From))++To, + Src = filename:join(NewDir, Filename), + case is_regular(Src) of + true -> {ok, Src}; + false -> error + end; + false -> + error + end. diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index 51d5ca711d..0ff22f876a 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -34,8 +34,8 @@ -export([absname/1, absname/2, absname_join/2, basename/1, basename/2, dirname/1, extension/1, join/1, join/2, pathtype/1, - rootname/1, rootname/2, split/1, nativename/1]). --export([find_src/1, find_src/2, flatten/1]). + rootname/1, rootname/2, split/1, flatten/1, nativename/1]). +-export([find_src/1, find_src/2]). % deprecated -export([basedir/2, basedir/3]). %% Undocumented and unsupported exports. @@ -750,8 +750,12 @@ separators() -> _ -> {false, false} end. - - +%% NOTE: The find_src/1/2 functions are deprecated; they try to do too much +%% at once and are not a good fit for this module. Parts of the code have +%% been moved to filelib:find_file/2 instead. Only this part of this +%% module is allowed to call the filelib module; such mutual dependency +%% should otherwise be avoided! This code should eventually be removed. +%% %% find_src(Module) -- %% find_src(Module, Rules) -- %% @@ -815,26 +819,27 @@ find_src(ModOrFile, Rules) when is_list(ModOrFile) -> case code:which(Mod) of Possibly_Rel_Path when is_list(Possibly_Rel_Path) -> {ok, Cwd} = file:get_cwd(), - Dir = dirname(make_abs_path(Cwd, Possibly_Rel_Path)), - find_src_1(ModOrFile, Dir, Mod, Extension, Rules); + ObjPath = make_abs_path(Cwd, Possibly_Rel_Path), + find_src_1(ModOrFile, ObjPath, Mod, Extension, Rules); Ecode when is_atom(Ecode) -> % Ecode :: ecode() {error, {Ecode, Mod}} end. %% At this point, the Mod is known to be valid. %% If the source name is not known, find it. -find_src_1(ModOrFile, Dir, Mod, Extension, Rules) -> +find_src_1(ModOrFile, ObjPath, Mod, Extension, Rules) -> %% The documentation says this function must return the found path %% without extension in all cases. Also, ModOrFile could be given with %% or without extension. Hence the calls to rootname below. ModOrFileRoot = rootname(ModOrFile, Extension), - case readable_file(ModOrFileRoot++Extension) of + case filelib:is_regular(ModOrFileRoot++Extension) of true -> find_src_2(ModOrFileRoot, Mod); false -> - case get_source_file(Dir, atom_to_list(Mod)++Extension, Rules) of - {ok, Src} -> - find_src_2(rootname(Src, Extension), Mod); + SrcName = basename(ObjPath, code:objfile_extension()) ++ Extension, + case filelib:find_file(SrcName, dirname(ObjPath), Rules) of + {ok, SrcFile} -> + find_src_2(rootname(SrcFile, Extension), Mod); Error -> Error end @@ -879,53 +884,6 @@ filter_options(Base, [_|Rest], Result) -> filter_options(_Base, [], Result) -> Result. -%% Gets the source file given the object directory. 
- -get_source_file(Dir, Filename, []) -> - Rules = - case application:get_env(kernel, source_search_rules) of - undefined -> default_source_search_rules(); - {ok, []} -> default_source_search_rules(); - {ok, R} when is_list(R) -> R - end, - get_source_file(Dir, Filename, Rules); -get_source_file(Dir, Filename, Rules) -> - source_by_rules(Dir, Filename, Rules). - -default_source_search_rules() -> - [{"", ""}, {"ebin", "src"}, {"ebin", "esrc"}]. - -source_by_rules(Dir, Filename, [{From, To}|Rest]) -> - case try_rule(Dir, Filename, From, To) of - {ok, File} -> {ok, File}; - error -> source_by_rules(Dir, Filename, Rest) - end; -source_by_rules(_Dir, _Filename, []) -> - {error, source_file_not_found}. - -try_rule(Dir, Filename, From, To) -> - case lists:suffix(From, Dir) of - true -> - NewDir = lists:sublist(Dir, 1, length(Dir)-length(From))++To, - Src = join(NewDir, Filename), - case readable_file(Src) of - true -> {ok, Src}; - false -> error - end; - false -> - error - end. - -readable_file(File) -> - case file:read_file_info(File) of - {ok, #file_info{type=regular, access=read}} -> - true; - {ok, #file_info{type=regular, access=read_write}} -> - true; - _Other -> - false - end. - make_abs_path(BasePath, Path) -> join(BasePath, Path). -- cgit v1.2.3 From 0eb45e21d406539caaad98bfc1740f9a11e32565 Mon Sep 17 00:00:00 2001 From: Richard Carlsson Date: Tue, 6 Dec 2016 12:14:18 +0100 Subject: Add shell shortcut for recompiling existing modules This extends the shell function c/1 and c/2 so that if the argument is a module name instead of a file name, it automatically locates the .beam file and the corresponding source file, and then recompiles the module using the same compiler options (plus any options passed to c/2). If compilation fails, the old beam file is preserved. Also adds c(Mod, Opts, Filter), where the Filter argument allows you to remove old compiler options before the new options are added. --- lib/stdlib/src/c.erl | 254 ++++++++++++++++++++++++++++++++++----- lib/stdlib/src/shell_default.erl | 3 +- 2 files changed, 226 insertions(+), 31 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/c.erl b/lib/stdlib/src/c.erl index d36630214c..d3f9a9c7af 100644 --- a/lib/stdlib/src/c.erl +++ b/lib/stdlib/src/c.erl @@ -23,7 +23,7 @@ %% Avoid warning for local function error/2 clashing with autoimported BIF. -compile({no_auto_import,[error/2]}). --export([help/0,lc/1,c/1,c/2,nc/1,nc/2, nl/1,l/1,i/0,i/1,ni/0, +-export([help/0,lc/1,c/1,c/2,c/3,nc/1,nc/2, nl/1,l/1,i/0,i/1,ni/0, y/1, y/2, lc_batch/0, lc_batch/1, i/3,pid/3,m/0,m/1,mm/0,lm/0, @@ -44,7 +44,7 @@ help() -> io:put_chars(<<"bt(Pid) -- stack backtrace for a process\n" - "c(File) -- compile and load code in \n" + "c(Mod) -- compile and load module or file \n" "cd(Dir) -- change working directory\n" "flush() -- flush any messages sent to the shell\n" "help() -- help info\n" @@ -72,32 +72,222 @@ help() -> "xm(M) -- cross reference check a module\n" "y(File) -- generate a Yecc parser\n">>). -%% c(FileName) -%% Compile a file/module. +%% c(Module) +%% Compile a module/file. --spec c(File) -> {'ok', Module} | 'error' when - File :: file:name(), - Module :: module(). +-spec c(Module) -> {'ok', ModuleName} | 'error' when + Module :: file:name(), + ModuleName :: module(). -c(File) -> c(File, []). +c(Module) -> c(Module, []). 
--spec c(File, Options) -> {'ok', Module} | 'error' when - File :: file:name(), +-spec c(Module, Options) -> {'ok', ModuleName} | 'error' when + Module :: file:name(), Options :: [compile:option()], - Module :: module(). + ModuleName :: module(). + +c(Module, Opts) when is_atom(Module) -> + %% either a module name or a source file name (possibly without + %% suffix); if such a source file exists, it is used to compile from + %% scratch with the given options, otherwise look for an object file + Suffix = case filename:extension(Module) of + "" -> src_suffix(Opts); + S -> S + end, + SrcFile = filename:rootname(Module, Suffix) ++ Suffix, + case filelib:is_file(SrcFile) of + true -> + compile_and_load(SrcFile, Opts); + false -> + c(Module, Opts, fun (_) -> true end) + end; +c(Module, Opts) -> + %% we never interpret a string as a module name, only as a file + compile_and_load(Module, Opts). + +%% This tries to find an existing object file and use its compile_info and +%% source path to recompile the module, overwriting the old object file. +%% The Filter parameter is applied to the old compile options + +-spec c(Module, Options, Filter) -> {'ok', ModuleName} | 'error' when + Module :: atom(), + Options :: [compile:option()], + Filter :: fun ((compile:option()) -> boolean()), + ModuleName :: module(). + +c(Module, Options, Filter) when is_atom(Module) -> + case find_beam(Module) of + BeamFile when is_list(BeamFile) -> + c(Module, Options, Filter, BeamFile); + Error -> + {error, Error} + end. + +c(Module, Options, Filter, BeamFile) -> + case compile_info(Module, BeamFile) of + Info when is_list(Info) -> + case find_source(BeamFile, Info) of + SrcFile when is_list(SrcFile) -> + c(SrcFile, Options, Filter, BeamFile, Info); + Error -> + Error + end; + Error -> + Error + end. + +c(SrcFile, NewOpts, Filter, BeamFile, Info) -> + %% Filter old options; also remove options that will be replaced. + %% Write new beam over old beam unless other outdir is specified. + F = fun (Opt) -> not is_outdir_opt(Opt) andalso Filter(Opt) end, + Options = (NewOpts ++ [{outdir,filename:dirname(BeamFile)}] + ++ lists:filter(F, old_options(Info))), + format("Recompiling ~s\n", [SrcFile]), + safe_recompile(SrcFile, Options, BeamFile). + +old_options(Info) -> + case lists:keyfind(options, 1, Info) of + {options, Opts} -> Opts; + false -> [] + end. + +%% prefer the source path in the compile info if the file exists, +%% otherwise do a standard source search relative to the beam file +find_source(BeamFile, Info) -> + case lists:keyfind(source, 1, Info) of + {source, SrcFile} -> + case filelib:is_file(SrcFile) of + true -> SrcFile; + false -> find_source(BeamFile) + end; + _ -> + find_source(BeamFile) + end. + +find_source(BeamFile) -> + case filelib:find_source(BeamFile) of + {ok, SrcFile} -> SrcFile; + _ -> {error, no_source} + end. -c(File, Opts0) when is_list(Opts0) -> - Opts = [report_errors,report_warnings|Opts0], +%% find the beam file for a module, preferring the path reported by code:which() +%% if it still exists, or otherwise by searching the code path +find_beam(Module) when is_atom(Module) -> + case code:which(Module) of + Beam when is_list(Beam), Beam =/= "" -> + case erlang:module_loaded(Module) of + false -> + Beam; % code:which/1 found this in the path + true -> + case filelib:is_file(Beam) of + true -> Beam; + false -> find_beam_1(Module) % file moved? 
+ end + end; + Other when Other =:= ""; Other =:= cover_compiled -> + %% module is loaded but not compiled directly from source + find_beam_1(Module); + Error -> + Error + end. + +find_beam_1(Module) -> + File = atom_to_list(Module) ++ code:objfile_extension(), + case code:where_is_file(File) of + Beam when is_list(Beam) -> + Beam; + Error -> + Error + end. + +%% get the compile_info for a module +%% -will report the info for the module in memory, if loaded +%% -will try to find and examine the beam file if not in memory +%% -will not cause a module to become loaded by accident +compile_info(Module, Beam) when is_atom(Module) -> + case erlang:module_loaded(Module) of + true -> + %% getting the compile info for a loaded module should normally + %% work, but return an empty info list if it fails + try erlang:get_module_info(Module, compile) + catch _:_ -> [] + end; + false -> + case beam_lib:chunks(Beam, [compile_info]) of + {ok, {_Module, [{compile_info, Info}]}} -> + Info; + Error -> + Error + end + end. + +%% compile module, backing up any existing target file and restoring the +%% old version if compilation fails (this should only be used when we have +%% an old beam file that we want to preserve) +safe_recompile(File, Options, BeamFile) -> + %% Note that it's possible that because of options such as 'to_asm', + %% the compiler might not actually write a new beam file at all + Backup = BeamFile ++ ".bak", + case file:rename(BeamFile, Backup) of + Status when Status =:= ok; Status =:= {error,enoent} -> + case compile_and_load(File, Options) of + {ok, _} = Result -> + _ = if Status =:= ok -> file:delete(Backup); + true -> ok + end, + Result; + Error -> + _ = if Status =:= ok -> file:rename(Backup, BeamFile); + true -> ok + end, + Error + end; + Error -> + Error + end. + +%% Compile the file and load the resulting object code (if any). +%% Automatically ensures that there is an outdir option, by default the +%% directory of File, and that a 'from' option will be passed to match the +%% actual source suffix if needed (unless already specified). +compile_and_load(File, Opts0) when is_list(Opts0) -> + Opts = [report_errors, report_warnings + | ensure_from(filename:extension(File), + ensure_outdir(filename:dirname(File), Opts0))], case compile:file(File, Opts) of {ok,Mod} -> %Listing file. - machine_load(Mod, File, Opts); + purge_and_load(Mod, File, Opts); {ok,Mod,_Ws} -> %Warnings maybe turned on. - machine_load(Mod, File, Opts); + purge_and_load(Mod, File, Opts); Other -> %Errors go here Other end; -c(File, Opt) -> - c(File, [Opt]). +compile_and_load(File, Opt) -> + compile_and_load(File, [Opt]). + +ensure_from(Suffix, Opts0) -> + case lists:partition(fun is_from_opt/1, Opts0++from_opt(Suffix)) of + {[Opt|_], Opts} -> [Opt | Opts]; + {[], Opts} -> Opts + end. + +ensure_outdir(Dir, Opts0) -> + {[Opt|_], Opts} = lists:partition(fun is_outdir_opt/1, + Opts0++[{outdir,Dir}]), + [Opt | Opts]. + +is_outdir_opt({outdir, _}) -> true; +is_outdir_opt(_) -> false. + +is_from_opt(from_core) -> true; +is_from_opt(from_asm) -> true; +is_from_opt(from_beam) -> true; +is_from_opt(_) -> false. + +from_opt(".core") -> [from_core]; +from_opt(".S") -> [from_asm]; +from_opt(".beam") -> [from_beam]; +from_opt(_) -> []. %%% Obtain the 'outdir' option from the argument. Return "." if no %%% such option was given. @@ -113,18 +303,29 @@ outdir([Opt|Rest]) -> outdir(Rest) end. +%% mimic how suffix is selected in compile:file(). 
+src_suffix([from_core|_]) -> ".core"; +src_suffix([from_asm|_]) -> ".S"; +src_suffix([from_beam|_]) -> ".beam"; +src_suffix([_|Opts]) -> src_suffix(Opts); +src_suffix([]) -> ".erl". + %%% We have compiled File with options Opts. Find out where the -%%% output file went to, and load it. -machine_load(Mod, File, Opts) -> +%%% output file went and load it, purging any old version. +purge_and_load(Mod, File, Opts) -> Dir = outdir(Opts), - File2 = filename:join(Dir, filename:basename(File, ".erl")), + Base = filename:basename(File, src_suffix(Opts)), + OutFile = filename:join(Dir, Base), case compile:output_generated(Opts) of true -> - Base = atom_to_list(Mod), - case filename:basename(File, ".erl") of + case atom_to_list(Mod) of Base -> code:purge(Mod), - check_load(code:load_abs(File2,Mod), Mod); + %% Note that load_abs() adds the object file suffix + case code:load_abs(OutFile, Mod) of + {error, _R}=Error -> Error; + _ -> {ok, Mod} + end; _OtherMod -> format("** Module name '~p' does not match file name '~tp' **~n", [Mod,File]), @@ -135,13 +336,6 @@ machine_load(Mod, File, Opts) -> ok end. -%%% This function previously warned if the loaded module was -%%% loaded from some other place than current directory. -%%% Now, loading from other than current directory is supposed to work. -%%% so this function does nothing special. -check_load({error, _R} = Error, _) -> Error; -check_load(_, Mod) -> {ok, Mod}. - %% Compile a list of modules %% enables the nice unix shell cmd %% erl -s c lc f1 f2 f3 @d c1=v1 @c2 @i IDir @o ODir -s erlang halt diff --git a/lib/stdlib/src/shell_default.erl b/lib/stdlib/src/shell_default.erl index cd63ab28b5..a0c1d98513 100644 --- a/lib/stdlib/src/shell_default.erl +++ b/lib/stdlib/src/shell_default.erl @@ -23,7 +23,7 @@ -module(shell_default). --export([help/0,lc/1,c/1,c/2,nc/1,nl/1,l/1,i/0,pid/3,i/3,m/0,m/1,lm/0,mm/0, +-export([help/0,lc/1,c/1,c/2,c/3,nc/1,nl/1,l/1,i/0,pid/3,i/3,m/0,m/1,lm/0,mm/0, memory/0,memory/1,uptime/0, erlangrc/1,bi/1, regs/0, flush/0,pwd/0,ls/0,ls/1,cd/1, y/1, y/2, @@ -72,6 +72,7 @@ bi(I) -> c:bi(I). bt(Pid) -> c:bt(Pid). c(File) -> c:c(File). c(File, Opt) -> c:c(File, Opt). +c(File, Opt, Filter) -> c:c(File, Opt, Filter). cd(D) -> c:cd(D). erlangrc(X) -> c:erlangrc(X). flush() -> c:flush(). -- cgit v1.2.3 From 2a78349342b9f72651c016b650321bb317098a3c Mon Sep 17 00:00:00 2001 From: Rickard Green Date: Mon, 23 Jan 2017 21:26:22 +0100 Subject: Use magic refs for compiled match specs --- lib/stdlib/src/dets.erl | 7 ++----- lib/stdlib/src/ets.erl | 18 +++++++++--------- 2 files changed, 11 insertions(+), 14 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/dets.erl b/lib/stdlib/src/dets.erl index 5bc9475fc8..e81383775b 100644 --- a/lib/stdlib/src/dets.erl +++ b/lib/stdlib/src/dets.erl @@ -1063,11 +1063,8 @@ foldl_bins([Bin | Bins], MP, Terms) -> compile_match_spec(select, ?PATTERN_TO_OBJECT_MATCH_SPEC('_') = Spec) -> {Spec, true}; compile_match_spec(select, Spec) -> - case catch ets:match_spec_compile(Spec) of - X when is_binary(X) -> - {Spec, {match_spec, X}}; - _ -> - badarg + try {Spec, {match_spec, ets:match_spec_compile(Spec)}} + catch error:_ -> badarg end; compile_match_spec(object, Pat) -> compile_match_spec(select, ?PATTERN_TO_OBJECT_MATCH_SPEC(Pat)); diff --git a/lib/stdlib/src/ets.erl b/lib/stdlib/src/ets.erl index 20de06fd0b..d6fd1e3ea1 100644 --- a/lib/stdlib/src/ets.erl +++ b/lib/stdlib/src/ets.erl @@ -51,8 +51,8 @@ -type tab() :: atom() | tid(). -type type() :: set | ordered_set | bag | duplicate_bag. 
-type continuation() :: '$end_of_table' - | {tab(),integer(),integer(),binary(),list(),integer()} - | {tab(),_,_,integer(),binary(),list(),integer(),integer()}. + | {tab(),integer(),integer(),comp_match_spec(),list(),integer()} + | {tab(),_,_,integer(),comp_match_spec(),list(),integer(),integer()}. -opaque tid() :: integer(). @@ -488,7 +488,7 @@ update_element(_, _, _) -> %%% End of BIFs --opaque comp_match_spec() :: binary(). %% this one is REALLY opaque +-opaque comp_match_spec() :: reference(). -spec match_spec_run(List, CompiledMatchSpec) -> list() when List :: [tuple()], @@ -505,28 +505,28 @@ match_spec_run(List, CompiledMS) -> repair_continuation('$end_of_table', _) -> '$end_of_table'; %% ordered_set -repair_continuation(Untouched = {Table,Lastkey,EndCondition,N2,Bin,L2,N3,N4}, MS) +repair_continuation(Untouched = {Table,Lastkey,EndCondition,N2,MSRef,L2,N3,N4}, MS) when %% (is_atom(Table) or is_integer(Table)), is_integer(N2), - byte_size(Bin) =:= 0, + %% is_reference(MSRef), is_list(L2), is_integer(N3), is_integer(N4) -> - case ets:is_compiled_ms(Bin) of + case ets:is_compiled_ms(MSRef) of true -> Untouched; false -> {Table,Lastkey,EndCondition,N2,ets:match_spec_compile(MS),L2,N3,N4} end; %% set/bag/duplicate_bag -repair_continuation(Untouched = {Table,N1,N2,Bin,L,N3}, MS) +repair_continuation(Untouched = {Table,N1,N2,MSRef,L,N3}, MS) when %% (is_atom(Table) or is_integer(Table)), is_integer(N1), is_integer(N2), - byte_size(Bin) =:= 0, + %% is_reference(MSRef), is_list(L), is_integer(N3) -> - case ets:is_compiled_ms(Bin) of + case ets:is_compiled_ms(MSRef) of true -> Untouched; false -> -- cgit v1.2.3 From 3bdb334a95e9dd49c1ada55b6f442099fdf7f72b Mon Sep 17 00:00:00 2001 From: Rickard Green Date: Tue, 24 Jan 2017 15:10:55 +0100 Subject: Use magic refs for binary compile patterns --- lib/stdlib/src/binary.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/binary.erl b/lib/stdlib/src/binary.erl index ccc827ca2d..45666fbcb4 100644 --- a/lib/stdlib/src/binary.erl +++ b/lib/stdlib/src/binary.erl @@ -24,7 +24,7 @@ -export_type([cp/0]). --opaque cp() :: {'am' | 'bm', binary()}. +-opaque cp() :: {'am' | 'bm', reference()}. -type part() :: {Start :: non_neg_integer(), Length :: integer()}. %%% BIFs. -- cgit v1.2.3 From 1d886081027c4d4fcfbf7f73d4708694cad582f5 Mon Sep 17 00:00:00 2001 From: Richard Carlsson Date: Sat, 4 Feb 2017 15:31:14 +0100 Subject: Deprecate filename:find_src/1/2 --- lib/stdlib/src/filename.erl | 3 +++ lib/stdlib/src/otp_internal.erl | 7 +++++++ 2 files changed, 10 insertions(+) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index 0ff22f876a..2a2f25dcd2 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -19,6 +19,9 @@ %% -module(filename). +-deprecated({find_src,1,next_major_release}). +-deprecated({find_src,2,next_major_release}). + %% Purpose: Provides generic manipulation of filenames. %% %% Generally, these functions accept filenames in the native format diff --git a/lib/stdlib/src/otp_internal.erl b/lib/stdlib/src/otp_internal.erl index 5bf77a5160..2a0e3118d0 100644 --- a/lib/stdlib/src/otp_internal.erl +++ b/lib/stdlib/src/otp_internal.erl @@ -550,6 +550,13 @@ obsolete_1(overload, _, _) -> obsolete_1(rpc, safe_multi_server_call, A) when A =:= 2; A =:= 3 -> {removed, {rpc, multi_server_call, A}}; +%% Added in OTP 20. 
+ +obsolete_1(filename, find_src, 1) -> + {deprecated, "deprecated; use filelib:find_source/1 instead"}; +obsolete_1(filename, find_src, 2) -> + {deprecated, "deprecated; use filelib:find_source/3 instead"}; + %% Removed in OTP 20. obsolete_1(erlang, hash, 2) -> -- cgit v1.2.3 From 6f5c79240554dbb5caaafcb9124e8917e62c980d Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Mon, 6 Feb 2017 15:45:45 +0100 Subject: stdlib: Improve Erlang shell's tab-completion of long names --- lib/stdlib/src/edlin_expand.erl | 95 +++++++++++++++++++++++++++-------------- 1 file changed, 64 insertions(+), 31 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/edlin_expand.erl b/lib/stdlib/src/edlin_expand.erl index 5f821caef0..a1a97af4c5 100644 --- a/lib/stdlib/src/edlin_expand.erl +++ b/lib/stdlib/src/edlin_expand.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. +%% Copyright Ericsson AB 2005-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -101,44 +101,77 @@ match(Prefix, Alts, Extra0) -> %% Return the list of names L in multiple columns. format_matches(L) -> - S = format_col(lists:sort(L), []), + {S1, Dots} = format_col(lists:sort(L), []), + S = case Dots of + true -> + {_, Prefix} = longest_common_head(vals(L)), + PrefixLen = length(Prefix), + case PrefixLen =< 3 of + true -> S1; % Do not replace the prefix with "...". + false -> + LeadingDotsL = leading_dots(L, PrefixLen), + {S2, _} = format_col(lists:sort(LeadingDotsL), []), + S2 + end; + false -> S1 + end, ["\n" | S]. format_col([], _) -> []; -format_col(L, Acc) -> format_col(L, field_width(L), 0, Acc). - -format_col(X, Width, Len, Acc) when Width + Len > 79 -> - format_col(X, Width, 0, ["\n" | Acc]); -format_col([A|T], Width, Len, Acc0) -> - H = case A of - %% If it's a tuple {string(), integer()}, we assume it's an - %% arity, and meant to be printed. - {H0, I} when is_integer(I) -> - H0 ++ "/" ++ integer_to_list(I); - {H1, _} -> H1; - H2 -> H2 - end, - Acc = [io_lib:format("~-*ts", [Width,H]) | Acc0], - format_col(T, Width, Len+Width, Acc); -format_col([], _, _, Acc) -> - lists:reverse(Acc, "\n"). - -field_width(L) -> field_width(L, 0). - -field_width([{H,_}|T], W) -> +format_col(L, Acc) -> + LL = 79, + format_col(L, field_width(L, LL), 0, Acc, LL, false). + +format_col(X, Width, Len, Acc, LL, Dots) when Width + Len > LL -> + format_col(X, Width, 0, ["\n" | Acc], LL, Dots); +format_col([A|T], Width, Len, Acc0, LL, Dots) -> + {H0, R} = format_val(A), + Hmax = LL - length(R), + {H, NewDots} = + case length(H0) > Hmax of + true -> {io_lib:format("~-*ts", [Hmax - 3, H0]) ++ "...", true}; + false -> {H0, Dots} + end, + Acc = [io_lib:format("~-*ts", [Width, H ++ R]) | Acc0], + format_col(T, Width, Len+Width, Acc, LL, NewDots); +format_col([], _, _, Acc, _LL, Dots) -> + {lists:reverse(Acc, "\n"), Dots}. + +format_val({H, I}) when is_integer(I) -> + %% If it's a tuple {string(), integer()}, we assume it's an + %% arity, and meant to be printed. + {H, "/" ++ integer_to_list(I)}; +format_val({H, _}) -> + {H, ""}; +format_val(H) -> + {H, ""}. + +field_width(L, LL) -> field_width(L, 0, LL). 
+ +field_width([{H,_}|T], W, LL) -> case length(H) of - L when L > W -> field_width(T, L); - _ -> field_width(T, W) + L when L > W -> field_width(T, L, LL); + _ -> field_width(T, W, LL) end; -field_width([H|T], W) -> +field_width([H|T], W, LL) -> case length(H) of - L when L > W -> field_width(T, L); - _ -> field_width(T, W) + L when L > W -> field_width(T, L, LL); + _ -> field_width(T, W, LL) end; -field_width([], W) when W < 40 -> +field_width([], W, LL) when W < LL - 3 -> W + 4; -field_width([], _) -> - 40. +field_width([], _, LL) -> + LL. + +vals([]) -> []; +vals([{S, _}|L]) -> [S|vals(L)]; +vals([S|L]) -> [S|vals(L)]. + +leading_dots([], _Len) -> []; +leading_dots([{H, I}|L], Len) -> + [{"..." ++ nthtail(Len, H), I}|leading_dots(L, Len)]; +leading_dots([H|L], Len) -> + ["..." ++ nthtail(Len, H)|leading_dots(L, Len)]. longest_common_head([]) -> no; -- cgit v1.2.3 From 4bf5cb8aec7680a51a99a92ce124ca270c9b5895 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Valim?= Date: Sun, 12 Feb 2017 19:59:12 +0100 Subject: Use maps instead of dict in erl_expand_records --- lib/stdlib/src/erl_expand_records.erl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/erl_expand_records.erl b/lib/stdlib/src/erl_expand_records.erl index 2280464bff..16220bceb4 100644 --- a/lib/stdlib/src/erl_expand_records.erl +++ b/lib/stdlib/src/erl_expand_records.erl @@ -30,13 +30,13 @@ -import(lists, [map/2,foldl/3,foldr/3,sort/1,reverse/1,duplicate/2]). --record(exprec, {compile=[], % Compile flags - vcount=0, % Variable counter - calltype=#{}, % Call types - records=dict:new(), % Record definitions - strict_ra=[], % strict record accesses - checked_ra=[] % successfully accessed records - }). +-record(exprec, {compile=[], % Compile flags + vcount=0, % Variable counter + calltype=#{}, % Call types + records=#{}, % Record definitions + strict_ra=[], % strict record accesses + checked_ra=[] % successfully accessed records + }). -spec(module(AbsForms, CompileOptions) -> AbsForms2 when AbsForms :: [erl_parse:abstract_form()], @@ -72,7 +72,7 @@ init_calltype_imports([], Ctype) -> Ctype. forms([{attribute,_,record,{Name,Defs}}=Attr | Fs], St0) -> NDefs = normalise_fields(Defs), - St = St0#exprec{records=dict:store(Name, NDefs, St0#exprec.records)}, + St = St0#exprec{records=maps:put(Name, NDefs, St0#exprec.records)}, {Fs1, St1} = forms(Fs, St), {[Attr | Fs1], St1}; forms([{function,L,N,A,Cs0} | Fs0], St0) -> @@ -546,7 +546,7 @@ normalise_fields(Fs) -> %% record_fields(RecordName, State) %% find_field(FieldName, Fields) -record_fields(R, St) -> dict:fetch(R, St#exprec.records). +record_fields(R, St) -> maps:get(R, St#exprec.records). find_field(F, [{record_field,_,{atom,_,F},Val} | _]) -> {ok,Val}; find_field(F, [_ | Fs]) -> find_field(F, Fs); -- cgit v1.2.3 From a3291799c29e82bb2725a589ef0f804dfbd9eac7 Mon Sep 17 00:00:00 2001 From: Andrew Dryga Date: Sun, 12 Feb 2017 19:54:46 +0200 Subject: Fixed typos in lib/stdlib --- lib/stdlib/src/gen_fsm.erl | 2 +- lib/stdlib/src/io_lib.erl | 2 +- lib/stdlib/src/proplists.erl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/gen_fsm.erl b/lib/stdlib/src/gen_fsm.erl index 6e7528fd98..e925a75fe8 100644 --- a/lib/stdlib/src/gen_fsm.erl +++ b/lib/stdlib/src/gen_fsm.erl @@ -273,7 +273,7 @@ start_timer(Time, Msg) -> send_event_after(Time, Event) -> erlang:start_timer(Time, self(), {'$gen_event', Event}). 
-%% Returns the remaing time for the timer if Ref referred to
+%% Returns the remaining time for the timer if Ref referred to
 %% an active timer/send_event_after, false otherwise.
 cancel_timer(Ref) ->
     case erlang:cancel_timer(Ref) of
diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl
index ad98bc0420..a91143a764 100644
--- a/lib/stdlib/src/io_lib.erl
+++ b/lib/stdlib/src/io_lib.erl
@@ -28,7 +28,7 @@
 %% Most of the code here is derived from the original prolog versions and
 %% from similar code written by Joe Armstrong and myself.
 %%
-%% This module has been split into seperate modules:
+%% This module has been split into separate modules:
 %% io_lib          - basic write and utilities
 %% io_lib_format   - formatted output
 %% io_lib_fread    - formatted input
diff --git a/lib/stdlib/src/proplists.erl b/lib/stdlib/src/proplists.erl
index 21de8c45c1..340dfdcac9 100644
--- a/lib/stdlib/src/proplists.erl
+++ b/lib/stdlib/src/proplists.erl
@@ -83,7 +83,7 @@ property(Key, Value) ->
 %% ---------------------------------------------------------------------
-%% @doc Unfolds all occurences of atoms in ListIn to tuples
+%% @doc Unfolds all occurrences of atoms in ListIn to tuples
 %% {Atom, true}.
 %%
 %% @see compact/1
-- 
cgit v1.2.3 

From aa0c4b0df7cdc750450906aff4e8c81627d80605 Mon Sep 17 00:00:00 2001
From: Paul Schoenfelder
Date: Tue, 31 Jan 2017 17:40:34 -0600
Subject: Update erl_tar to support PAX format, etc.

This commit introduces the following key changes:

- Support for reading tar archives in formats currently in common use,
  such as v7, STAR, USTAR, PAX, and GNU tar's extensions to the
  STAR/USTAR format.
- Support for writing PAX archives, only when necessary, using USTAR
  when possible for greater portability.

These changes result in lifting of some prior restrictions:

- Support for reading archives produced by modern tar implementations
  when other restrictions described below are present.
- Support for filenames which exceed 100 bytes in length, or paths which
  exceed 255 bytes (see USTAR format specification for more details on
  this restriction).
- Support for filenames of arbitrary length.
- Support for unicode metadata (the previous behaviour of erl_tar
  actually violated the spec by writing unicode-encoded data to fields
  which are defined to be 7-bit ASCII; even though this technically
  worked when erl_tar was used at both source and destination, it may
  not have worked with other tar utilities, and this implementation now
  conforms to the spec).
- Support for uid/gid values which cannot be converted to octal integers.
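As a minimal sketch of what the lifted name-length restriction allows
(an illustration only, with made-up file names, relying on the erl_tar
functions exported in the diff below):

    %% A path component longer than 100 bytes now round-trips through
    %% create/table/extract; a PAX or extended header is only emitted
    %% when the plain USTAR name fields cannot hold the name.
    Long = lists:duplicate(120, $x) ++ ".txt",
    ok = erl_tar:create("demo.tar.gz", [{Long, <<"payload">>}], [compressed]),
    {ok, Names} = erl_tar:table("demo.tar.gz", [compressed]),
    true = lists:member(Long, Names),
    ok = erl_tar:extract("demo.tar.gz", [compressed, {cwd, "out"}]).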
--- lib/stdlib/src/Makefile | 4 +- lib/stdlib/src/erl_tar.erl | 2562 +++++++++++++++++++++++++++++--------------- lib/stdlib/src/erl_tar.hrl | 394 +++++++ 3 files changed, 2104 insertions(+), 856 deletions(-) create mode 100644 lib/stdlib/src/erl_tar.hrl (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/Makefile b/lib/stdlib/src/Makefile index d6c0ff8d8d..ed3dfb342c 100644 --- a/lib/stdlib/src/Makefile +++ b/lib/stdlib/src/Makefile @@ -130,7 +130,7 @@ HRL_FILES= \ ../include/qlc.hrl \ ../include/zip.hrl -INTERNAL_HRL_FILES= dets.hrl +INTERNAL_HRL_FILES= dets.hrl erl_tar.hrl ERL_FILES= $(MODULES:%=%.erl) @@ -228,7 +228,7 @@ $(EBIN)/dets_v9.beam: dets.hrl $(EBIN)/erl_bits.beam: ../include/erl_bits.hrl $(EBIN)/erl_compile.beam: ../include/erl_compile.hrl ../../kernel/include/file.hrl $(EBIN)/erl_lint.beam: ../include/erl_bits.hrl -$(EBIN)/erl_tar.beam: ../../kernel/include/file.hrl +$(EBIN)/erl_tar.beam: ../../kernel/include/file.hrl erl_tar.hrl $(EBIN)/file_sorter.beam: ../../kernel/include/file.hrl $(EBIN)/filelib.beam: ../../kernel/include/file.hrl $(EBIN)/filename.beam: ../../kernel/include/file.hrl diff --git a/lib/stdlib/src/erl_tar.erl b/lib/stdlib/src/erl_tar.erl index a383a0fc67..086e77cd28 100644 --- a/lib/stdlib/src/erl_tar.erl +++ b/lib/stdlib/src/erl_tar.erl @@ -1,8 +1,8 @@ %% %% %CopyrightBegin% -%% -%% Copyright Ericsson AB 1997-2016. All Rights Reserved. -%% +%% +%% Copyright Ericsson AB 1997-2017. All Rights Reserved. +%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,191 +14,245 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% +%% %% %CopyrightEnd% %% +%% This module implements extraction/creation of tar archives. +%% It supports reading most common tar formats, namely V7, STAR, +%% USTAR, GNU, BSD/libarchive, and PAX. It produces archives in USTAR +%% format, unless it must use PAX headers, in which case it produces PAX +%% format. +%% +%% The following references where used: +%% http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 +%% http://www.gnu.org/software/tar/manual/html_node/Standard.html +%% http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html -module(erl_tar). -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% Purpose: Unix tar (tape archive) utility. -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - --export([init/3, create/2, create/3, extract/1, extract/2, table/1, table/2, - open/2, close/1, add/3, add/4, - t/1, tt/1, format_error/1]). +-export([init/3, + create/2, create/3, + extract/1, extract/2, + table/1, table/2, t/1, tt/1, + open/2, close/1, + add/3, add/4, + format_error/1]). -include_lib("kernel/include/file.hrl"). +-include_lib("erl_tar.hrl"). --record(add_opts, - {read_info, % Fun to use for read file/link info. - chunk_size = 0, % For file reading when sending to sftp. 0=do not chunk - verbose = false :: boolean()}). % Verbose on/off. - -%% Opens a tar archive. - -init(UsrHandle, AccessMode, Fun) when is_function(Fun,2) -> - {ok, {AccessMode,{tar_descriptor,UsrHandle,Fun}}}. 
- -%%%================================================================ -%%% The open function with friends is to keep the file and binary api of this module -open(Name, Mode) -> - case open_mode(Mode) of - {ok, Access, Raw, Opts} -> - open1(Name, Access, Raw, Opts); - {error, Reason} -> - {error, {Name, Reason}} - end. - -open1({binary,Bin}, read, _Raw, Opts) -> - case file:open(Bin, [ram,binary,read]) of - {ok,File} -> - _ = [ram_file:uncompress(File) || Opts =:= [compressed]], - init(File,read,file_fun()); - Error -> - Error - end; -open1({file, Fd}, read, _Raw, _Opts) -> - init(Fd, read, file_fun()); -open1(Name, Access, Raw, Opts) -> - case file:open(Name, Raw ++ [binary, Access|Opts]) of - {ok, File} -> - init(File, Access, file_fun()); - {error, Reason} -> - {error, {Name, Reason}} - end. - -file_fun() -> - fun(write, {Fd,Data}) -> file:write(Fd, Data); - (position, {Fd,Pos}) -> file:position(Fd, Pos); - (read2, {Fd,Size}) -> file:read(Fd,Size); - (close, Fd) -> file:close(Fd) - end. - -%%% End of file and binary api (except for open_mode/1 downwards -%%%================================================================ - -%% Closes a tar archive. - -close({read, File}) -> - ok = do_close(File); -close({write, File}) -> - PadResult = pad_file(File), - ok = do_close(File), - PadResult; -close(_) -> - {error, einval}. - -%% Adds a file to a tape archive. - -add(File, Name, Options) -> - add(File, Name, Name, Options). -add({write, File}, Name, NameInArchive, Options) -> - Opts = #add_opts{read_info=fun(F) -> file:read_link_info(F) end}, - add1(File, Name, NameInArchive, add_opts(Options, Opts)); -add({read, _File}, _, _, _) -> - {error, eacces}; -add(_, _, _, _) -> - {error, einval}. - -add_opts([dereference|T], Opts) -> - add_opts(T, Opts#add_opts{read_info=fun(F) -> file:read_file_info(F) end}); -add_opts([verbose|T], Opts) -> - add_opts(T, Opts#add_opts{verbose=true}); -add_opts([{chunks,N}|T], Opts) -> - add_opts(T, Opts#add_opts{chunk_size=N}); -add_opts([_|T], Opts) -> - add_opts(T, Opts); -add_opts([], Opts) -> - Opts. - -%% Creates a tar file Name containing the given files. - -create(Name, Filenames) -> - create(Name, Filenames, []). - -%% Creates a tar archive Name containing the given files. -%% Accepted options: verbose, compressed, cooked +%% Converts the short error reason to a descriptive string. +-spec format_error(term()) -> string(). 
+format_error(invalid_tar_checksum) -> + "Checksum failed"; +format_error(bad_header) -> + "Unrecognized tar header format"; +format_error({bad_header, Reason}) -> + lists:flatten(io_lib:format("Unrecognized tar header format: ~p", [Reason])); +format_error({invalid_header, negative_size}) -> + "Invalid header: negative size"; +format_error(invalid_sparse_header_size) -> + "Invalid sparse header: negative size"; +format_error(invalid_sparse_map_entry) -> + "Invalid sparse map entry"; +format_error({invalid_sparse_map_entry, Reason}) -> + lists:flatten(io_lib:format("Invalid sparse map entry: ~p", [Reason])); +format_error(invalid_end_of_archive) -> + "Invalid end of archive"; +format_error(eof) -> + "Unexpected end of file"; +format_error(integer_overflow) -> + "Failed to parse numeric: integer overflow"; +format_error({misaligned_read, Pos}) -> + lists:flatten(io_lib:format("Read a block which was misaligned: block_size=~p pos=~p", + [?BLOCK_SIZE, Pos])); +format_error(invalid_gnu_1_0_sparsemap) -> + "Invalid GNU sparse map (version 1.0)"; +format_error({invalid_gnu_0_1_sparsemap, Format}) -> + lists:flatten(io_lib:format("Invalid GNU sparse map (version ~s)", [Format])); +format_error({Name,Reason}) -> + lists:flatten(io_lib:format("~ts: ~ts", [Name,format_error(Reason)])); +format_error(Atom) when is_atom(Atom) -> + file:format_error(Atom); +format_error(Term) -> + lists:flatten(io_lib:format("~tp", [Term])). -create(Name, FileList, Options) -> - Mode = lists:filter(fun(X) -> (X=:=compressed) or (X=:=cooked) - end, Options), - case open(Name, [write|Mode]) of - {ok, TarFile} -> - Add = fun({NmInA, NmOrBin}) -> - add(TarFile, NmOrBin, NmInA, Options); - (Nm) -> - add(TarFile, Nm, Nm, Options) - end, - Result = foreach_while_ok(Add, FileList), - case {Result, close(TarFile)} of - {ok, Res} -> Res; - {Res, _} -> Res - end; - Reason -> - Reason - end. +%% Initializes a new reader given a custom file handle and I/O wrappers +-spec init(handle(), write | read, file_op()) -> {ok, reader()} | {error, badarg}. +init(Handle, AccessMode, Fun) when is_function(Fun, 2) -> + Reader = #reader{handle=Handle,access=AccessMode,func=Fun}, + {ok, Pos, Reader2} = do_position(Reader, {cur, 0}), + {ok, Reader2#reader{pos=Pos}}; +init(_Handle, _AccessMode, _Fun) -> + {error, badarg}. +%%%================================================================ %% Extracts all files from the tar file Name. - +-spec extract(open_handle()) -> ok | {error, term()}. extract(Name) -> extract(Name, []). %% Extracts (all) files from the tar file Name. -%% Options accepted: keep_old_files, {files, ListOfFilesToExtract}, verbose, -%% {cwd, AbsoluteDirectory} +%% Options accepted: +%% - cooked: Opens the tar file without mode `raw` +%% - compressed: Uncompresses the tar file when reading +%% - memory: Returns the tar contents as a list of tuples {Name, Bin} +%% - keep_old_files: Extracted files will not overwrite the destination +%% - {files, ListOfFilesToExtract}: Only extract ListOfFilesToExtract +%% - verbose: Prints verbose information about the extraction, +%% - {cwd, AbsoluteDir}: Sets the current working directory for the extraction +-spec extract(open_handle(), [extract_opt()]) -> + ok + | {ok, [{string(), binary()}]} + | {error, term()}. 
+extract({binary, Bin}, Opts) when is_list(Opts) -> + do_extract({binary, Bin}, Opts); +extract({file, Fd}, Opts) when is_list(Opts) -> + do_extract({file, Fd}, Opts); +extract(#reader{}=Reader, Opts) when is_list(Opts) -> + do_extract(Reader, Opts); +extract(Name, Opts) when is_list(Name); is_binary(Name), is_list(Opts) -> + do_extract(Name, Opts). + +do_extract(Handle, Opts) when is_list(Opts) -> + Opts2 = extract_opts(Opts), + Acc = if Opts2#read_opts.output =:= memory -> []; true -> ok end, + foldl_read(Handle, fun extract1/4, Acc, Opts2). + +extract1(eof, Reader, _, Acc) when is_list(Acc) -> + {ok, {ok, lists:reverse(Acc)}, Reader}; +extract1(eof, Reader, _, Acc) -> + {ok, Acc, Reader}; +extract1(#tar_header{name=Name,size=Size}=Header, Reader, Opts, Acc) -> + case check_extract(Name, Opts) of + true -> + case do_read(Reader, Size) of + {ok, Bin, Reader2} -> + case write_extracted_element(Header, Bin, Opts) of + ok -> + {ok, Acc, Reader2}; + {ok, NameBin} when is_list(Acc) -> + {ok, [NameBin | Acc], Reader2}; + {error, _} = Err -> + throw(Err) + end; + {error, _} = Err -> + throw(Err) + end; + false -> + {ok, Acc, skip_file(Reader)} + end. -extract(Name, Opts) -> - foldl_read(Name, fun extract1/4, ok, extract_opts(Opts)). +%% Checks if the file Name should be extracted. +check_extract(_, #read_opts{files=all}) -> + true; +check_extract(Name, #read_opts{files=Files}) -> + ordsets:is_element(Name, Files). -%% Returns a list of names of the files in the tar file Name. -%% Options accepted: verbose +%%%================================================================ +%% The following table functions produce a list of information about +%% the files contained in the archive. +-type filename() :: string(). +-type typeflag() :: regular | link | symlink | + char | block | directory | + fifo | reserved | unknown. +-type mode() :: non_neg_integer(). +-type uid() :: non_neg_integer(). +-type gid() :: non_neg_integer(). + +-type tar_entry() :: {filename(), + typeflag(), + non_neg_integer(), + calendar:datetime(), + mode(), + uid(), + gid()}. +%% Returns a list of names of the files in the tar file Name. +-spec table(open_handle()) -> {ok, [string()]} | {error, term()}. table(Name) -> table(Name, []). %% Returns a list of names of the files in the tar file Name. %% Options accepted: compressed, verbose, cooked. - -table(Name, Opts) -> +-spec table(open_handle(), [compressed | verbose | cooked]) -> + {ok, [tar_entry()]} | {error, term()}. +table(Name, Opts) when is_list(Opts) -> foldl_read(Name, fun table1/4, [], table_opts(Opts)). +table1(eof, Reader, _, Result) -> + {ok, {ok, lists:reverse(Result)}, Reader}; +table1(#tar_header{}=Header, Reader, #read_opts{verbose=Verbose}, Result) -> + Attrs = table1_attrs(Header, Verbose), + Reader2 = skip_file(Reader), + {ok, [Attrs|Result], Reader2}. + +%% Extracts attributes relevant to table1's output +table1_attrs(#tar_header{typeflag=Typeflag,mode=Mode}=Header, true) -> + Type = typeflag(Typeflag), + Name = Header#tar_header.name, + Mtime = Header#tar_header.mtime, + Uid = Header#tar_header.uid, + Gid = Header#tar_header.gid, + Size = Header#tar_header.size, + {Name, Type, Size, Mtime, Mode, Uid, Gid}; +table1_attrs(#tar_header{name=Name}, _Verbose) -> + Name. 
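
%% ---------------------------------------------------------------------
%% [Editor's illustration, not part of the patch] A minimal usage sketch
%% for the read side defined above. The archive name "release.tar.gz" and
%% the function name are hypothetical; only the documented erl_tar API is
%% assumed.
example_read_archive() ->
    %% 'verbose' makes table/2 return full {Name,Type,Size,Mtime,Mode,Uid,Gid}
    %% tuples instead of plain names.
    {ok, _Entries} = erl_tar:table("release.tar.gz", [compressed, verbose]),
    %% 'memory' makes extract/2 return the file contents as {Name, Bin}
    %% pairs instead of writing them to disk.
    {ok, Files} = erl_tar:extract("release.tar.gz", [compressed, memory]),
    [Name || {Name, _Bin} <- Files].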
+ +typeflag(?TYPE_REGULAR) -> regular; +typeflag(?TYPE_REGULAR_A) -> regular; +typeflag(?TYPE_GNU_SPARSE) -> regular; +typeflag(?TYPE_CONT) -> regular; +typeflag(?TYPE_LINK) -> link; +typeflag(?TYPE_SYMLINK) -> symlink; +typeflag(?TYPE_CHAR) -> char; +typeflag(?TYPE_BLOCK) -> block; +typeflag(?TYPE_DIR) -> directory; +typeflag(?TYPE_FIFO) -> fifo; +typeflag(_) -> unknown. +%%%================================================================ %% Comments for printing the contents of a tape archive, %% meant to be invoked from the shell. -t(Name) -> +%% Prints each filename in the archive +-spec t(file:filename()) -> ok | {error, term()}. +t(Name) when is_list(Name); is_binary(Name) -> case table(Name) of - {ok, List} -> - lists:foreach(fun(N) -> ok = io:format("~ts\n", [N]) end, List); - Error -> - Error + {ok, List} -> + lists:foreach(fun(N) -> ok = io:format("~ts\n", [N]) end, List); + Error -> + Error end. +%% Prints verbose information about each file in the archive +-spec tt(open_handle()) -> ok | {error, term()}. tt(Name) -> case table(Name, [verbose]) of - {ok, List} -> - lists:foreach(fun print_header/1, List); - Error -> - Error + {ok, List} -> + lists:foreach(fun print_header/1, List); + Error -> + Error end. +%% Used by tt/1 to print a tar_entry tuple +-spec print_header(tar_entry()) -> ok. print_header({Name, Type, Size, Mtime, Mode, Uid, Gid}) -> io:format("~s~s ~4w/~-4w ~7w ~s ~s\n", - [type_to_string(Type), mode_to_string(Mode), - Uid, Gid, Size, time_to_string(Mtime), Name]). + [type_to_string(Type), mode_to_string(Mode), + Uid, Gid, Size, time_to_string(Mtime), Name]). -type_to_string(regular) -> "-"; +type_to_string(regular) -> "-"; type_to_string(directory) -> "d"; -type_to_string(link) -> "l"; -type_to_string(symlink) -> "s"; -type_to_string(char) -> "c"; -type_to_string(block) -> "b"; -type_to_string(fifo) -> "f"; -type_to_string(_) -> "?". - +type_to_string(link) -> "l"; +type_to_string(symlink) -> "s"; +type_to_string(char) -> "c"; +type_to_string(block) -> "b"; +type_to_string(fifo) -> "f"; +type_to_string(unknown) -> "?". + +%% Converts a numeric mode to its human-readable representation mode_to_string(Mode) -> mode_to_string(Mode, "xwrxwrxwr", []). - mode_to_string(Mode, [C|T], Acc) when Mode band 1 =:= 1 -> mode_to_string(Mode bsr 1, T, [C|Acc]); mode_to_string(Mode, [_|T], Acc) -> @@ -206,6 +260,7 @@ mode_to_string(Mode, [_|T], Acc) -> mode_to_string(_, [], Acc) -> Acc. +%% Converts a datetime tuple to a readable string time_to_string({{Y, Mon, Day}, {H, Min, _}}) -> io_lib:format("~s ~2w ~s:~s ~w", [month(Mon), Day, two_d(H), two_d(Min), Y]). @@ -225,809 +280,1608 @@ month(10) -> "Oct"; month(11) -> "Nov"; month(12) -> "Dec". -%% Converts the short error reason to a descriptive string. +%%%================================================================ +%% The open function with friends is to keep the file and binary api of this module +-type open_handle() :: file:filename() + | {binary, binary()} + | {file, term()}. +-spec open(open_handle(), [write | compressed | cooked]) -> + {ok, reader()} | {error, term()}. +open({binary, Bin}, Mode) when is_binary(Bin) -> + do_open({binary, Bin}, Mode); +open({file, Fd}, Mode) -> + do_open({file, Fd}, Mode); +open(Name, Mode) when is_list(Name); is_binary(Name) -> + do_open(Name, Mode). + +do_open(Name, Mode) when is_list(Mode) -> + case open_mode(Mode) of + {ok, Access, Raw, Opts} -> + open1(Name, Access, Raw, Opts); + {error, Reason} -> + {error, {Name, Reason}} + end. 
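
%% ---------------------------------------------------------------------
%% [Editor's illustration, not part of the patch] Worked values for the
%% listing helpers earlier in this hunk; the function name is hypothetical
%% and assumes it is compiled inside this module.
example_listing_format() ->
    "rwxr-xr-x" = mode_to_string(8#755),
    "d" = type_to_string(directory),
    %% so tt/1 prints lines such as "drwxr-xr-x  0/0  ..." for directories
    ok.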
-format_error(bad_header) -> "Bad directory header"; -format_error(eof) -> "Unexpected end of file"; -format_error(symbolic_link_too_long) -> "Symbolic link too long"; -format_error({Name,Reason}) -> - lists:flatten(io_lib:format("~ts: ~ts", [Name,format_error(Reason)])); -format_error(Atom) when is_atom(Atom) -> - file:format_error(Atom); -format_error(Term) -> - lists:flatten(io_lib:format("~tp", [Term])). +open1({binary,Bin}, read, _Raw, Opts) when is_binary(Bin) -> + case file:open(Bin, [ram,binary,read]) of + {ok,File} -> + _ = [ram_file:uncompress(File) || Opts =:= [compressed]], + {ok, #reader{handle=File,access=read,func=fun file_op/2}}; + Error -> + Error + end; +open1({file, Fd}, read, _Raw, _Opts) -> + Reader = #reader{handle=Fd,access=read,func=fun file_op/2}, + case do_position(Reader, {cur, 0}) of + {ok, Pos, Reader2} -> + {ok, Reader2#reader{pos=Pos}}; + {error, _} = Err -> + Err + end; +open1(Name, Access, Raw, Opts) when is_list(Name) or is_binary(Name) -> + case file:open(Name, Raw ++ [binary, Access|Opts]) of + {ok, File} -> + {ok, #reader{handle=File,access=Access,func=fun file_op/2}}; + {error, Reason} -> + {error, {Name, Reason}} + end. +open_mode(Mode) -> + open_mode(Mode, false, [raw], []). -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%% -%%% Useful definitions (also start of implementation). -%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%% Offset for fields in the tar header. -%% Note that these offsets are ZERO-based as in the POSIX standard -%% document, while binaries use ONE-base offset. Caveat Programmer. - --define(th_name, 0). --define(th_mode, 100). --define(th_uid, 108). --define(th_gid, 116). --define(th_size, 124). --define(th_mtime, 136). --define(th_chksum, 148). --define(th_typeflag, 156). --define(th_linkname, 157). --define(th_magic, 257). --define(th_version, 263). --define(th_prefix, 345). - -%% Length of these fields. - --define(th_name_len, 100). --define(th_mode_len, 8). --define(th_uid_len, 8). --define(th_gid_len, 8). --define(th_size_len, 12). --define(th_mtime_len, 12). --define(th_chksum_len, 8). --define(th_linkname_len, 100). --define(th_magic_len, 6). --define(th_version_len, 2). --define(th_prefix_len, 167). - --record(tar_header, - {name, % Name of file. - mode, % Mode bits. - uid, % User id. - gid, % Group id. - size, % Size of file - mtime, % Last modified (seconds since - % Jan 1, 1970). - chksum, % Checksum of header. - typeflag = [], % Type of file. - linkname = [], % Name of link. - filler = [], - prefix}). % Filename prefix. - --define(record_size, 512). --define(block_size, (512*20)). - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%% -%%% Adding members to a tar archive. 
-%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -add1(TarFile, Bin, NameInArchive, Opts) when is_binary(Bin) -> - Now = calendar:now_to_local_time(erlang:timestamp()), - Info = #file_info{size = byte_size(Bin), - type = regular, - access = read_write, - atime = Now, - mtime = Now, - ctime = Now, - mode = 8#100644, - links = 1, - major_device = 0, - minor_device = 0, - inode = 0, - uid = 0, - gid = 0}, - Header = create_header(NameInArchive, Info), - add1(TarFile, NameInArchive, Header, Bin, Opts); -add1(TarFile, Name, NameInArchive, Opts) -> - case read_file_and_info(Name, Opts) of - {ok, Bin, Info} when Info#file_info.type =:= regular -> - Header = create_header(NameInArchive, Info), - add1(TarFile, Name, Header, Bin, Opts); - {ok, PointsTo, Info} when Info#file_info.type =:= symlink -> - if - length(PointsTo) > 100 -> - {error,{PointsTo,symbolic_link_too_long}}; - true -> - Info2 = Info#file_info{size=0}, - Header = create_header(NameInArchive, Info2, PointsTo), - add1(TarFile, Name, Header, list_to_binary([]), Opts) - end; - {ok, _, Info} when Info#file_info.type =:= directory -> - add_directory(TarFile, Name, NameInArchive, Info, Opts); - {ok, _, #file_info{type=Type}} -> - {error, {bad_file_type, Name, Type}}; - {error, Reason} -> - {error, {Name, Reason}} +open_mode(read, _, Raw, _) -> + {ok, read, Raw, []}; +open_mode(write, _, Raw, _) -> + {ok, write, Raw, []}; +open_mode([read|Rest], false, Raw, Opts) -> + open_mode(Rest, read, Raw, Opts); +open_mode([write|Rest], false, Raw, Opts) -> + open_mode(Rest, write, Raw, Opts); +open_mode([compressed|Rest], Access, Raw, Opts) -> + open_mode(Rest, Access, Raw, [compressed|Opts]); +open_mode([cooked|Rest], Access, _Raw, Opts) -> + open_mode(Rest, Access, [], Opts); +open_mode([], Access, Raw, Opts) -> + {ok, Access, Raw, Opts}; +open_mode(_, _, _, _) -> + {error, einval}. + +file_op(write, {Fd, Data}) -> + file:write(Fd, Data); +file_op(position, {Fd, Pos}) -> + file:position(Fd, Pos); +file_op(read2, {Fd, Size}) -> + file:read(Fd, Size); +file_op(close, Fd) -> + file:close(Fd). + +%% Closes a tar archive. +-spec close(reader()) -> ok | {error, term()}. +close(#reader{access=read}=Reader) -> + ok = do_close(Reader); +close(#reader{access=write}=Reader) -> + {ok, Reader2} = pad_file(Reader), + ok = do_close(Reader2), + ok; +close(_) -> + {error, einval}. + +pad_file(#reader{pos=Pos}=Reader) -> + %% There must be at least two zero blocks at the end. + PadCurrent = skip_padding(Pos+?BLOCK_SIZE), + Padding = <<0:PadCurrent/unit:8>>, + do_write(Reader, [Padding, ?ZERO_BLOCK, ?ZERO_BLOCK]). + + +%%%================================================================ +%% Creation/modification of tar archives + +%% Creates a tar file Name containing the given files. +-spec create(file:filename(), filelist()) -> ok | {error, {string(), term()}}. +create(Name, FileList) when is_list(Name); is_binary(Name) -> + create(Name, FileList, []). + +%% Creates a tar archive Name containing the given files. +%% Accepted options: verbose, compressed, cooked +-spec create(file:filename(), filelist(), [create_opt()]) -> + ok | {error, term()} | {error, {string(), term()}}. +create(Name, FileList, Options) when is_list(Name); is_binary(Name) -> + Mode = lists:filter(fun(X) -> (X=:=compressed) or (X=:=cooked) + end, Options), + case open(Name, [write|Mode]) of + {ok, TarFile} -> + do_create(TarFile, FileList, Options); + {error, _} = Err -> + Err end. 
-add1(Tar, Name, Header, chunked, Options) -> - add_verbose(Options, "a ~ts [chunked ", [Name]), - try - ok = do_write(Tar, Header), - {ok,D} = file:open(Name, [read,binary]), - {ok,NumBytes} = add_read_write_chunks(D, Tar, Options#add_opts.chunk_size, 0, Options), - _ = file:close(D), - ok = do_write(Tar, padding(NumBytes,?record_size)) - of - ok -> - add_verbose(Options, "~n", []), - ok - catch - error:{badmatch,{error,Error}} -> - add_verbose(Options, "~n", []), - {error,{Name,Error}} +do_create(TarFile, [], _Opts) -> + close(TarFile); +do_create(TarFile, [{NameInArchive, NameOrBin}|Rest], Opts) -> + case add(TarFile, NameOrBin, NameInArchive, Opts) of + ok -> + do_create(TarFile, Rest, Opts); + {error, _} = Err -> + _ = close(TarFile), + Err end; -add1(Tar, Name, Header, Bin, Options) -> - add_verbose(Options, "a ~ts~n", [Name]), - do_write(Tar, [Header, Bin, padding(byte_size(Bin), ?record_size)]). - -add_read_write_chunks(D, Tar, ChunkSize, SumNumBytes, Options) -> - case file:read(D, ChunkSize) of - {ok,Bin} -> - ok = do_write(Tar, Bin), - add_verbose(Options, ".", []), - add_read_write_chunks(D, Tar, ChunkSize, SumNumBytes+byte_size(Bin), Options); - eof -> - add_verbose(Options, "]", []), - {ok,SumNumBytes}; - Other -> - Other +do_create(TarFile, [Name|Rest], Opts) -> + case add(TarFile, Name, Name, Opts) of + ok -> + do_create(TarFile, Rest, Opts); + {error, _} = Err -> + _ = close(TarFile), + Err end. -add_directory(TarFile, DirName, NameInArchive, Info, Options) -> +%% Adds a file to a tape archive. +-type add_type() :: string() + | {string(), string()} + | {string(), binary()}. +-spec add(reader(), add_type(), [add_opt()]) -> ok | {error, term()}. +add(Reader, {NameInArchive, Name}, Opts) + when is_list(NameInArchive), is_list(Name) -> + do_add(Reader, Name, NameInArchive, Opts); +add(Reader, {NameInArchive, Bin}, Opts) + when is_list(NameInArchive), is_binary(Bin) -> + do_add(Reader, Bin, NameInArchive, Opts); +add(Reader, Name, Opts) when is_list(Name) -> + do_add(Reader, Name, Name, Opts). + + +-spec add(reader(), string() | binary(), string(), [add_opt()]) -> + ok | {error, term()}. +add(Reader, NameOrBin, NameInArchive, Options) + when is_list(NameOrBin); is_binary(NameOrBin), + is_list(NameInArchive), is_list(Options) -> + do_add(Reader, NameOrBin, NameInArchive, Options). + +do_add(#reader{access=write}=Reader, Name, NameInArchive, Options) + when is_list(NameInArchive), is_list(Options) -> + Opts = #add_opts{read_info=fun(F) -> file:read_link_info(F) end}, + add1(Reader, Name, NameInArchive, add_opts(Options, Opts)); +do_add(#reader{access=read},_,_,_) -> + {error, eacces}; +do_add(Reader,_,_,_) -> + {error, {badarg, Reader}}. + +add_opts([dereference|T], Opts) -> + add_opts(T, Opts#add_opts{read_info=fun(F) -> file:read_file_info(F) end}); +add_opts([verbose|T], Opts) -> + add_opts(T, Opts#add_opts{verbose=true}); +add_opts([{chunks,N}|T], Opts) -> + add_opts(T, Opts#add_opts{chunk_size=N}); +add_opts([_|T], Opts) -> + add_opts(T, Opts); +add_opts([], Opts) -> + Opts. 
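
%% ---------------------------------------------------------------------
%% [Editor's illustration, not part of the patch] Sketch of the write-side
%% API defined above: entries can be added from disk or from an in-memory
%% binary under a chosen archive name. "out.tar" and the payloads are
%% hypothetical.
example_create_and_add() ->
    {ok, Tar} = erl_tar:open("out.tar", [write]),
    ok = erl_tar:add(Tar, {"docs/readme.txt", <<"hello, tar">>}, [verbose]),
    ok = erl_tar:add(Tar, "existing_file.txt", []),
    ok = erl_tar:close(Tar).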
+ +add1(#reader{}=Reader, Name, NameInArchive, #add_opts{read_info=ReadInfo}=Opts) + when is_list(Name) -> + Res = case ReadInfo(Name) of + {error, Reason0} -> + {error, {Name, Reason0}}; + {ok, #file_info{type=symlink}=Fi} -> + add_verbose(Opts, "a ~ts~n", [NameInArchive]), + {ok, Linkname} = file:read_link(Name), + Header = fileinfo_to_header(NameInArchive, Fi, Linkname), + add_header(Reader, Header, Opts); + {ok, #file_info{type=regular}=Fi} -> + add_verbose(Opts, "a ~ts~n", [NameInArchive]), + Header = fileinfo_to_header(NameInArchive, Fi, false), + {ok, Reader2} = add_header(Reader, Header, Opts), + FileSize = Header#tar_header.size, + {ok, FileSize, Reader3} = do_copy(Reader2, Name, Opts), + Padding = skip_padding(FileSize), + Pad = <<0:Padding/unit:8>>, + do_write(Reader3, Pad); + {ok, #file_info{type=directory}=Fi} -> + add_directory(Reader, Name, NameInArchive, Fi, Opts); + {ok, #file_info{}=Fi} -> + add_verbose(Opts, "a ~ts~n", [NameInArchive]), + Header = fileinfo_to_header(NameInArchive, Fi, false), + add_header(Reader, Header, Opts) + end, + case Res of + ok -> ok; + {ok, _Reader} -> ok; + {error, _Reason} = Err -> Err + end; +add1(Reader, Bin, NameInArchive, Opts) when is_binary(Bin) -> + add_verbose(Opts, "a ~ts~n", [NameInArchive]), + Now = calendar:now_to_local_time(erlang:timestamp()), + Header = #tar_header{ + name = NameInArchive, + size = byte_size(Bin), + typeflag = ?TYPE_REGULAR, + atime = Now, + mtime = Now, + ctime = Now, + mode = 8#100644}, + {ok, Reader2} = add_header(Reader, Header, Opts), + Padding = skip_padding(byte_size(Bin)), + Data = [Bin, <<0:Padding/unit:8>>], + case do_write(Reader2, Data) of + {ok, _Reader3} -> ok; + {error, Reason} -> {error, {NameInArchive, Reason}} + end. + +add_directory(Reader, DirName, NameInArchive, Info, Opts) -> case file:list_dir(DirName) of - {ok, []} -> - add_verbose(Options, "a ~ts~n", [DirName]), - Header = create_header(NameInArchive, Info), - do_write(TarFile, Header); - {ok, Files} -> - Add = fun (File) -> - add1(TarFile, - filename:join(DirName, File), - filename:join(NameInArchive, File), - Options) end, - foreach_while_ok(Add, Files); - {error, Reason} -> - {error, {DirName, Reason}} + {ok, []} -> + add_verbose(Opts, "a ~ts~n", [NameInArchive]), + Header = fileinfo_to_header(NameInArchive, Info, false), + add_header(Reader, Header, Opts); + {ok, Files} -> + add_verbose(Opts, "a ~ts~n", [NameInArchive]), + try add_files(Reader, Files, DirName, NameInArchive, Opts) of + ok -> ok; + {error, _} = Err -> Err + catch + throw:{error, {_Name, _Reason}} = Err -> Err; + throw:{error, Reason} -> {error, {DirName, Reason}} + end; + {error, Reason} -> + {error, {DirName, Reason}} end. - -%% Creates a header for file in a tar file. - -create_header(Name, Info) -> - create_header(Name, Info, []). -create_header(Name, #file_info {mode=Mode, uid=Uid, gid=Gid, - size=Size, mtime=Mtime0, type=Type}, Linkname) -> - Mtime = posix_time(erlang:localtime_to_universaltime(Mtime0)), - {Prefix,Suffix} = split_filename(Name), - H0 = [to_string(Suffix, 100), - to_octal(Mode, 8), - to_octal(Uid, 8), - to_octal(Gid, 8), - to_octal(Size, ?th_size_len), - to_octal(Mtime, ?th_mtime_len), - <<" ">>, - file_type(Type), - to_string(Linkname, ?th_linkname_len), - "ustar",0, - "00", - zeroes(?th_prefix-?th_version-?th_version_len), - to_string(Prefix, ?th_prefix_len)], - H = list_to_binary(H0), - 512 = byte_size(H), %Assertion. - ChksumString = to_octal(checksum(H), 6, [0,$\s]), - <> = H, - [Before,ChksumString,After]. 
- -file_type(regular) -> $0; -file_type(symlink) -> $2; -file_type(directory) -> $5. - -to_octal(Int, Count) when Count > 1 -> - to_octal(Int, Count-1, [0]). - -to_octal(_, 0, Result) -> Result; -to_octal(Int, Count, Result) -> - to_octal(Int div 8, Count-1, [Int rem 8 + $0|Result]). - -to_string(Str0, Count) -> - Str = case file:native_name_encoding() of - utf8 -> - unicode:characters_to_binary(Str0); - latin1 -> - list_to_binary(Str0) - end, - case byte_size(Str) of - Size when Size < Count -> - [Str|zeroes(Count-Size)]; - _ -> Str + +add_files(_Reader, [], _Dir, _DirInArchive, _Opts) -> + ok; +add_files(Reader, [Name|Rest], Dir, DirInArchive, #add_opts{read_info=Info}=Opts) -> + FullName = filename:join(Dir, Name), + NameInArchive = filename:join(DirInArchive, Name), + Res = case Info(FullName) of + {error, Reason} -> + {error, {FullName, Reason}}; + {ok, #file_info{type=directory}=Fi} -> + add_directory(Reader, FullName, NameInArchive, Fi, Opts); + {ok, #file_info{type=symlink}=Fi} -> + add_verbose(Opts, "a ~ts~n", [NameInArchive]), + {ok, Linkname} = file:read_link(FullName), + Header = fileinfo_to_header(NameInArchive, Fi, Linkname), + add_header(Reader, Header, Opts); + {ok, #file_info{type=regular}=Fi} -> + add_verbose(Opts, "a ~ts~n", [NameInArchive]), + Header = fileinfo_to_header(NameInArchive, Fi, false), + {ok, Reader2} = add_header(Reader, Header, Opts), + FileSize = Header#tar_header.size, + {ok, FileSize, Reader3} = do_copy(Reader2, FullName, Opts), + Padding = skip_padding(FileSize), + Pad = <<0:Padding/unit:8>>, + do_write(Reader3, Pad); + {ok, #file_info{}=Fi} -> + add_verbose(Opts, "a ~ts~n", [NameInArchive]), + Header = fileinfo_to_header(NameInArchive, Fi, false), + add_header(Reader, Header, Opts) + end, + case Res of + ok -> add_files(Reader, Rest, Dir, DirInArchive, Opts); + {ok, ReaderNext} -> add_files(ReaderNext, Rest, Dir, DirInArchive, Opts); + {error, _} = Err -> Err end. -%% Pads out end of file. - -pad_file(File) -> - {ok,Position} = do_position(File, {cur,0}), - %% There must be at least two zero records at the end. - Fill = case ?block_size - (Position rem ?block_size) of - Fill0 when Fill0 < 2*?record_size -> - %% We need to another block here to ensure that there - %% are at least two zero records at the end. - Fill0 + ?block_size; - Fill0 -> - %% Large enough. - Fill0 - end, - do_write(File, zeroes(Fill)). - -split_filename(Name) when length(Name) =< ?th_name_len -> - {"", Name}; -split_filename(Name0) -> - split_filename(lists:reverse(filename:split(Name0)), [], [], 0). - -split_filename([Comp|Rest], Prefix, Suffix, Len) - when Len+length(Comp) < ?th_name_len -> - split_filename(Rest, Prefix, [Comp|Suffix], Len+length(Comp)+1); -split_filename([Comp|Rest], Prefix, Suffix, Len) -> - split_filename(Rest, [Comp|Prefix], Suffix, Len+length(Comp)+1); -split_filename([], Prefix, Suffix, _) -> - {filename:join(Prefix),filename:join(Suffix)}. - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%% -%%% Retrieving files from a tape archive. -%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%% Options used when reading a tar archive. - --record(read_opts, - {cwd :: string(), % Current working directory. - keep_old_files = false :: boolean(), % Owerwrite or not. - files = all, % Set of files to extract - % (or all). - output = file :: 'file' | 'memory', - open_mode = [], % Open mode options. - verbose = false :: boolean()}). % Verbose on/off. 
+format_string(String, Size) when length(String) > Size ->
+    throw({error, {write_string, field_too_long}});
+format_string(String, Size) ->
+    Ascii = to_ascii(String),
+    if byte_size(Ascii) < Size ->
+            [Ascii, 0];
+       true ->
+            Ascii
+    end.
 
-extract_opts(List) ->
-    extract_opts(List, default_options()).
+format_octal(Octal) ->
+    iolist_to_binary(io_lib:fwrite("~.8B", [Octal])).
+
+add_header(#reader{}=Reader, #tar_header{}=Header, Opts) ->
+    {ok, Iodata} = build_header(Header, Opts),
+    do_write(Reader, Iodata).
+
+write_to_block(Block, IoData, Start) when is_list(IoData) ->
+    write_to_block(Block, iolist_to_binary(IoData), Start);
+write_to_block(Block, Bin, Start) when is_binary(Bin) ->
+    Size = byte_size(Bin),
+    <<Head:Start/binary, _:Size/binary, Rest/binary>> = Block,
+    <<Head/binary, Bin/binary, Rest/binary>>.
+
+build_header(#tar_header{}=Header, Opts) ->
+    #tar_header{
+        name=Name,
+        mode=Mode,
+        uid=Uid,
+        gid=Gid,
+        size=Size,
+        typeflag=Type,
+        linkname=Linkname,
+        uname=Uname,
+        gname=Gname,
+        devmajor=Devmaj,
+        devminor=Devmin
+    } = Header,
+    Mtime = datetime_to_posix(Header#tar_header.mtime),
+
+    Block0 = ?ZERO_BLOCK,
+    {Block1, Pax0} = write_string(Block0, ?V7_NAME, ?V7_NAME_LEN, Name, ?PAX_PATH, #{}),
+    Block2 = write_octal(Block1, ?V7_MODE, ?V7_MODE_LEN, Mode),
+    {Block3, Pax1} = write_numeric(Block2, ?V7_UID, ?V7_UID_LEN, Uid, ?PAX_UID, Pax0),
+    {Block4, Pax2} = write_numeric(Block3, ?V7_GID, ?V7_GID_LEN, Gid, ?PAX_GID, Pax1),
+    {Block5, Pax3} = write_numeric(Block4, ?V7_SIZE, ?V7_SIZE_LEN, Size, ?PAX_SIZE, Pax2),
+    {Block6, Pax4} = write_numeric(Block5, ?V7_MTIME, ?V7_MTIME_LEN, Mtime, ?PAX_NONE, Pax3),
+    {Block7, Pax5} = write_string(Block6, ?V7_TYPE, ?V7_TYPE_LEN, <<Type>>, ?PAX_NONE, Pax4),
+    {Block8, Pax6} = write_string(Block7, ?V7_LINKNAME, ?V7_LINKNAME_LEN,
+                                  Linkname, ?PAX_LINKPATH, Pax5),
+    {Block9, Pax7} = write_string(Block8, ?USTAR_UNAME, ?USTAR_UNAME_LEN,
+                                  Uname, ?PAX_UNAME, Pax6),
+    {Block10, Pax8} = write_string(Block9, ?USTAR_GNAME, ?USTAR_GNAME_LEN,
+                                   Gname, ?PAX_GNAME, Pax7),
+    {Block11, Pax9} = write_numeric(Block10, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN,
+                                    Devmaj, ?PAX_NONE, Pax8),
+    {Block12, Pax10} = write_numeric(Block11, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN,
+                                     Devmin, ?PAX_NONE, Pax9),
+    {Block13, Pax11} = set_path(Block12, Pax10),
+    PaxEntry = case maps:size(Pax11) of
+                   0 -> [];
+                   _ -> build_pax_entry(Header, Pax11, Opts)
+               end,
+    Block14 = set_format(Block13, ?FORMAT_USTAR),
+    Block15 = set_checksum(Block14),
+    {ok, [PaxEntry, Block15]}.
+
+set_path(Block0, Pax) ->
+    %% only use ustar header when name is too long
+    case maps:get(?PAX_PATH, Pax, nil) of
+        nil ->
+            {Block0, Pax};
+        PaxPath ->
+            case split_ustar_path(PaxPath) of
+                {ok, UstarName, UstarPrefix} ->
+                    {Block1, _} = write_string(Block0, ?V7_NAME, ?V7_NAME_LEN,
+                                               UstarName, ?PAX_NONE, #{}),
+                    {Block2, _} = write_string(Block1, ?USTAR_PREFIX, ?USTAR_PREFIX_LEN,
+                                               UstarPrefix, ?PAX_NONE, #{}),
+                    {Block2, maps:remove(?PAX_PATH, Pax)};
+                false ->
+                    {Block0, Pax}
+            end
+    end.
 
-table_opts(List) ->
-    read_opts(List, default_options()).
+set_format(Block0, Format)
+  when Format =:= ?FORMAT_USTAR; Format =:= ?FORMAT_PAX ->
+    Block1 = write_to_block(Block0, ?MAGIC_USTAR, ?USTAR_MAGIC),
+    write_to_block(Block1, ?VERSION_USTAR, ?USTAR_VERSION);
+set_format(_Block, Format) ->
+    throw({error, {invalid_format, Format}}).
+
+set_checksum(Block) ->
+    Checksum = compute_checksum(Block),
+    write_octal(Block, ?V7_CHKSUM, ?V7_CHKSUM_LEN, Checksum).
+
+build_pax_entry(Header, PaxAttrs, Opts) ->
+    Path = Header#tar_header.name,
+    Filename = filename:basename(Path),
+    Dir = filename:dirname(Path),
+    Path2 = filename:join([Dir, "PaxHeaders.0", Filename]),
+    AsciiPath = to_ascii(Path2),
+    Path3 = if byte_size(AsciiPath) > ?V7_NAME_LEN ->
+                   binary_part(AsciiPath, 0, ?V7_NAME_LEN - 1);
+               true ->
+                   AsciiPath
+            end,
+    Keys = maps:keys(PaxAttrs),
+    SortedKeys = lists:sort(Keys),
+    PaxFile = build_pax_file(SortedKeys, PaxAttrs),
+    Size = byte_size(PaxFile),
+    Padding = (?BLOCK_SIZE -
+                   (byte_size(PaxFile) rem ?BLOCK_SIZE)) rem ?BLOCK_SIZE,
+    Pad = <<0:Padding/unit:8>>,
+    PaxHeader = #tar_header{
+                   name=unicode:characters_to_list(Path3),
+                   size=Size,
+                   mtime=Header#tar_header.mtime,
+                   atime=Header#tar_header.atime,
+                   ctime=Header#tar_header.ctime,
+                   typeflag=?TYPE_X_HEADER
+                  },
+    {ok, PaxHeaderData} = build_header(PaxHeader, Opts),
+    [PaxHeaderData, PaxFile, Pad].
+
+build_pax_file(Keys, PaxAttrs) ->
+    build_pax_file(Keys, PaxAttrs, []).
+build_pax_file([], _, Acc) ->
+    unicode:characters_to_binary(Acc);
+build_pax_file([K|Rest], Attrs, Acc) ->
+    V = maps:get(K, Attrs),
+    Size = sizeof(K) + sizeof(V) + 3,
+    Size2 = sizeof(Size) + Size,
+    Key = to_string(K),
+    Value = to_string(V),
+    Record = unicode:characters_to_binary(io_lib:format("~B ~ts=~ts\n", [Size2, Key, Value])),
+    if byte_size(Record) =/= Size2 ->
+           Size3 = byte_size(Record),
+           Record2 = io_lib:format("~B ~ts=~ts\n", [Size3, Key, Value]),
+           build_pax_file(Rest, Attrs, [Acc, Record2]);
+       true ->
+           build_pax_file(Rest, Attrs, [Acc, Record])
+    end.
 
-default_options() ->
-    {ok, Cwd} = file:get_cwd(),
-    #read_opts{cwd=Cwd}.
+sizeof(Bin) when is_binary(Bin) ->
+    byte_size(Bin);
+sizeof(List) when is_list(List) ->
+    length(List);
+sizeof(N) when is_integer(N) ->
+    byte_size(integer_to_binary(N));
+sizeof(N) when is_float(N) ->
+    byte_size(float_to_binary(N)).
+
+to_string(Bin) when is_binary(Bin) ->
+    unicode:characters_to_list(Bin);
+to_string(List) when is_list(List) ->
+    List;
+to_string(N) when is_integer(N) ->
+    integer_to_list(N);
+to_string(N) when is_float(N) ->
+    float_to_list(N).
+
+split_ustar_path(Path) ->
+    Len = length(Path),
+    NotAscii = not is_ascii(Path),
+    if Len =< ?V7_NAME_LEN; NotAscii ->
+           false;
+       true ->
+           PathBin = binary:list_to_bin(Path),
+           case binary:split(PathBin, [<<$/>>], [global, trim_all]) of
+               [Part] when byte_size(Part) >= ?V7_NAME_LEN ->
+                   false;
+               Parts ->
+                   case lists:last(Parts) of
+                       Name when byte_size(Name) >= ?V7_NAME_LEN ->
+                           false;
+                       Name ->
+                           Parts2 = lists:sublist(Parts, length(Parts) - 1),
+                           join_split_ustar_path(Parts2, {ok, Name, nil})
+                   end
+           end
+    end.
 
-%% Parse options for extract.
+join_split_ustar_path([], Acc) ->
+    Acc;
+join_split_ustar_path([Part|_], {ok, _, nil})
+  when byte_size(Part) > ?USTAR_PREFIX_LEN ->
+    false;
+join_split_ustar_path([Part|_], {ok, _Name, Acc})
+  when (byte_size(Part)+byte_size(Acc)) > ?USTAR_PREFIX_LEN ->
+    false;
+join_split_ustar_path([Part|Rest], {ok, Name, nil}) ->
+    join_split_ustar_path(Rest, {ok, Name, Part});
+join_split_ustar_path([Part|Rest], {ok, Name, Acc}) ->
+    join_split_ustar_path(Rest, {ok, Name, <<Acc/binary,$/,Part/binary>>}).
+
+datetime_to_posix(DateTime) ->
+    Epoch = calendar:datetime_to_gregorian_seconds(?EPOCH),
+    Secs = calendar:datetime_to_gregorian_seconds(DateTime),
+    case Secs - Epoch of
+        N when N < 0 -> 0;
+        N -> N
+    end.
-extract_opts([keep_old_files|Rest], Opts) ->
-    extract_opts(Rest, Opts#read_opts{keep_old_files=true});
-extract_opts([{cwd, Cwd}|Rest], Opts) ->
-    extract_opts(Rest, Opts#read_opts{cwd=Cwd});
-extract_opts([{files, Files}|Rest], Opts) ->
-    Set = ordsets:from_list(Files),
-    extract_opts(Rest, Opts#read_opts{files=Set});
-extract_opts([memory|Rest], Opts) ->
-    extract_opts(Rest, Opts#read_opts{output=memory});
-extract_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
-    extract_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]});
-extract_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
-    extract_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]});
-extract_opts([verbose|Rest], Opts) ->
-    extract_opts(Rest, Opts#read_opts{verbose=true});
-extract_opts([Other|Rest], Opts) ->
-    extract_opts(Rest, read_opts([Other], Opts));
-extract_opts([], Opts) ->
-    Opts.
+write_octal(Block, Pos, Size, X) ->
+    Octal = zero_pad(format_octal(X), Size-1),
+    if byte_size(Octal) < Size ->
+           write_to_block(Block, Octal, Pos);
+       true ->
+           throw({error, {write_failed, octal_field_too_long}})
+    end.
 
-%% Common options for all read operations.
+write_string(Block, Pos, Size, Str, PaxAttr, Pax0) ->
+    NotAscii = not is_ascii(Str),
+    if PaxAttr =/= ?PAX_NONE andalso (length(Str) > Size orelse NotAscii) ->
+           Pax1 = maps:put(PaxAttr, Str, Pax0),
+           {Block, Pax1};
+       true ->
+           Formatted = format_string(Str, Size),
+           {write_to_block(Block, Formatted, Pos), Pax0}
+    end.
+write_numeric(Block, Pos, Size, X, PaxAttr, Pax0) ->
+    %% attempt octal
+    Octal = zero_pad(format_octal(X), Size-1),
+    if byte_size(Octal) < Size ->
+           {write_to_block(Block, [Octal, 0], Pos), Pax0};
+       PaxAttr =/= ?PAX_NONE ->
+           Pax1 = maps:put(PaxAttr, X, Pax0),
+           {Block, Pax1};
+       true ->
+           throw({error, {write_failed, numeric_field_too_long}})
+    end.
 
-read_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
-    read_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]});
-read_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
-    read_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]});
-read_opts([verbose|Rest], Opts) ->
-    read_opts(Rest, Opts#read_opts{verbose=true});
-read_opts([_|Rest], Opts) ->
-    read_opts(Rest, Opts);
-read_opts([], Opts) ->
-    Opts.
+zero_pad(Str, Size) when byte_size(Str) >= Size ->
+    Str;
+zero_pad(Str, Size) ->
+    Padding = Size - byte_size(Str),
+    Pad = binary:copy(<<$0>>, Padding),
+    <<Pad/binary, Str/binary>>.
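
%% ---------------------------------------------------------------------
%% [Editor's illustration, not part of the patch] Worked values for the
%% field formatters above, assuming access to these module-internal
%% helpers; the function name is hypothetical.
example_octal_field() ->
    <<"644">>     = format_octal(8#644),
    <<"0000644">> = zero_pad(format_octal(8#644), 7),
    %% write_numeric/6 stores this zero-padded octal string followed by a
    %% NUL byte, and falls back to a PAX attribute when the value does not fit.
    ok.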
-foldl_read({AccessMode,TD={tar_descriptor,_UsrHandle,_AccessFun}}, Fun, Accu, Opts) -> - case AccessMode of - read -> - foldl_read0(TD, Fun, Accu, Opts); - _ -> - {error,{read_mode_expected,AccessMode}} - end; -foldl_read(TarName, Fun, Accu, Opts) -> - case open(TarName, [read|Opts#read_opts.open_mode]) of - {ok, {read, File}} -> - Result = foldl_read0(File, Fun, Accu, Opts), - ok = do_close(File), - Result; - Error -> - Error + +%%%================================================================ +%% Functions for creating or modifying tar archives + +read_block(Reader) -> + case do_read(Reader, ?BLOCK_SIZE) of + eof -> + throw({error, eof}); + %% Two zero blocks mark the end of the archive + {ok, ?ZERO_BLOCK, Reader1} -> + case do_read(Reader1, ?BLOCK_SIZE) of + eof -> + % This is technically a malformed end-of-archive marker, + % as two ZERO_BLOCKs are expected as the marker, + % but if we've already made it this far, we should just ignore it + eof; + {ok, ?ZERO_BLOCK, _Reader2} -> + eof; + {ok, _Block, _Reader2} -> + throw({error, invalid_end_of_archive}); + {error,_} = Err -> + throw(Err) + end; + {ok, Block, Reader1} when is_binary(Block) -> + {ok, Block, Reader1}; + {error, _} = Err -> + throw(Err) end. -foldl_read0(File, Fun, Accu, Opts) -> - case catch foldl_read1(Fun, Accu, File, Opts) of - {'EXIT', Reason} -> - exit(Reason); - {error, {Reason, Format, Args}} -> - read_verbose(Opts, Format, Args), - {error, Reason}; - {error, Reason} -> - {error, Reason}; - Ok -> - Ok +get_header(#reader{}=Reader) -> + case read_block(Reader) of + eof -> + eof; + {ok, Block, Reader1} -> + convert_header(Block, Reader1) end. -foldl_read1(Fun, Accu0, File, Opts) -> - case get_header(File) of - eof -> - Fun(eof, File, Opts, Accu0); - Header -> - {ok, NewAccu} = Fun(Header, File, Opts, Accu0), - foldl_read1(Fun, NewAccu, File, Opts) +%% Converts the tar header to a record. +to_v7(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> + #header_v7{ + name=binary_part(Bin, ?V7_NAME, ?V7_NAME_LEN), + mode=binary_part(Bin, ?V7_MODE, ?V7_MODE_LEN), + uid=binary_part(Bin, ?V7_UID, ?V7_UID_LEN), + gid=binary_part(Bin, ?V7_GID, ?V7_GID_LEN), + size=binary_part(Bin, ?V7_SIZE, ?V7_SIZE_LEN), + mtime=binary_part(Bin, ?V7_MTIME, ?V7_MTIME_LEN), + checksum=binary_part(Bin, ?V7_CHKSUM, ?V7_CHKSUM_LEN), + typeflag=binary:at(Bin, ?V7_TYPE), + linkname=binary_part(Bin, ?V7_LINKNAME, ?V7_LINKNAME_LEN) + }; +to_v7(_) -> + {error, header_block_too_small}. + +to_gnu(#header_v7{}=V7, Bin) + when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> + #header_gnu{ + header_v7=V7, + magic=binary_part(Bin, ?GNU_MAGIC, ?GNU_MAGIC_LEN), + version=binary_part(Bin, ?GNU_VERSION, ?GNU_VERSION_LEN), + uname=binary_part(Bin, 265, 32), + gname=binary_part(Bin, 297, 32), + devmajor=binary_part(Bin, 329, 8), + devminor=binary_part(Bin, 337, 8), + atime=binary_part(Bin, 345, 12), + ctime=binary_part(Bin, 357, 12), + sparse=to_sparse_array(binary_part(Bin, 386, 24*4+1)), + real_size=binary_part(Bin, 483, 12) + }. 
+ +to_star(#header_v7{}=V7, Bin) + when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> + #header_star{ + header_v7=V7, + magic=binary_part(Bin, ?USTAR_MAGIC, ?USTAR_MAGIC_LEN), + version=binary_part(Bin, ?USTAR_VERSION, ?USTAR_VERSION_LEN), + uname=binary_part(Bin, ?USTAR_UNAME, ?USTAR_UNAME_LEN), + gname=binary_part(Bin, ?USTAR_GNAME, ?USTAR_GNAME_LEN), + devmajor=binary_part(Bin, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN), + devminor=binary_part(Bin, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN), + prefix=binary_part(Bin, 345, 131), + atime=binary_part(Bin, 476, 12), + ctime=binary_part(Bin, 488, 12), + trailer=binary_part(Bin, ?STAR_TRAILER, ?STAR_TRAILER_LEN) + }. + +to_ustar(#header_v7{}=V7, Bin) + when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> + #header_ustar{ + header_v7=V7, + magic=binary_part(Bin, ?USTAR_MAGIC, ?USTAR_MAGIC_LEN), + version=binary_part(Bin, ?USTAR_VERSION, ?USTAR_VERSION_LEN), + uname=binary_part(Bin, ?USTAR_UNAME, ?USTAR_UNAME_LEN), + gname=binary_part(Bin, ?USTAR_GNAME, ?USTAR_GNAME_LEN), + devmajor=binary_part(Bin, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN), + devminor=binary_part(Bin, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN), + prefix=binary_part(Bin, 345, 155) + }. + +to_sparse_array(Bin) when is_binary(Bin) -> + MaxEntries = byte_size(Bin) div 24, + IsExtended = 1 =:= binary:at(Bin, 24*MaxEntries), + Entries = parse_sparse_entries(Bin, MaxEntries-1, []), + #sparse_array{ + entries=Entries, + max_entries=MaxEntries, + is_extended=IsExtended + }. + +parse_sparse_entries(<<>>, _, Acc) -> + Acc; +parse_sparse_entries(_, -1, Acc) -> + Acc; +parse_sparse_entries(Bin, N, Acc) -> + case to_sparse_entry(binary_part(Bin, N*24, 24)) of + nil -> + parse_sparse_entries(Bin, N-1, Acc); + Entry = #sparse_entry{} -> + parse_sparse_entries(Bin, N-1, [Entry|Acc]) end. -table1(eof, _, _, Result) -> - {ok, lists:reverse(Result)}; -table1(Header = #tar_header{}, File, #read_opts{verbose=true}, Result) -> - #tar_header{name=Name, size=Size, mtime=Mtime, typeflag=Type, - mode=Mode, uid=Uid, gid=Gid} = Header, - skip(File, Size), - {ok, [{Name, Type, Size, posix_to_erlang_time(Mtime), Mode, Uid, Gid}|Result]}; -table1(#tar_header{name=Name, size=Size}, File, _, Result) -> - skip(File, Size), - {ok, [Name|Result]}. - -extract1(eof, _, _, Acc) -> - if - is_list(Acc) -> - {ok, lists:reverse(Acc)}; - true -> - Acc - end; -extract1(Header, File, Opts, Acc) -> - Name = Header#tar_header.name, - case check_extract(Name, Opts) of - true -> - {ok, Bin} = get_element(File, Header), - case write_extracted_element(Header, Bin, Opts) of - ok -> - {ok, Acc}; - {ok, NameBin} when is_list(Acc) -> - {ok, [NameBin | Acc]}; - {ok, NameBin} when Acc =:= ok -> - {ok, [NameBin]} - end; - false -> - ok = skip(File, Header#tar_header.size), - {ok, Acc} +-define(EMPTY_ENTRY, <<0,0,0,0,0,0,0,0,0,0,0,0>>). +to_sparse_entry(Bin) when is_binary(Bin), byte_size(Bin) =:= 24 -> + OffsetBin = binary_part(Bin, 0, 12), + NumBytesBin = binary_part(Bin, 12, 12), + case {OffsetBin, NumBytesBin} of + {?EMPTY_ENTRY, ?EMPTY_ENTRY} -> + nil; + _ -> + #sparse_entry{ + offset=parse_numeric(OffsetBin), + num_bytes=parse_numeric(NumBytesBin)} end. -%% Checks if the file Name should be extracted. +-spec get_format(binary()) -> {ok, pos_integer(), header_v7()} + | ?FORMAT_UNKNOWN + | {error, term()}. +get_format(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> + do_get_format(to_v7(Bin), Bin). 
+ +do_get_format({error, _} = Err, _Bin) -> + Err; +do_get_format(#header_v7{}=V7, Bin) + when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> + Checksum = parse_octal(V7#header_v7.checksum), + Chk1 = compute_checksum(Bin), + Chk2 = compute_signed_checksum(Bin), + if Checksum =/= Chk1 andalso Checksum =/= Chk2 -> + ?FORMAT_UNKNOWN; + true -> + %% guess magic + Ustar = to_ustar(V7, Bin), + Star = to_star(V7, Bin), + Magic = Ustar#header_ustar.magic, + Version = Ustar#header_ustar.version, + Trailer = Star#header_star.trailer, + Format = if + Magic =:= ?MAGIC_USTAR, Trailer =:= ?TRAILER_STAR -> + ?FORMAT_STAR; + Magic =:= ?MAGIC_USTAR -> + ?FORMAT_USTAR; + Magic =:= ?MAGIC_GNU, Version =:= ?VERSION_GNU -> + ?FORMAT_GNU; + true -> + ?FORMAT_V7 + end, + {ok, Format, V7} + end. -check_extract(_, #read_opts{files=all}) -> +unpack_format(Format, #header_v7{}=V7, Bin, Reader) + when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> + Mtime = posix_to_erlang_time(parse_numeric(V7#header_v7.mtime)), + Header0 = #tar_header{ + name=parse_string(V7#header_v7.name), + mode=parse_numeric(V7#header_v7.mode), + uid=parse_numeric(V7#header_v7.uid), + gid=parse_numeric(V7#header_v7.gid), + size=parse_numeric(V7#header_v7.size), + mtime=Mtime, + atime=Mtime, + ctime=Mtime, + typeflag=V7#header_v7.typeflag, + linkname=parse_string(V7#header_v7.linkname) + }, + Typeflag = Header0#tar_header.typeflag, + Header1 = if Format > ?FORMAT_V7 -> + unpack_modern(Format, V7, Bin, Header0); + true -> + Name = Header0#tar_header.name, + Header0#tar_header{name=safe_join_path("", Name)} + end, + HeaderOnly = is_header_only_type(Typeflag), + Header2 = if HeaderOnly -> + Header1#tar_header{size=0}; + true -> + Header1 + end, + if Typeflag =:= ?TYPE_GNU_SPARSE -> + Gnu = to_gnu(V7, Bin), + RealSize = parse_numeric(Gnu#header_gnu.real_size), + {Sparsemap, Reader2} = parse_sparse_map(Gnu, Reader), + Header3 = Header2#tar_header{size=RealSize}, + {Header3, new_sparse_file_reader(Reader2, Sparsemap, RealSize)}; + true -> + FileReader = #reg_file_reader{ + handle=Reader, + num_bytes=Header2#tar_header.size, + size=Header2#tar_header.size, + pos = 0 + }, + {Header2, FileReader} + end. + +unpack_modern(Format, #header_v7{}=V7, Bin, #tar_header{}=Header0) + when is_binary(Bin) -> + Typeflag = Header0#tar_header.typeflag, + Ustar = to_ustar(V7, Bin), + H0 = Header0#tar_header{ + uname=parse_string(Ustar#header_ustar.uname), + gname=parse_string(Ustar#header_ustar.gname)}, + H1 = if Typeflag =:= ?TYPE_CHAR + orelse Typeflag =:= ?TYPE_BLOCK -> + Ma = parse_numeric(Ustar#header_ustar.devmajor), + Mi = parse_numeric(Ustar#header_ustar.devminor), + H0#tar_header{ + devmajor=Ma, + devminor=Mi + }; + true -> + H0 + end, + {Prefix, H2} = case Format of + ?FORMAT_USTAR -> + {parse_string(Ustar#header_ustar.prefix), H1}; + ?FORMAT_STAR -> + Star = to_star(V7, Bin), + Prefix0 = parse_string(Star#header_star.prefix), + Atime0 = Star#header_star.atime, + Atime = posix_to_erlang_time(parse_numeric(Atime0)), + Ctime0 = Star#header_star.ctime, + Ctime = posix_to_erlang_time(parse_numeric(Ctime0)), + {Prefix0, H1#tar_header{ + atime=Atime, + ctime=Ctime + }}; + _ -> + {"", H1} + end, + Name = H2#tar_header.name, + H2#tar_header{name=safe_join_path(Prefix, Name)}. + + +safe_join_path([], Name) -> + strip_slashes(Name, both); +safe_join_path(Prefix, []) -> + strip_slashes(Prefix, right); +safe_join_path(Prefix, Name) -> + filename:join(strip_slashes(Prefix, right), strip_slashes(Name, both)). 
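
%% ---------------------------------------------------------------------
%% [Editor's illustration, not part of the patch] How prefix and name are
%% re-joined when reading ustar headers; a hypothetical helper that assumes
%% it is compiled inside this module.
example_prefix_join() ->
    "usr/local/bin" = safe_join_path("usr/", "/local/bin"),
    "etc/passwd"    = safe_join_path("", "/etc/passwd"),
    ok.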
+ +strip_slashes(Str, Direction) -> + string:strip(Str, Direction, $/). + +new_sparse_file_reader(Reader, Sparsemap, RealSize) -> + true = validate_sparse_entries(Sparsemap, RealSize), + #sparse_file_reader{ + handle = Reader, + num_bytes = RealSize, + pos = 0, + size = RealSize, + sparse_map = Sparsemap}. + +validate_sparse_entries(Entries, RealSize) -> + validate_sparse_entries(Entries, RealSize, 0, 0). +validate_sparse_entries([], _RealSize, _I, _LastOffset) -> true; -check_extract(Name, #read_opts{files=Files}) -> - ordsets:is_element(Name, Files). +validate_sparse_entries([#sparse_entry{}=Entry|Rest], RealSize, I, LastOffset) -> + Offset = Entry#sparse_entry.offset, + NumBytes = Entry#sparse_entry.num_bytes, + if + Offset > ?MAX_INT64-NumBytes -> + throw({error, {invalid_sparse_map_entry, offset_too_large}}); + Offset+NumBytes > RealSize -> + throw({error, {invalid_sparse_map_entry, offset_too_large}}); + I > 0 andalso LastOffset > Offset -> + throw({error, {invalid_sparse_map_entry, overlapping_offsets}}); + true -> + ok + end, + validate_sparse_entries(Rest, RealSize, I+1, Offset+NumBytes). + + +-spec parse_sparse_map(header_gnu(), reader_type()) -> + {[sparse_entry()], reader_type()}. +parse_sparse_map(#header_gnu{sparse=Sparse}, Reader) + when Sparse#sparse_array.is_extended -> + parse_sparse_map(Sparse, Reader, []); +parse_sparse_map(#header_gnu{sparse=Sparse}, Reader) -> + {Sparse#sparse_array.entries, Reader}. +parse_sparse_map(#sparse_array{is_extended=true,entries=Entries}, Reader, Acc) -> + case read_block(Reader) of + eof -> + throw({error, eof}); + {ok, Block, Reader2} -> + Sparse2 = to_sparse_array(Block), + parse_sparse_map(Sparse2, Reader2, Entries++Acc) + end; +parse_sparse_map(#sparse_array{entries=Entries}, Reader, Acc) -> + Sorted = lists:sort(fun (#sparse_entry{offset=A},#sparse_entry{offset=B}) -> + A =< B + end, Entries++Acc), + {Sorted, Reader}. + +%% Defined by taking the sum of the unsigned byte values of the +%% entire header record, treating the checksum bytes to as ASCII spaces +compute_checksum(<>) -> + C0 = checksum(H1) + (byte_size(H2) * $\s), + C1 = checksum(Rest), + C0 + C1. + +compute_signed_checksum(<>) -> + C0 = signed_checksum(H1) + (byte_size(H2) * $\s), + C1 = signed_checksum(Rest), + C0 + C1. -get_header(File) -> - case do_read(File, ?record_size) of - eof -> - throw({error,eof}); - {ok, Bin} when is_binary(Bin) -> - convert_header(Bin); - {ok, List} -> - convert_header(list_to_binary(List)); - {error, Reason} -> - throw({error, Reason}) - end. +%% Returns the checksum of a binary. +checksum(Bin) -> checksum(Bin, 0). +checksum(<>, Sum) -> + checksum(Rest, Sum+A); +checksum(<<>>, Sum) -> Sum. -%% Converts the tar header to a record. +signed_checksum(Bin) -> signed_checksum(Bin, 0). +signed_checksum(<>, Sum) -> + signed_checksum(Rest, Sum+A); +signed_checksum(<<>>, Sum) -> Sum. + +-spec parse_numeric(binary()) -> non_neg_integer(). 
+parse_numeric(<<>>) -> + 0; +parse_numeric(<> = Bin) -> + %% check for base-256 format first + %% if the bit is set, then all following bits constitute a two's + %% complement encoded number in big-endian byte order + if + First band 16#80 =/= 0 -> + %% Handling negative numbers relies on the following identity: + %% -a-1 == ^a + %% If the number is negative, we use an inversion mask to invert + %% the data bytes and treat the value as an unsigned number + Inv = if First band 16#40 =/= 0 -> 16#00; true -> 16#FF end, + Bytes = binary:bin_to_list(Bin), + Reducer = fun (C, {I, X}) -> + C1 = C bxor Inv, + C2 = if I =:= 0 -> C1 band 16#7F; true -> C1 end, + if (X bsr 56) > 0 -> + throw({error,integer_overflow}); + true -> + {I+1, (X bsl 8) bor C2} + end + end, + {_, N} = lists:foldl(Reducer, {0,0}, Bytes), + if (N bsr 63) > 0 -> + throw({error, integer_overflow}); + true -> + if Inv =:= 16#FF -> + -1 bxor N; + true -> + N + end + end; + true -> + %% normal case is an octal number + parse_octal(Bin) + end. -convert_header(Bin) when byte_size(Bin) =:= ?record_size -> - case verify_checksum(Bin) of - ok -> - Hd = #tar_header{name=get_name(Bin), - mode=from_octal(Bin, ?th_mode, ?th_mode_len), - uid=from_octal(Bin, ?th_uid, ?th_uid_len), - gid=from_octal(Bin, ?th_gid, ?th_gid_len), - size=from_octal(Bin, ?th_size, ?th_size_len), - mtime=from_octal(Bin, ?th_mtime, ?th_mtime_len), - linkname=from_string(Bin, - ?th_linkname, ?th_linkname_len), - typeflag=typeflag(Bin)}, - convert_header1(Hd); - eof -> - eof +parse_octal(Bin) when is_binary(Bin) -> + %% skip leading/trailing zero bytes and spaces + do_parse_octal(Bin, <<>>). +do_parse_octal(<<>>, <<>>) -> + 0; +do_parse_octal(<<>>, Acc) -> + case io_lib:fread("~8u", binary:bin_to_list(Acc)) of + {error, _} -> throw({error, invalid_tar_checksum}); + {ok, [Octal], []} -> Octal; + {ok, _, _} -> throw({error, invalid_tar_checksum}) end; -convert_header(Bin) when byte_size(Bin) =:= 0 -> +do_parse_octal(<<$\s,Rest/binary>>, Acc) -> + do_parse_octal(Rest, Acc); +do_parse_octal(<<0, Rest/binary>>, Acc) -> + do_parse_octal(Rest, Acc); +do_parse_octal(<>, Acc) -> + do_parse_octal(Rest, <>). + +parse_string(Bin) when is_binary(Bin) -> + do_parse_string(Bin, <<>>). +do_parse_string(<<>>, Acc) -> + case unicode:characters_to_list(Acc) of + Str when is_list(Str) -> + Str; + {incomplete, _Str, _Rest} -> + binary:bin_to_list(Acc); + {error, _Str, _Rest} -> + throw({error, {bad_header, invalid_string}}) + end; +do_parse_string(<<0, _/binary>>, Acc) -> + do_parse_string(<<>>, Acc); +do_parse_string(<>, Acc) -> + do_parse_string(Rest, <>). + +convert_header(Bin, #reader{pos=Pos}=Reader) + when byte_size(Bin) =:= ?BLOCK_SIZE, (Pos rem ?BLOCK_SIZE) =:= 0 -> + case get_format(Bin) of + ?FORMAT_UNKNOWN -> + throw({error, bad_header}); + {ok, Format, V7} -> + unpack_format(Format, V7, Bin, Reader); + {error, Reason} -> + throw({error, {bad_header, Reason}}) + end; +convert_header(Bin, #reader{pos=Pos}) when byte_size(Bin) =:= ?BLOCK_SIZE -> + throw({error, misaligned_read, Pos}); +convert_header(Bin, _Reader) when byte_size(Bin) =:= 0 -> eof; -convert_header(_Bin) -> +convert_header(_Bin, _Reader) -> throw({error, eof}). -%% Basic sanity. Better set the element size to zero here if the type -%% always is of zero length. 
- -convert_header1(H) when H#tar_header.typeflag =:= symlink, H#tar_header.size =/= 0 -> - convert_header1(H#tar_header{size=0}); -convert_header1(H) when H#tar_header.typeflag =:= directory, H#tar_header.size =/= 0 -> - convert_header1(H#tar_header{size=0}); -convert_header1(Header) -> - Header. - -typeflag(Bin) -> - [T] = binary_to_list(Bin, ?th_typeflag+1, ?th_typeflag+1), - case T of - 0 -> regular; - $0 -> regular; - $1 -> link; - $2 -> symlink; - $3 -> char; - $4 -> block; - $5 -> directory; - $6 -> fifo; - $7 -> regular; - _ -> unknown +%% Creates a partially-populated header record based +%% on the provided file_info record. If the file is +%% a symlink, then `link` is used as the link target. +%% If the file is a directory, a slash is appended to the name. +fileinfo_to_header(Name, #file_info{}=Fi, Link) when is_list(Name) -> + BaseHeader = #tar_header{name=Name, + mtime=Fi#file_info.mtime, + atime=Fi#file_info.atime, + ctime=Fi#file_info.ctime, + mode=Fi#file_info.mode, + uid=Fi#file_info.uid, + gid=Fi#file_info.gid, + typeflag=?TYPE_REGULAR}, + do_fileinfo_to_header(BaseHeader, Fi, Link). + +do_fileinfo_to_header(Header, #file_info{size=Size,type=regular}, _Link) -> + Header#tar_header{size=Size,typeflag=?TYPE_REGULAR}; +do_fileinfo_to_header(#tar_header{name=Name}=Header, + #file_info{type=directory}, _Link) -> + Header#tar_header{name=Name++"/",typeflag=?TYPE_DIR}; +do_fileinfo_to_header(Header, #file_info{type=symlink}, Link) -> + Header#tar_header{typeflag=?TYPE_SYMLINK,linkname=Link}; +do_fileinfo_to_header(Header, #file_info{type=device,mode=Mode}=Fi, _Link) + when (Mode band ?S_IFMT) =:= ?S_IFCHR -> + Header#tar_header{typeflag=?TYPE_CHAR, + devmajor=Fi#file_info.major_device, + devminor=Fi#file_info.minor_device}; +do_fileinfo_to_header(Header, #file_info{type=device,mode=Mode}=Fi, _Link) + when (Mode band ?S_IFMT) =:= ?S_IFBLK -> + Header#tar_header{typeflag=?TYPE_BLOCK, + devmajor=Fi#file_info.major_device, + devminor=Fi#file_info.minor_device}; +do_fileinfo_to_header(Header, #file_info{type=other,mode=Mode}, _Link) + when (Mode band ?S_IFMT) =:= ?S_FIFO -> + Header#tar_header{typeflag=?TYPE_FIFO}; +do_fileinfo_to_header(Header, Fi, _Link) -> + {error, {invalid_file_type, Header#tar_header.name, Fi}}. + +is_ascii(Str) when is_list(Str) -> + not lists:any(fun (Char) -> Char >= 16#80 end, Str); +is_ascii(Bin) when is_binary(Bin) -> + is_ascii1(Bin). + +is_ascii1(<<>>) -> + true; +is_ascii1(<>) when C >= 16#80 -> + false; +is_ascii1(<<_, Rest/binary>>) -> + is_ascii1(Rest). + +to_ascii(Str) when is_list(Str) -> + case is_ascii(Str) of + true -> + unicode:characters_to_binary(Str); + false -> + Chars = lists:filter(fun (Char) -> Char < 16#80 end, Str), + unicode:characters_to_binary(Chars) + end; +to_ascii(Bin) when is_binary(Bin) -> + to_ascii(Bin, <<>>). +to_ascii(<<>>, Acc) -> + Acc; +to_ascii(<>, Acc) when C < 16#80 -> + to_ascii(Rest, <>); +to_ascii(<<_, Rest/binary>>, Acc) -> + to_ascii(Rest, Acc). + +is_header_only_type(?TYPE_SYMLINK) -> true; +is_header_only_type(?TYPE_LINK) -> true; +is_header_only_type(?TYPE_DIR) -> true; +is_header_only_type(_) -> false. + +posix_to_erlang_time(Sec) -> + OneMillion = 1000000, + Time = calendar:now_to_datetime({Sec div OneMillion, Sec rem OneMillion, 0}), + erlang:universaltime_to_localtime(Time). 
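
%% ---------------------------------------------------------------------
%% [Editor's illustration, not part of the patch] Worked examples for the
%% parsing and ASCII helpers in the surrounding hunks, assuming access to
%% these module-internal functions; names are hypothetical.
example_header_fields() ->
    %% octal fields may carry leading zeros plus trailing space/NUL bytes
    420 = parse_octal(<<"0000644 ", 0>>),
    %% ustar string fields are ASCII-only; non-ASCII characters are dropped
    %% here (the full UTF-8 name travels in a PAX attribute instead)
    false = is_ascii("h\x{e9}llo"),
    <<"hllo">> = to_ascii("h\x{e9}llo"),
    ok.

%% Reference formulation of the header checksum described two hunks above:
%% the sum of all 512 unsigned header bytes, with the 8 checksum bytes
%% (at offset 148) counted as ASCII spaces. A sketch only, not the patch's code.
reference_checksum(Block) when byte_size(Block) =:= 512 ->
    <<Pre:148/binary, _Chk:8/binary, Post/binary>> = Block,
    lists:sum(binary_to_list(Pre)) + 8 * $\s + lists:sum(binary_to_list(Post)).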
+ +foldl_read(#reader{access=read}=Reader, Fun, Accu, #read_opts{}=Opts) + when is_function(Fun,4) -> + case foldl_read0(Reader, Fun, Accu, Opts) of + {ok, Result, _Reader2} -> + Result; + {error, _} = Err -> + Err + end; +foldl_read(#reader{access=Access}, _Fun, _Accu, _Opts) -> + {error, {read_mode_expected, Access}}; +foldl_read(TarName, Fun, Accu, #read_opts{}=Opts) + when is_function(Fun,4) -> + try open(TarName, [read|Opts#read_opts.open_mode]) of + {ok, #reader{access=read}=Reader} -> + foldl_read(Reader, Fun, Accu, Opts); + {error, _} = Err -> + Err + catch + throw:Err -> + Err end. -%% Get the name of the file from the prefix and name fields of the -%% tar header. - -get_name(Bin0) -> - List0 = get_name_raw(Bin0), - case file:native_name_encoding() of - utf8 -> - Bin = list_to_binary(List0), - case unicode:characters_to_list(Bin) of - {error,_,_} -> - List0; - List when is_list(List) -> - List - end; - latin1 -> - List0 +foldl_read0(Reader, Fun, Accu, Opts) -> + try foldl_read1(Fun, Accu, Reader, Opts, #{}) of + {ok,_,_} = Ok -> + Ok + catch + throw:{error, {Reason, Format, Args}} -> + read_verbose(Opts, Format, Args), + {error, Reason}; + throw:Err -> + Err end. -get_name_raw(Bin) -> - Name = from_string(Bin, ?th_name, ?th_name_len), - case binary_to_list(Bin, ?th_prefix+1, ?th_prefix+1) of - [0] -> - Name; - [_] -> - Prefix = binary_to_list(Bin, ?th_prefix+1, byte_size(Bin)), - lists:reverse(remove_nulls(Prefix), [$/|Name]) +foldl_read1(Fun, Accu0, Reader0, Opts, ExtraHeaders) -> + {ok, Reader1} = skip_unread(Reader0), + case get_header(Reader1) of + eof -> + Fun(eof, Reader1, Opts, Accu0); + {Header, Reader2} -> + case Header#tar_header.typeflag of + ?TYPE_X_HEADER -> + {ExtraHeaders2, Reader3} = parse_pax(Reader2), + ExtraHeaders3 = maps:merge(ExtraHeaders, ExtraHeaders2), + foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders3); + ?TYPE_GNU_LONGNAME -> + {RealName, Reader3} = get_real_name(Reader2), + ExtraHeaders2 = maps:put(?PAX_PATH, + parse_string(RealName), ExtraHeaders), + foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders2); + ?TYPE_GNU_LONGLINK -> + {RealName, Reader3} = get_real_name(Reader2), + ExtraHeaders2 = maps:put(?PAX_LINKPATH, + parse_string(RealName), ExtraHeaders), + foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders2); + _ -> + Header1 = merge_pax(Header, ExtraHeaders), + {ok, NewAccu, Reader3} = Fun(Header1, Reader2, Opts, Accu0), + foldl_read1(Fun, NewAccu, Reader3, Opts, #{}) + end end. -from_string(Bin, Pos, Len) -> - lists:reverse(remove_nulls(binary_to_list(Bin, Pos+1, Pos+Len))). - -%% Returns all characters up to (but not including) the first null -%% character, in REVERSE order. - -remove_nulls(List) -> - remove_nulls(List, []). - -remove_nulls([0|_], Result) -> - remove_nulls([], Result); -remove_nulls([C|Rest], Result) -> - remove_nulls(Rest, [C|Result]); -remove_nulls([], Result) -> - Result. - -from_octal(Bin, Pos, Len) -> - from_octal(binary_to_list(Bin, Pos+1, Pos+Len)). - -from_octal([$\s|Rest]) -> - from_octal(Rest); -from_octal([Digit|Rest]) when $0 =< Digit, Digit =< $7 -> - from_octal(Rest, Digit-$0); -from_octal(Bin) when is_binary(Bin) -> - from_octal(binary_to_list(Bin)); -from_octal(Other) -> - throw({error, {bad_header, "Bad octal number: ~p", [Other]}}). 
- -from_octal([Digit|Rest], Result) when $0 =< Digit, Digit =< $7 -> - from_octal(Rest, Result*8+Digit-$0); -from_octal([$\s|_], Result) -> - Result; -from_octal([0|_], Result) -> - Result; -from_octal(Other, _) -> - throw({error, {bad_header, "Bad contents in octal field: ~p", [Other]}}). - -%% Retrieves the next element from the archive. -%% Returns {ok, Bin} | eof | {error, Reason} - -get_element(File, #tar_header{size = 0}) -> - skip_to_next(File), - {ok,<<>>}; -get_element(File, #tar_header{size = Size}) -> - case do_read(File, Size) of - {ok,Bin}=Res when byte_size(Bin) =:= Size -> - skip_to_next(File), - Res; - {ok,List} when length(List) =:= Size -> - skip_to_next(File), - {ok,list_to_binary(List)}; - {ok,_} -> throw({error,eof}); - {error, Reason} -> throw({error, Reason}); - eof -> throw({error,eof}) +%% Applies all known PAX attributes to the current tar header +-spec merge_pax(tar_header(), #{binary() => binary()}) -> tar_header(). +merge_pax(Header, ExtraHeaders) when is_map(ExtraHeaders) -> + do_merge_pax(Header, maps:to_list(ExtraHeaders)). + +do_merge_pax(Header, []) -> + Header; +do_merge_pax(Header, [{?PAX_PATH, Path}|Rest]) -> + do_merge_pax(Header#tar_header{name=unicode:characters_to_list(Path)}, Rest); +do_merge_pax(Header, [{?PAX_LINKPATH, LinkPath}|Rest]) -> + do_merge_pax(Header#tar_header{linkname=unicode:characters_to_list(LinkPath)}, Rest); +do_merge_pax(Header, [{?PAX_GNAME, Gname}|Rest]) -> + do_merge_pax(Header#tar_header{gname=unicode:characters_to_list(Gname)}, Rest); +do_merge_pax(Header, [{?PAX_UNAME, Uname}|Rest]) -> + do_merge_pax(Header#tar_header{uname=unicode:characters_to_list(Uname)}, Rest); +do_merge_pax(Header, [{?PAX_UID, Uid}|Rest]) -> + Uid2 = binary_to_integer(Uid), + do_merge_pax(Header#tar_header{uid=Uid2}, Rest); +do_merge_pax(Header, [{?PAX_GID, Gid}|Rest]) -> + Gid2 = binary_to_integer(Gid), + do_merge_pax(Header#tar_header{gid=Gid2}, Rest); +do_merge_pax(Header, [{?PAX_ATIME, Atime}|Rest]) -> + Atime2 = parse_pax_time(Atime), + do_merge_pax(Header#tar_header{atime=Atime2}, Rest); +do_merge_pax(Header, [{?PAX_MTIME, Mtime}|Rest]) -> + Mtime2 = parse_pax_time(Mtime), + do_merge_pax(Header#tar_header{mtime=Mtime2}, Rest); +do_merge_pax(Header, [{?PAX_CTIME, Ctime}|Rest]) -> + Ctime2 = parse_pax_time(Ctime), + do_merge_pax(Header#tar_header{ctime=Ctime2}, Rest); +do_merge_pax(Header, [{?PAX_SIZE, Size}|Rest]) -> + Size2 = binary_to_integer(Size), + do_merge_pax(Header#tar_header{size=Size2}, Rest); +do_merge_pax(Header, [{<>, _Value}|Rest]) -> + do_merge_pax(Header, Rest); +do_merge_pax(Header, [_Ignore|Rest]) -> + do_merge_pax(Header, Rest). + +%% Returns the time since UNIX epoch as a datetime +-spec parse_pax_time(binary()) -> calendar:datetime(). 
+parse_pax_time(Bin) when is_binary(Bin) ->
+    TotalNano = case binary:split(Bin, [<<$.>>]) of
+                    [SecondsStr, NanoStr0] ->
+                        Seconds = binary_to_integer(SecondsStr),
+                        if byte_size(NanoStr0) < ?MAX_NANO_INT_SIZE ->
+                                %% right pad
+                                PaddingN = ?MAX_NANO_INT_SIZE-byte_size(NanoStr0),
+                                Padding = binary:copy(<<$0>>, PaddingN),
+                                NanoStr1 = <<NanoStr0/binary, Padding/binary>>,
+                                Nano = binary_to_integer(NanoStr1),
+                                (Seconds*?BILLION)+Nano;
+                           byte_size(NanoStr0) > ?MAX_NANO_INT_SIZE ->
+                                %% right truncate
+                                NanoStr1 = binary_part(NanoStr0, 0, ?MAX_NANO_INT_SIZE),
+                                Nano = binary_to_integer(NanoStr1),
+                                (Seconds*?BILLION)+Nano;
+                           true ->
+                                (Seconds*?BILLION)+binary_to_integer(NanoStr0)
+                        end;
+                    [SecondsStr] ->
+                        binary_to_integer(SecondsStr)*?BILLION
+                end,
+    %% truncate to microseconds
+    Micro = TotalNano div 1000,
+    Mega = Micro div 1000000000000,
+    Secs = Micro div 1000000 - (Mega*1000000),
+    Micro2 = Micro rem 1000000,
+    calendar:now_to_datetime({Mega, Secs, Micro2}).
+
+%% Given a regular file reader, reads the whole file and
+%% parses all extended attributes it contains.
+parse_pax(#reg_file_reader{handle=Handle,num_bytes=0}) ->
+    {#{}, Handle};
+parse_pax(#reg_file_reader{handle=Handle0,num_bytes=NumBytes}) ->
+    case do_read(Handle0, NumBytes) of
+        {ok, Bytes, Handle1} ->
+            do_parse_pax(Handle1, Bytes, #{});
+        {error, _} = Err ->
+            throw(Err)
     end.
 
-%% Verify the checksum in the header. First try an unsigned addition
-%% of all bytes in the header (as it should be according to Posix).
-
-verify_checksum(Bin) ->
-    <<H1:?th_chksum/binary, CheckStr:?th_chksum_len/binary, H2/binary>> = Bin,
-    case checksum(H1) + checksum(H2) of
-        0 -> eof;
-        Checksum0 ->
-            Csum = from_octal(CheckStr),
-            CsumInit = ?th_chksum_len * $\s,
-            case Checksum0 + CsumInit of
-                Csum -> ok;
-                Unsigned ->
-                    verify_checksum(H1, H2, CsumInit, Csum, Unsigned)
-            end
+do_parse_pax(Reader, <<>>, Headers) ->
+    {Headers, Reader};
+do_parse_pax(Reader, Bin, Headers) ->
+    {Key, Value, Residual} = parse_pax_record(Bin),
+    NewHeaders = maps:put(Key, Value, Headers),
+    do_parse_pax(Reader, Residual, NewHeaders).
+
+%% Parse an extended attribute
+parse_pax_record(Bin) when is_binary(Bin) ->
+    case binary:split(Bin, [<<$\n>>]) of
+        [Record, Residual] ->
+            case binary:split(Record, [<<$\s>>], [trim_all]) of
+                [_Len, Record1] ->
+                    case binary:split(Record1, [<<$=>>], [trim_all]) of
+                        [AttrName, AttrValue] ->
+                            {AttrName, AttrValue, Residual};
+                        _Other ->
+                            throw({error, malformed_pax_record})
+                    end;
+                _Other ->
+                    throw({error, malformed_pax_record})
+            end;
+        _Other ->
+            throw({error, malformed_pax_record})
+    end.
+
+get_real_name(#reg_file_reader{handle=Handle,num_bytes=0}) ->
+    {"", Handle};
+get_real_name(#reg_file_reader{handle=Handle0,num_bytes=NumBytes}) ->
+    case do_read(Handle0, NumBytes) of
+        {ok, RealName, Handle1} ->
+            {RealName, Handle1};
+        {error, _} = Err ->
+            throw(Err)
+    end;
+get_real_name(#sparse_file_reader{num_bytes=NumBytes}=Reader0) ->
+    case do_read(Reader0, NumBytes) of
+        {ok, RealName, Reader1} ->
+            {RealName, Reader1};
+        {error, _} = Err ->
+            throw(Err)
+    end.
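parse_pax_record/1 above relies on the PAX record layout from POSIX.1-2001: each record is "<length> <key>=<value>\n", where the decimal length counts the whole record including the trailing newline. A minimal standalone sketch of that layout follows; the module name and sample record are illustrative only, and the length prefix is ignored just as parse_pax_record/1 ignores it.

-module(pax_record_sketch).
-export([demo/0]).

%% Split one "<len> <key>=<value>\n" record into its parts.
split_record(Bin) when is_binary(Bin) ->
    [Record, Rest] = binary:split(Bin, <<"\n">>),
    [_Len, KeyValue] = binary:split(Record, <<" ">>),
    [Key, Value] = binary:split(KeyValue, <<"=">>),
    {Key, Value, Rest}.

demo() ->
    %% "20 path=foo/bar.txt\n" is exactly 20 bytes, including the newline.
    {<<"path">>, <<"foo/bar.txt">>, <<>>} =
        split_record(<<"20 path=foo/bar.txt\n">>),
    ok.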
-verify_checksum(H1, H2, Csum, ShouldBe, Unsigned) -> - case signed_sum(binary_to_list(H1), signed_sum(binary_to_list(H2), Csum)) of - ShouldBe -> ok; - Signed -> - throw({error, - {bad_header, - "Incorrect directory checksum ~w (~w), should be ~w", - [Signed, Unsigned, ShouldBe]}}) +%% Skip the remaining bytes for the current file entry +skip_file(#reg_file_reader{handle=Handle0,pos=Pos,size=Size}=Reader) -> + Padding = skip_padding(Size), + AbsPos = Handle0#reader.pos + (Size-Pos) + Padding, + case do_position(Handle0, AbsPos) of + {ok, _, Handle1} -> + Reader#reg_file_reader{handle=Handle1,num_bytes=0,pos=Size}; + Err -> + throw(Err) + end; +skip_file(#sparse_file_reader{pos=Pos,size=Size}=Reader) -> + case do_read(Reader, Size-Pos) of + {ok, _, Reader2} -> + Reader2; + Err -> + throw(Err) end. -signed_sum([C|Rest], Sum) when C < 128 -> - signed_sum(Rest, Sum+C); -signed_sum([C|Rest], Sum) -> - signed_sum(Rest, Sum+C-256); -signed_sum([], Sum) -> Sum. - -write_extracted_element(Header, Bin, Opts) - when Opts#read_opts.output =:= memory -> - case Header#tar_header.typeflag of - regular -> - {ok, {Header#tar_header.name, Bin}}; - _ -> - ok +skip_padding(0) -> + 0; +skip_padding(Size) when (Size rem ?BLOCK_SIZE) =:= 0 -> + 0; +skip_padding(Size) when Size =< ?BLOCK_SIZE -> + ?BLOCK_SIZE - Size; +skip_padding(Size) -> + ?BLOCK_SIZE - (Size rem ?BLOCK_SIZE). + +skip_unread(#reader{pos=Pos}=Reader0) when (Pos rem ?BLOCK_SIZE) > 0 -> + Padding = skip_padding(Pos + ?BLOCK_SIZE), + AbsPos = Pos + Padding, + case do_position(Reader0, AbsPos) of + {ok, _, Reader1} -> + {ok, Reader1}; + Err -> + throw(Err) + end; +skip_unread(#reader{}=Reader) -> + {ok, Reader}; +skip_unread(#reg_file_reader{handle=Handle,num_bytes=0}) -> + skip_unread(Handle); +skip_unread(#reg_file_reader{}=Reader) -> + #reg_file_reader{handle=Handle} = skip_file(Reader), + {ok, Handle}; +skip_unread(#sparse_file_reader{handle=Handle,num_bytes=0}) -> + skip_unread(Handle); +skip_unread(#sparse_file_reader{}=Reader) -> + #sparse_file_reader{handle=Handle} = skip_file(Reader), + {ok, Handle}. + +write_extracted_element(#tar_header{name=Name,typeflag=Type}, + Bin, + #read_opts{output=memory}=Opts) -> + case typeflag(Type) of + regular -> + read_verbose(Opts, "x ~ts~n", [Name]), + {ok, {Name, Bin}}; + _ -> + ok end; -write_extracted_element(Header, Bin, Opts) -> - Name = filename:absname(Header#tar_header.name, Opts#read_opts.cwd), - Created = - case Header#tar_header.typeflag of - regular -> - write_extracted_file(Name, Bin, Opts); - directory -> - create_extracted_dir(Name, Opts); - symlink -> - create_symlink(Name, Header, Opts); - Other -> % Ignore. - read_verbose(Opts, "x ~ts - unsupported type ~p~n", - [Name, Other]), - not_written - end, +write_extracted_element(#tar_header{name=Name0}=Header, Bin, Opts) -> + Name1 = filename:absname(Name0, Opts#read_opts.cwd), + Created = + case typeflag(Header#tar_header.typeflag) of + regular -> + create_regular(Name1, Name0, Bin, Opts); + directory -> + read_verbose(Opts, "x ~ts~n", [Name0]), + create_extracted_dir(Name1, Opts); + symlink -> + read_verbose(Opts, "x ~ts~n", [Name0]), + create_symlink(Name1, Header#tar_header.linkname, Opts); + Device when Device =:= char orelse Device =:= block -> + %% char/block devices will be created as empty files + %% and then have their major/minor device set later + create_regular(Name1, Name0, <<>>, Opts); + fifo -> + %% fifo devices will be created as empty files + create_regular(Name1, Name0, <<>>, Opts); + Other -> % Ignore. 
+ read_verbose(Opts, "x ~ts - unsupported type ~p~n", + [Name0, Other]), + not_written + end, case Created of - ok -> set_extracted_file_info(Name, Header); - not_written -> ok + ok -> set_extracted_file_info(Name1, Header); + not_written -> ok + end. + +create_regular(Name, NameInArchive, Bin, Opts) -> + case write_extracted_file(Name, Bin, Opts) of + not_written -> + read_verbose(Opts, "x ~ts - exists, not created~n", [NameInArchive]), + not_written; + Ok -> + read_verbose(Opts, "x ~ts~n", [NameInArchive]), + Ok end. create_extracted_dir(Name, _Opts) -> case file:make_dir(Name) of - ok -> ok; - {error,enotsup} -> not_written; - {error,eexist} -> not_written; - {error,enoent} -> make_dirs(Name, dir); - {error,Reason} -> throw({error, Reason}) + ok -> ok; + {error,enotsup} -> not_written; + {error,eexist} -> not_written; + {error,enoent} -> make_dirs(Name, dir); + {error,Reason} -> throw({error, Reason}) end. -create_symlink(Name, #tar_header{linkname=Linkname}=Header, Opts) -> +create_symlink(Name, Linkname, Opts) -> case file:make_symlink(Linkname, Name) of - ok -> ok; - {error,enoent} -> - ok = make_dirs(Name, file), - create_symlink(Name, Header, Opts); - {error,eexist} -> not_written; - {error,enotsup} -> - read_verbose(Opts, "x ~ts - symbolic links not supported~n", [Name]), - not_written; - {error,Reason} -> throw({error, Reason}) + ok -> ok; + {error,enoent} -> + ok = make_dirs(Name, file), + create_symlink(Name, Linkname, Opts); + {error,eexist} -> not_written; + {error,enotsup} -> + read_verbose(Opts, "x ~ts - symbolic links not supported~n", [Name]), + not_written; + {error,Reason} -> throw({error, Reason}) end. write_extracted_file(Name, Bin, Opts) -> Write = - case Opts#read_opts.keep_old_files of - true -> - case file:read_file_info(Name) of - {ok, _} -> false; - _ -> true - end; - false -> true - end, + case Opts#read_opts.keep_old_files of + true -> + case file:read_file_info(Name) of + {ok, _} -> false; + _ -> true + end; + false -> true + end, case Write of - true -> - read_verbose(Opts, "x ~ts~n", [Name]), - write_file(Name, Bin); - false -> - read_verbose(Opts, "x ~ts - exists, not created~n", [Name]), - not_written + true -> write_file(Name, Bin); + false -> not_written end. write_file(Name, Bin) -> case file:write_file(Name, Bin) of - ok -> ok; - {error,enoent} -> - ok = make_dirs(Name, file), - write_file(Name, Bin); - {error,Reason} -> - throw({error, Reason}) + ok -> ok; + {error,enoent} -> + ok = make_dirs(Name, file), + write_file(Name, Bin); + {error,Reason} -> + throw({error, Reason}) end. -set_extracted_file_info(_, #tar_header{typeflag = symlink}) -> ok; -set_extracted_file_info(Name, #tar_header{mode=Mode, mtime=Mtime}) -> - Info = #file_info{mode=Mode, mtime=posix_to_erlang_time(Mtime)}, +set_extracted_file_info(_, #tar_header{typeflag = ?TYPE_SYMLINK}) -> ok; +set_extracted_file_info(_, #tar_header{typeflag = ?TYPE_LINK}) -> ok; +set_extracted_file_info(Name, #tar_header{typeflag = ?TYPE_CHAR}=Header) -> + set_device_info(Name, Header); +set_extracted_file_info(Name, #tar_header{typeflag = ?TYPE_BLOCK}=Header) -> + set_device_info(Name, Header); +set_extracted_file_info(Name, #tar_header{mtime=Mtime,mode=Mode}) -> + Info = #file_info{mode=Mode, mtime=Mtime}, + file:write_file_info(Name, Info). 
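The extraction paths above (write_extracted_element/3, create_regular/4 and friends) are normally reached through erl_tar:extract/2. A usage sketch, with placeholder archive and directory names:

extract_demo(Archive) ->
    %% Unpack to disk; keep_old_files makes write_extracted_file/3 leave
    %% existing files alone, and verbose prints one "x <name>" line per entry.
    ok = erl_tar:extract(Archive, [compressed, {cwd, "/tmp/unpack"},
                                   keep_old_files, verbose]),
    %% Unpack into memory instead; regular files come back as
    %% {NameInArchive, Binary} pairs, other entry types are skipped.
    {ok, Files} = erl_tar:extract(Archive, [compressed, memory]),
    Files.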
+ +set_device_info(Name, #tar_header{}=Header) -> + Mtime = Header#tar_header.mtime, + Mode = Header#tar_header.mode, + Devmajor = Header#tar_header.devmajor, + Devminor = Header#tar_header.devminor, + Info = #file_info{ + mode=Mode, + mtime=Mtime, + major_device=Devmajor, + minor_device=Devminor + }, file:write_file_info(Name, Info). %% Makes all directories leading up to the file. make_dirs(Name, file) -> - filelib:ensure_dir(Name); + filelib:ensure_dir(Name); make_dirs(Name, dir) -> - filelib:ensure_dir(filename:join(Name,"*")). + filelib:ensure_dir(filename:join(Name,"*")). %% Prints the message on if the verbose option is given (for reading). - read_verbose(#read_opts{verbose=true}, Format, Args) -> - io:format(Format, Args), - io:nl(); + io:format(Format, Args); read_verbose(_, _, _) -> ok. - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%% -%%% Utility functions. -%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%% Returns the checksum of a binary. - -checksum(Bin) -> checksum(Bin, 0). - -checksum(<>, Sum) -> - checksum(T, Sum+A+B+C+D+E+F+G+H); -checksum(<>, Sum) -> - checksum(T, Sum+A); -checksum(<<>>, Sum) -> Sum. - -%% Returns a list of zeroes to pad out to the given block size. - -padding(Size, BlockSize) -> - zeroes(pad_size(Size, BlockSize)). - -pad_size(Size, BlockSize) -> - case Size rem BlockSize of - 0 -> 0; - Rem -> BlockSize-Rem - end. - -zeroes(0) -> []; -zeroes(1) -> [0]; -zeroes(2) -> [0,0]; -zeroes(Number) -> - Half = zeroes(Number div 2), - case Number rem 2 of - 0 -> [Half|Half]; - 1 -> [Half|[0|Half]] - end. - -%% Skips the given number of bytes rounded up to an even record. - -skip(File, Size) -> - %% Note: There is no point in handling failure to get the current position - %% in the file. If it doesn't work, something serious is wrong. - Amount = ((Size + ?record_size - 1) div ?record_size) * ?record_size, - {ok,_} = do_position(File, {cur, Amount}), - ok. - -%% Skips to the next record in the file. - -skip_to_next(File) -> - %% Note: There is no point in handling failure to get the current position - %% in the file. If it doesn't work, something serious is wrong. - {ok, Position} = do_position(File, {cur, 0}), - NewPosition = ((Position + ?record_size - 1) div ?record_size) * ?record_size, - {ok,NewPosition} = do_position(File, NewPosition), - ok. - %% Prints the message on if the verbose option is given. - add_verbose(#add_opts{verbose=true}, Format, Args) -> io:format(Format, Args); add_verbose(_, _, _) -> ok. -%% Converts a tuple containing the time to a Posix time (seconds -%% since Jan 1, 1970). +%%%%%%%%%%%%%%%%%% +%% I/O primitives +%%%%%%%%%%%%%%%%%% + +do_write(#reader{handle=Handle,func=Fun}=Reader0, Data) + when is_function(Fun,2) -> + case Fun(write,{Handle,Data}) of + ok -> + {ok, Pos, Reader1} = do_position(Reader0, {cur,0}), + {ok, Reader1#reader{pos=Pos}}; + {error, _} = Err -> + Err + end. -posix_time(Time) -> - EpochStart = {{1970,1,1},{0,0,0}}, - {Days,{Hour,Min,Sec}} = calendar:time_difference(EpochStart, Time), - 86400*Days + 3600*Hour + 60*Min + Sec. 
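make_dirs/2 above leans on filelib:ensure_dir/1, which creates only the parent directories of its argument; joining a dummy component onto a directory path is what makes the directory itself get created. A small illustration, with placeholder paths:

ensure_demo() ->
    %% filelib:ensure_dir/1 creates every directory leading up to, but not
    %% including, the last path component, so this creates /tmp/demo/a/b:
    ok = filelib:ensure_dir("/tmp/demo/a/b/file.txt"),
    %% Joining a dummy component turns the directory itself into a parent,
    %% so this also creates /tmp/demo/a/b/c (the make_dirs(Name, dir) trick):
    ok = filelib:ensure_dir(filename:join("/tmp/demo/a/b/c", "*")).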
+do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=0}=Opts) + when is_function(Fun, 2) -> + do_copy(Reader, Source, Opts#add_opts{chunk_size=65536}); +do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=ChunkSize}) + when is_function(Fun, 2) -> + case file:open(Source, [read, binary]) of + {ok, SourceFd} -> + case copy_chunked(Reader, SourceFd, ChunkSize, 0) of + {ok, _Copied, _Reader2} = Ok-> + _ = file:close(SourceFd), + Ok; + Err -> + _ = file:close(SourceFd), + throw(Err) + end; + Err -> + throw(Err) + end. -posix_to_erlang_time(Sec) -> - OneMillion = 1000000, - Time = calendar:now_to_datetime({Sec div OneMillion, Sec rem OneMillion, 0}), - erlang:universaltime_to_localtime(Time). +copy_chunked(#reader{}=Reader, Source, ChunkSize, Copied) -> + case file:read(Source, ChunkSize) of + {ok, Bin} -> + {ok, Reader2} = do_write(Reader, Bin), + copy_chunked(Reader2, Source, ChunkSize, Copied+byte_size(Bin)); + eof -> + {ok, Copied, Reader}; + Other -> + Other + end. -read_file_and_info(Name, Opts) -> - ReadInfo = Opts#add_opts.read_info, - case ReadInfo(Name) of - {ok,Info} when Info#file_info.type =:= regular, - Opts#add_opts.chunk_size>0 -> - {ok,chunked,Info}; - {ok,Info} when Info#file_info.type =:= regular -> - case file:read_file(Name) of - {ok,Bin} -> - {ok,Bin,Info}; - Error -> - Error - end; - {ok,Info} when Info#file_info.type =:= symlink -> - case file:read_link(Name) of - {ok,PointsTo} -> - {ok,PointsTo,Info}; - Error -> - Error - end; - {ok, Info} -> - {ok,[],Info}; - Error -> - Error + +do_position(#reader{handle=Handle,func=Fun}=Reader, Pos) + when is_function(Fun,2)-> + case Fun(position, {Handle,Pos}) of + {ok, NewPos} -> + %% since Pos may not always be an absolute seek, + %% make sure we update the reader with the new absolute position + {ok, AbsPos} = Fun(position, {Handle, {cur, 0}}), + {ok, NewPos, Reader#reader{pos=AbsPos}}; + Other -> + Other end. -foreach_while_ok(Fun, [First|Rest]) -> - case Fun(First) of - ok -> foreach_while_ok(Fun, Rest); - Other -> Other +do_read(#reg_file_reader{handle=Handle,pos=Pos,size=Size}=Reader, Len) -> + NumBytes = Size - Pos, + ActualLen = if NumBytes - Len < 0 -> NumBytes; true -> Len end, + case do_read(Handle, ActualLen) of + {ok, Bin, Handle2} -> + NewPos = Pos + ActualLen, + NumBytes2 = Size - NewPos, + Reader1 = Reader#reg_file_reader{ + handle=Handle2, + pos=NewPos, + num_bytes=NumBytes2}, + {ok, Bin, Reader1}; + Other -> + Other end; -foreach_while_ok(_, []) -> ok. - -open_mode(Mode) -> - open_mode(Mode, false, [raw], []). +do_read(#sparse_file_reader{}=Reader, Len) -> + do_sparse_read(Reader, Len); +do_read(#reader{pos=Pos,handle=Handle,func=Fun}=Reader, Len) + when is_function(Fun,2)-> + %% Always convert to binary internally + case Fun(read2,{Handle,Len}) of + {ok, List} when is_list(List) -> + Bin = list_to_binary(List), + NewPos = Pos+byte_size(Bin), + {ok, Bin, Reader#reader{pos=NewPos}}; + {ok, Bin} when is_binary(Bin) -> + NewPos = Pos+byte_size(Bin), + {ok, Bin, Reader#reader{pos=NewPos}}; + Other -> + Other + end. 
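do_write/2, do_position/2 and do_read/2 above drive the archive through the reader's fun, which has the callback shape documented for erl_tar:init/3. A sketch of a plain file-backed fun covering the four operations used here; the function names and the open-mode list are assumptions for illustration:

%% Fun matching the calls made by the I/O primitives:
%% Fun(write, {Handle,Data}), Fun(position, {Handle,Pos}),
%% Fun(read2, {Handle,Len}) and Fun(close, Handle).
file_op_fun() ->
    fun(write, {Fd, Data})   -> file:write(Fd, Data);
       (position, {Fd, Pos}) -> file:position(Fd, Pos);
       (read2, {Fd, Len})    -> file:read(Fd, Len);
       (close, Fd)           -> file:close(Fd)
    end.

open_for_writing(Path) ->
    {ok, Fd} = file:open(Path, [read, write, binary, raw]),
    erl_tar:init(Fd, write, file_op_fun()).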
-open_mode(read, _, Raw, _) ->
-    {ok, read, Raw, []};
-open_mode(write, _, Raw, _) ->
-    {ok, write, Raw, []};
-open_mode([read|Rest], false, Raw, Opts) ->
-    open_mode(Rest, read, Raw, Opts);
-open_mode([write|Rest], false, Raw, Opts) ->
-    open_mode(Rest, write, Raw, Opts);
-open_mode([compressed|Rest], Access, Raw, Opts) ->
-    open_mode(Rest, Access, Raw, [compressed|Opts]);
-open_mode([cooked|Rest], Access, _Raw, Opts) ->
-    open_mode(Rest, Access, [], Opts);
-open_mode([], Access, Raw, Opts) ->
-    {ok, Access, Raw, Opts};
-open_mode(_, _, _, _) ->
-    {error, einval}.
-%%%================================================================
-do_write({tar_descriptor,UsrHandle,Fun}, Data) -> Fun(write,{UsrHandle,Data}).
 
+do_sparse_read(Reader, Len) ->
+    do_sparse_read(Reader, Len, <<>>).
+
+do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{num_bytes=0}|Entries]
+               }=Reader0, Len, Acc) ->
+    %% skip all empty fragments
+    Reader1 = Reader0#sparse_file_reader{sparse_map=Entries},
+    do_sparse_read(Reader1, Len, Acc);
+do_sparse_read(#sparse_file_reader{sparse_map=[],
+                pos=Pos,size=Size}=Reader0, Len, Acc)
+  when Pos < Size ->
+    %% if there are no more fragments, it is possible that there is one last sparse hole
+    %% this behaviour matches the BSD tar utility
+    %% however, GNU tar stops returning data even if we haven't reached the end
+    {ok, Bin, Reader1} = read_sparse_hole(Reader0, Size, Len),
+    do_sparse_read(Reader1, Len-byte_size(Bin), <<Acc/binary, Bin/binary>>);
+do_sparse_read(#sparse_file_reader{sparse_map=[]}=Reader, _Len, Acc) ->
+    {ok, Acc, Reader};
+do_sparse_read(#sparse_file_reader{}=Reader, 0, Acc) ->
+    {ok, Acc, Reader};
+do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{offset=Offset}|_],
+                pos=Pos}=Reader0, Len, Acc)
+  when Pos < Offset ->
+    {ok, Bin, Reader1} = read_sparse_hole(Reader0, Offset, Offset-Pos),
+    do_sparse_read(Reader1, Len-byte_size(Bin), <<Acc/binary, Bin/binary>>);
+do_sparse_read(#sparse_file_reader{sparse_map=[Entry|Entries],
+                pos=Pos}=Reader0, Len, Acc) ->
+    %% we're in a data fragment, so read from it
+    %% end offset of fragment
+    EndPos = Entry#sparse_entry.offset + Entry#sparse_entry.num_bytes,
+    %% bytes left in fragment
+    NumBytes = EndPos - Pos,
+    ActualLen = if Len > NumBytes -> NumBytes; true -> Len end,
+    case do_read(Reader0#sparse_file_reader.handle, ActualLen) of
+        {ok, Bin, Handle} ->
+            BytesRead = byte_size(Bin),
+            ActualEndPos = Pos+BytesRead,
+            Reader1 = if ActualEndPos =:= EndPos ->
+                             Reader0#sparse_file_reader{sparse_map=Entries};
+                         true ->
+                             Reader0
+                      end,
+            Size = Reader1#sparse_file_reader.size,
+            NumBytes2 = Size - ActualEndPos,
+            Reader2 = Reader1#sparse_file_reader{
+                        handle=Handle,
+                        pos=ActualEndPos,
+                        num_bytes=NumBytes2},
+            do_sparse_read(Reader2, Len-byte_size(Bin), <<Acc/binary, Bin/binary>>);
+        Other ->
+            Other
+    end.
+
+%% Reads a sparse hole ending at Offset
+read_sparse_hole(#sparse_file_reader{pos=Pos}=Reader, Offset, Len) ->
+    N = Offset - Pos,
+    N2 = if N > Len ->
+                 Len;
+            true ->
+                 N
+         end,
+    Bin = <<0:N2/unit:8>>,
+    NumBytes = Reader#sparse_file_reader.size - (Pos+N2),
+    {ok, Bin, Reader#sparse_file_reader{
+                num_bytes=NumBytes,
+                pos=Pos+N2}}.
+
+-spec do_close(reader()) -> ok | {error, term()}.
+do_close(#reader{handle=Handle,func=Fun}) when is_function(Fun,2) ->
+    Fun(close,Handle).
+
+%%%%%%%%%%%%%%%%%%
+%% Option parsing
+%%%%%%%%%%%%%%%%%%
 
-do_position({tar_descriptor,UsrHandle,Fun}, Pos) -> Fun(position,{UsrHandle,Pos}).
 
+extract_opts(List) ->
+    extract_opts(List, default_options()).
 
-do_read({tar_descriptor,UsrHandle,Fun}, Len) -> Fun(read2,{UsrHandle,Len}).
+table_opts(List) ->
+    read_opts(List, default_options()).
+
+default_options() ->
+    {ok, Cwd} = file:get_cwd(),
+    #read_opts{cwd=Cwd}.
 
-do_close({tar_descriptor,UsrHandle,Fun}) -> Fun(close,UsrHandle).
 
+extract_opts([keep_old_files|Rest], Opts) ->
+    extract_opts(Rest, Opts#read_opts{keep_old_files=true});
+extract_opts([{cwd, Cwd}|Rest], Opts) ->
+    extract_opts(Rest, Opts#read_opts{cwd=Cwd});
+extract_opts([{files, Files}|Rest], Opts) ->
+    Set = ordsets:from_list(Files),
+    extract_opts(Rest, Opts#read_opts{files=Set});
+extract_opts([memory|Rest], Opts) ->
+    extract_opts(Rest, Opts#read_opts{output=memory});
+extract_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
+    extract_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]});
+extract_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
+    extract_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]});
+extract_opts([verbose|Rest], Opts) ->
+    extract_opts(Rest, Opts#read_opts{verbose=true});
+extract_opts([Other|Rest], Opts) ->
+    extract_opts(Rest, read_opts([Other], Opts));
+extract_opts([], Opts) ->
+    Opts.
+
+read_opts([compressed|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
+    read_opts(Rest, Opts#read_opts{open_mode=[compressed|OpenMode]});
+read_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
+    read_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]});
+read_opts([verbose|Rest], Opts) ->
+    read_opts(Rest, Opts#read_opts{verbose=true});
+read_opts([_|Rest], Opts) ->
+    read_opts(Rest, Opts);
+read_opts([], Opts) ->
+    Opts.
diff --git a/lib/stdlib/src/erl_tar.hrl b/lib/stdlib/src/erl_tar.hrl
new file mode 100644
index 0000000000..d646d02989
--- /dev/null
+++ b/lib/stdlib/src/erl_tar.hrl
@@ -0,0 +1,394 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2017. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+
+%% Options used when adding files to a tar archive.
+-record(add_opts, {
+         read_info,          %% Fun to use for read file/link info.
+         chunk_size = 0,     %% For file reading when sending to sftp. 0=do not chunk
+         verbose = false}).  %% Verbose on/off.
+-type add_opts() :: #add_opts{}.
+
+%% Options used when reading a tar archive.
+-record(read_opts, {
+         cwd                    :: string(),    %% Current working directory.
+         keep_old_files = false :: boolean(),   %% Overwrite or not.
+         files = all,                           %% Set of files to extract (or all)
+         output = file          :: 'file' | 'memory',
+         open_mode = [],                        %% Open mode options.
+         verbose = false        :: boolean()}). %% Verbose on/off.
+-type read_opts() :: #read_opts{}.
+
+-type add_opt() :: dereference |
+                   verbose |
+                   {chunks, pos_integer()}.
+
+-type extract_opt() :: {cwd, string()} |
+                       {files, [string()]} |
+                       compressed |
+                       cooked |
+                       memory |
+                       keep_old_files |
+                       verbose.
+
+-type create_opt() :: compressed |
+                      cooked |
+                      dereference |
+                      verbose.
+
+-type filelist() :: [file:filename() |
+                     {string(), binary()} |
+                     {string(), file:filename()}].
+
+%% The tar header, once fully parsed.
+-record(tar_header, { + name = "" :: string(), %% name of header file entry + mode = 8#100644 :: non_neg_integer(), %% permission and mode bits + uid = 0 :: non_neg_integer(), %% user id of owner + gid = 0 :: non_neg_integer(), %% group id of owner + size = 0 :: non_neg_integer(), %% length in bytes + mtime :: calendar:datetime(), %% modified time + typeflag :: char(), %% type of header entry + linkname = "" :: string(), %% target name of link + uname = "" :: string(), %% user name of owner + gname = "" :: string(), %% group name of owner + devmajor = 0 :: non_neg_integer(), %% major number of character or block device + devminor = 0 :: non_neg_integer(), %% minor number of character or block device + atime :: calendar:datetime(), %% access time + ctime :: calendar:datetime() %% status change time + }). +-type tar_header() :: #tar_header{}. + +%% Metadata for a sparse file fragment +-record(sparse_entry, { + offset = 0 :: non_neg_integer(), + num_bytes = 0 :: non_neg_integer()}). +-type sparse_entry() :: #sparse_entry{}. +%% Contains metadata about fragments of a sparse file +-record(sparse_array, { + entries = [] :: [sparse_entry()], + is_extended = false :: boolean(), + max_entries = 0 :: non_neg_integer()}). +-type sparse_array() :: #sparse_array{}. +%% A subset of tar header fields common to all tar implementations +-record(header_v7, { + name :: binary(), + mode :: binary(), %% octal + uid :: binary(), %% integer + gid :: binary(), %% integer + size :: binary(), %% integer + mtime :: binary(), %% integer + checksum :: binary(), %% integer + typeflag :: byte(), %% char + linkname :: binary()}). +-type header_v7() :: #header_v7{}. +%% The set of fields specific to GNU tar formatted archives +-record(header_gnu, { + header_v7 :: header_v7(), + magic :: binary(), + version :: binary(), + uname :: binary(), + gname :: binary(), + devmajor :: binary(), %% integer + devminor :: binary(), %% integer + atime :: binary(), %% integer + ctime :: binary(), %% integer + sparse :: sparse_array(), + real_size :: binary()}). %% integer +-type header_gnu() :: #header_gnu{}. +%% The set of fields specific to STAR-formatted archives +-record(header_star, { + header_v7 :: header_v7(), + magic :: binary(), + version :: binary(), + uname :: binary(), + gname :: binary(), + devmajor :: binary(), %% integer + devminor :: binary(), %% integer + prefix :: binary(), + atime :: binary(), %% integer + ctime :: binary(), %% integer + trailer :: binary()}). +-type header_star() :: #header_star{}. +%% The set of fields specific to USTAR-formatted archives +-record(header_ustar, { + header_v7 :: header_v7(), + magic :: binary(), + version :: binary(), + uname :: binary(), + gname :: binary(), + devmajor :: binary(), %% integer + devminor :: binary(), %% integer + prefix :: binary()}). +-type header_ustar() :: #header_ustar{}. + +-type header_fields() :: header_v7() | + header_gnu() | + header_star() | + header_ustar(). + +%% The overall tar reader, it holds the low-level file handle, +%% its access, position, and the I/O primitives wrapper. +-record(reader, { + handle :: file:io_device() | term(), + access :: read | write | ram, + pos = 0 :: non_neg_integer(), + func :: file_op() + }). +-type reader() :: #reader{}. +%% A reader for a regular file within the tar archive, +%% It tracks its current state relative to that file. +-record(reg_file_reader, { + handle :: reader(), + num_bytes = 0, + pos = 0, + size = 0 + }). +-type reg_file_reader() :: #reg_file_reader{}. 
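The sparse_entry and sparse_array records above describe where the stored fragments of a sparse file land in the logical file; everything between fragments is a hole that do_sparse_read/3 (earlier in this patch) synthesizes as zero bytes. A small accounting sketch, assuming this header file is included and using made-up fragment values:

%% Bytes physically stored in the archive for a sparse file
%% (holes take no space at all).
stored_bytes(#sparse_array{entries=Entries}) ->
    lists:sum([N || #sparse_entry{num_bytes=N} <- Entries]).

sparse_demo() ->
    %% Two 512-byte fragments at offsets 0 and 4096 of an 8192-byte file:
    %% 1024 bytes are stored, the remaining 7168 bytes are holes.
    Map = #sparse_array{entries=[#sparse_entry{offset=0,    num_bytes=512},
                                 #sparse_entry{offset=4096, num_bytes=512}]},
    1024 = stored_bytes(Map),
    ok.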
+%% A reader for a sparse file within the tar archive,
+%% It tracks its current state relative to that file.
+-record(sparse_file_reader, {
+          handle :: reader(),
+          num_bytes = 0, %% bytes remaining
+          pos = 0, %% pos
+          size = 0, %% total size of file
+          sparse_map = #sparse_array{}
+         }).
+-type sparse_file_reader() :: #sparse_file_reader{}.
+
+%% Types for the readers
+-type reader_type() :: reader() | reg_file_reader() | sparse_file_reader().
+-type handle() :: file:io_device() | term().
+
+%% Type for the I/O primitive wrapper function
+-type file_op() :: fun((write | close | read2 | position,
+                        {handle(), iodata()} | handle() | {handle(), non_neg_integer()}
+                        | {handle(), non_neg_integer()}) ->
+                              ok | eof | {ok, string() | binary()} | {ok, non_neg_integer()}
+                              | {error, term()}).
+
+%% These constants (except S_IFMT) are
+%% used to determine what type of device
+%% a file is. Namely, `S_IFMT band file_info.mode`
+%% will equal one of these constants, and tells us
+%% which type it is. The stdlib file_info record
+%% does not differentiate between device types, and
+%% will not allow us to differentiate between sockets
+%% and named pipes. These constants are pulled from libc.
+-define(S_IFMT, 61440).
+-define(S_IFSOCK, 49152). %% socket
+-define(S_FIFO, 4096).    %% fifo/named pipe
+-define(S_IFBLK, 24576).  %% block device
+-define(S_IFCHR, 8192).   %% character device
+
+%% Typeflag constants for the tar header
+-define(TYPE_REGULAR, $0).          %% regular file
+-define(TYPE_REGULAR_A, 0).         %% regular file
+-define(TYPE_LINK, $1).             %% hard link
+-define(TYPE_SYMLINK, $2).          %% symbolic link
+-define(TYPE_CHAR, $3).             %% character device node
+-define(TYPE_BLOCK, $4).            %% block device node
+-define(TYPE_DIR, $5).              %% directory
+-define(TYPE_FIFO, $6).             %% fifo node
+-define(TYPE_CONT, $7).             %% reserved
+-define(TYPE_X_HEADER, $x).         %% extended header
+-define(TYPE_X_GLOBAL_HEADER, $g).  %% global extended header
+-define(TYPE_GNU_LONGNAME, $L).     %% next file has a long name
+-define(TYPE_GNU_LONGLINK, $K).     %% next file symlinks to a file with a long name
+-define(TYPE_GNU_SPARSE, $S).       %% sparse file
+
+%% Mode constants from tar spec
+-define(MODE_ISUID, 4000).    %% set uid
+-define(MODE_ISGID, 2000).    %% set gid
+-define(MODE_ISVTX, 1000).    %% save text (sticky bit)
+-define(MODE_ISDIR, 40000).   %% directory
+-define(MODE_ISFIFO, 10000).  %% fifo
+-define(MODE_ISREG, 100000).  %% regular file
+-define(MODE_ISLNK, 120000).  %% symbolic link
+-define(MODE_ISBLK, 60000).   %% block special file
+-define(MODE_ISCHR, 20000).   %% character special file
+-define(MODE_ISSOCK, 140000). %% socket
+
+%% Keywords for PAX extended header
+-define(PAX_ATIME, <<"atime">>).
+-define(PAX_CHARSET, <<"charset">>).
+-define(PAX_COMMENT, <<"comment">>).
+-define(PAX_CTIME, <<"ctime">>). %% ctime is not a valid pax header
+-define(PAX_GID, <<"gid">>).
+-define(PAX_GNAME, <<"gname">>).
+-define(PAX_LINKPATH, <<"linkpath">>).
+-define(PAX_MTIME, <<"mtime">>).
+-define(PAX_PATH, <<"path">>).
+-define(PAX_SIZE, <<"size">>).
+-define(PAX_UID, <<"uid">>).
+-define(PAX_UNAME, <<"uname">>).
+-define(PAX_XATTR, <<"SCHILY.xattr.">>).
+-define(PAX_XATTR_STR, "SCHILY.xattr.").
+-define(PAX_NONE, <<"">>).
+
+%% Tar format constants
+%% Unknown format
+-define(FORMAT_UNKNOWN, 0).
+%% The format of the original Unix V7 tar tool prior to standardization
+-define(FORMAT_V7, 1).
+%% The old and new GNU formats, incompatible with USTAR.
+%% This covers the old GNU sparse extension, but it does +%% not cover the GNU sparse extensions using PAX headers, +%% versions 0.0, 0.1, and 1.0; these fall under the PAX format. +-define(FORMAT_GNU, 2). +%% Schily's tar format, which is incompatible with USTAR. +%% This does not cover STAR extensions to the PAX format; these +%% fall under the PAX format. +-define(FORMAT_STAR, 3). +%% USTAR is the former standardization of tar defined in POSIX.1-1988, +%% it is incompatible with the GNU and STAR formats. +-define(FORMAT_USTAR, 4). +%% PAX is the latest standardization of tar defined in POSIX.1-2001. +%% This is an extension of USTAR and is "backwards compatible" with it. +%% +%% Some newer formats add their own extensions to PAX, such as GNU sparse +%% files and SCHILY extended attributes. Since they are backwards compatible +%% with PAX, they will be labelled as "PAX". +-define(FORMAT_PAX, 5). + +%% Magic constants +-define(MAGIC_GNU, <<"ustar ">>). +-define(VERSION_GNU, <<" \x00">>). +-define(MAGIC_USTAR, <<"ustar\x00">>). +-define(VERSION_USTAR, <<"00">>). +-define(TRAILER_STAR, <<"tar\x00">>). + +%% Size constants +-define(BLOCK_SIZE, 512). %% size of each block in a tar stream +-define(NAME_SIZE, 100). %% max length of the name field in USTAR format +-define(PREFIX_SIZE, 155). %% max length of the prefix field in USTAR format + +%% Maximum size of a nanosecond value as an integer +-define(MAX_NANO_INT_SIZE, 9). +%% Maximum size of a 64-bit signed integer +-define(MAX_INT64, (1 bsl 63 - 1)). + +-define(PAX_GNU_SPARSE_NUMBLOCKS, <<"GNU.sparse.numblocks">>). +-define(PAX_GNU_SPARSE_OFFSET, <<"GNU.sparse.offset">>). +-define(PAX_GNU_SPARSE_NUMBYTES, <<"GNU.sparse.numbytes">>). +-define(PAX_GNU_SPARSE_MAP, <<"GNU.sparse.map">>). +-define(PAX_GNU_SPARSE_NAME, <<"GNU.sparse.name">>). +-define(PAX_GNU_SPARSE_MAJOR, <<"GNU.sparse.major">>). +-define(PAX_GNU_SPARSE_MINOR, <<"GNU.sparse.minor">>). +-define(PAX_GNU_SPARSE_SIZE, <<"GNU.sparse.size">>). +-define(PAX_GNU_SPARSE_REALSIZE, <<"GNU.sparse.realsize">>). + +-define(V7_NAME, 0). +-define(V7_NAME_LEN, 100). +-define(V7_MODE, 100). +-define(V7_MODE_LEN, 8). +-define(V7_UID, 108). +-define(V7_UID_LEN, 8). +-define(V7_GID, 116). +-define(V7_GID_LEN, 8). +-define(V7_SIZE, 124). +-define(V7_SIZE_LEN, 12). +-define(V7_MTIME, 136). +-define(V7_MTIME_LEN, 12). +-define(V7_CHKSUM, 148). +-define(V7_CHKSUM_LEN, 8). +-define(V7_TYPE, 156). +-define(V7_TYPE_LEN, 1). +-define(V7_LINKNAME, 157). +-define(V7_LINKNAME_LEN, 100). + +-define(STAR_TRAILER, 508). +-define(STAR_TRAILER_LEN, 4). + +-define(USTAR_MAGIC, 257). +-define(USTAR_MAGIC_LEN, 6). +-define(USTAR_VERSION, 263). +-define(USTAR_VERSION_LEN, 2). +-define(USTAR_UNAME, 265). +-define(USTAR_UNAME_LEN, 32). +-define(USTAR_GNAME, 297). +-define(USTAR_GNAME_LEN, 32). +-define(USTAR_DEVMAJ, 329). +-define(USTAR_DEVMAJ_LEN, 8). +-define(USTAR_DEVMIN, 337). +-define(USTAR_DEVMIN_LEN, 8). +-define(USTAR_PREFIX, 345). +-define(USTAR_PREFIX_LEN, 155). + +-define(GNU_MAGIC, 257). +-define(GNU_MAGIC_LEN, 6). +-define(GNU_VERSION, 263). +-define(GNU_VERSION_LEN, 2). + +%% ?BLOCK_SIZE of zero-bytes. +%% Two of these in a row mark the end of an archive. 
+-define(ZERO_BLOCK, <<0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0>>). + +-define(BILLION, 1000000000). + +-define(EPOCH, {{1970,1,1}, {0,0,0}}). -- cgit v1.2.3
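As the comment above ?ZERO_BLOCK says, a tar stream is terminated by two consecutive all-zero blocks. One way a reader could test for that terminator, using the macros defined in this header; this is an illustration only, not the detection erl_tar itself performs:

%% True if Data starts with the end-of-archive marker: two ?BLOCK_SIZE
%% blocks consisting entirely of zero bytes.
is_end_of_archive(<<Block1:?BLOCK_SIZE/binary, Block2:?BLOCK_SIZE/binary, _/binary>>) ->
    Block1 =:= ?ZERO_BLOCK andalso Block2 =:= ?ZERO_BLOCK;
is_end_of_archive(_) ->
    false.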