From ea1ddd8800c82fb8ae7ac2f54f4217a35d6f463d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Wed, 11 Oct 2017 14:45:11 +0200
Subject: Dump literals separately to avoid incomplete heap data

When a literal was used from several processes, the literal would
be dumped in only one of the processes. The other processes
that referenced the literals would have incomplete heap data.
---
 lib/observer/src/crashdump_viewer.erl        | 52 ++++++++++++++++++++++++++--
 lib/observer/test/crashdump_viewer_SUITE.erl |  5 ++-
 2 files changed, 53 insertions(+), 4 deletions(-)

(limited to 'lib/observer')

diff --git a/lib/observer/src/crashdump_viewer.erl b/lib/observer/src/crashdump_viewer.erl
index bc3ce5d547..ddf2879a97 100644
--- a/lib/observer/src/crashdump_viewer.erl
+++ b/lib/observer/src/crashdump_viewer.erl
@@ -104,6 +104,7 @@
 -define(index_table,index_table).
 -define(instr_data,instr_data).
 -define(internal_ets,internal_ets).
+-define(literals,literals).
 -define(loaded_modules,loaded_modules).
 -define(memory,memory).
 -define(memory_map,memory_map).
@@ -785,6 +786,9 @@ parse_vsn_str(Str,WS) ->
 %%% Progress is reported during the time and MUST be checked with
 %%% crashdump_viewer:get_progress/0 until it returns {ok,done}.
 do_read_file(File) ->
+    erase(?literals),                           %Clear literal cache.
+    put(truncated,false),                       %Not truncated (yet).
+    erase(truncated_reason),                    %Not truncated (yet).
     case file:read_file_info(File) of
 	{ok,#file_info{type=regular,
 		       access=FileA,
@@ -856,6 +860,19 @@ indexify(Fd,AddrAdj,Bin,N) ->
                 {?proc_heap,LastId} ->
                     [{_,LastPos}] = lookup_index(?proc_heap,LastId),
                     ets:insert(cdv_heap_file_chars,{LastId,N+Start+1-LastPos});
+                {?literals,[]} ->
+                    case get(truncated_reason) of
+                        undefined ->
+                            [{_,LastPos}] = lookup_index(?literals,[]),
+                            ets:insert(cdv_heap_file_chars,
+                                       {literals,N+Start+1-LastPos});
+                        _ ->
+                            %% Literals are truncated. Make sure we never
+                            %% attempt to read in the literals. (Heaps that
+                            %% references literals will show markers for
+                            %% incomplete heaps, but will otherwise work.)
+                            delete_index(?literals, [])
+                    end;
                 _ -> ok
             end,
 	    indexify(Fd,AddrAdj,Rest,N1);
@@ -908,6 +925,7 @@ check_if_truncated() ->
 find_truncated_proc({Tag,_Id}) when Tag==?atoms;
                                     Tag==?binary;
                                     Tag==?instr_data;
+                                    Tag==?literals;
                                     Tag==?memory_status;
                                     Tag==?memory_map ->
     put(truncated_proc,false);
@@ -1386,13 +1404,33 @@ maybe_other_node2(Channel) ->
 expand_memory(Fd,Pid,DumpVsn) ->
     BinAddrAdj = get_bin_addr_adj(DumpVsn),
     put(fd,Fd),
-    Dict = read_heap(Fd,Pid,BinAddrAdj,gb_trees:empty()),
+    Dict0 = case get(?literals) of
+                undefined ->
+                    Literals = read_literals(Fd),
+                    put(?literals,Literals),
+                    put(fd,Fd),
+                    Literals;
+                Literals ->
+                    Literals
+            end,
+    Dict = read_heap(Fd,Pid,BinAddrAdj,Dict0),
     Expanded = {read_stack_dump(Fd,Pid,BinAddrAdj,Dict),
 		read_messages(Fd,Pid,BinAddrAdj,Dict),
 		read_dictionary(Fd,Pid,BinAddrAdj,Dict)},
     erase(fd),
     Expanded.
 
+read_literals(Fd) ->
+    case lookup_index(?literals,[]) of
+	[{_,Start}] ->
+            [{_,Chars}] = ets:lookup(cdv_heap_file_chars,literals),
+            init_progress("Reading literals",Chars),
+	    pos_bof(Fd,Start),
+	    read_heap(0,gb_trees:empty());
+        [] ->
+            gb_trees:empty()
+    end.
+
 %%%-----------------------------------------------------------------
 %%% This is a workaround for a bug in dump versions prior to 0.3:
 %%% Addresses were truncated to 32 bits. This could cause binaries to
@@ -2829,6 +2867,10 @@ reset_tables() ->
 insert_index(Tag,Id,Pos) ->
     ets:insert(cdv_dump_index_table,{{Tag,Pos},Id}).
 
+delete_index(Tag,Id) ->
+    Ms = [{{{Tag,'$1'},Id},[],[true]}],
+    ets:select_delete(cdv_dump_index_table, Ms).
+
 lookup_index({Tag,Id}) ->
     lookup_index(Tag,Id);
 lookup_index(Tag) ->
@@ -2845,6 +2887,7 @@ insert_binary_index(Addr,Pos) ->
 lookup_binary_index(Addr) ->
     ets:lookup(cdv_binary_index_table,Addr).
 
+
 %%-----------------------------------------------------------------
 %% Convert tags read from crashdump to atoms used as first part of key
 %% in cdv_dump_index_table
@@ -2862,6 +2905,7 @@ tag_to_atom("hidden_node") -> ?hidden_node;
 tag_to_atom("index_table") -> ?index_table;
 tag_to_atom("instr_data") -> ?instr_data;
 tag_to_atom("internal_ets") -> ?internal_ets;
+tag_to_atom("literals") -> ?literals;
 tag_to_atom("loaded_modules") -> ?loaded_modules;
 tag_to_atom("memory") -> ?memory;
 tag_to_atom("mod") -> ?mod;
@@ -2885,8 +2929,10 @@ tag_to_atom(UnknownTag) ->
 %%%-----------------------------------------------------------------
 %%% Store last tag for use when truncated, and reason if aborted
 put_last_tag(?abort,Reason) ->
-    %% Don't overwrite the real last tag
-    put(truncated_reason,Reason);
+    %% Don't overwrite the real last tag, and make sure to return
+    %% the previous last tag.
+    put(truncated_reason,Reason),
+    get(last_tag);
 put_last_tag(Tag,Id) ->
     put(last_tag,{Tag,Id}).
 
diff --git a/lib/observer/test/crashdump_viewer_SUITE.erl b/lib/observer/test/crashdump_viewer_SUITE.erl
index 0ec6d8dcfc..6ac9d7d3fb 100644
--- a/lib/observer/test/crashdump_viewer_SUITE.erl
+++ b/lib/observer/test/crashdump_viewer_SUITE.erl
@@ -613,7 +613,10 @@ truncate_dump(File) ->
                   {win32,_} -> <<"\r\n">>;
                   _ -> <<"\n">>
               end,
-    [StartBin,AfterTag] = binary:split(Bin,BinTag),
+    %% Split after "our binary" created by crashdump_helper
+    %% (it may not be the first binary).
+    RE = <<"\n=binary:(?=[0-9A-Z]+",NewLine/binary,"FF:010203)">>,
+    [StartBin,AfterTag] = re:split(Bin,RE,[{parts,2}]),
     [AddrAndSize,BinaryAndRest] = binary:split(AfterTag,Colon),
     [Binary,_Rest] = binary:split(BinaryAndRest,NewLine),
     TruncSize = byte_size(Binary) - 2,
-- 
cgit v1.2.3