diff options
author | Siri Hansen <[email protected]> | 2017-09-07 15:56:18 +0200 |
---|---|---|
committer | Siri Hansen <[email protected]> | 2017-09-15 09:57:25 +0200 |
commit | ae089c72fb06b069675cbebcec10f0820cf16112 (patch) | |
tree | d1ba5bc2e8ec4f230af3b86b6d0578a9d5ce2a1e /lib | |
parent | 903a289213aa22b5c9c42ead2599174c2cf15b95 (diff) | |
download | otp-ae089c72fb06b069675cbebcec10f0820cf16112.tar.gz otp-ae089c72fb06b069675cbebcec10f0820cf16112.tar.bz2 otp-ae089c72fb06b069675cbebcec10f0820cf16112.zip |
cdv: Optimize reading of crashdump with many binaries
Earlier, crashdump_viewer stored an index of all binaries in a gb_tree
on startup. The binary index was also stored in the
cdv_dump_index_table along with all other "=xxx" tags from the
dump. The difference between the indices was that the ets table
contained the addresses of the binaries as strings (the hex address
found after the "=binary:" tag) and in the gb_tree this hex address
was instead converted to its integer value. The index in the ets table
was only used once - when creating the gb_tree. The gb_tree was used
for all later lookups (to map integer address to file position).
This commit replaces the two storages with one new ets table,
cdv_binary_index_table, using the integer value of the hex address as
key, and the position in the crashdump file as value. In the case of
many binaries, this makes the start of crashdump viewer faster (only
one place to write), and the data usage smaller (hex address strings
are no longer stored). And it avoids the gc of the gb_tree.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/observer/src/crashdump_viewer.erl | 138 | ||||
-rw-r--r-- | lib/observer/src/observer_html_lib.erl | 29 | ||||
-rw-r--r-- | lib/observer/test/crashdump_viewer_SUITE.erl | 125 |
3 files changed, 193 insertions, 99 deletions
diff --git a/lib/observer/src/crashdump_viewer.erl b/lib/observer/src/crashdump_viewer.erl index f7c44628cf..0534ead50e 100644 --- a/lib/observer/src/crashdump_viewer.erl +++ b/lib/observer/src/crashdump_viewer.erl @@ -36,7 +36,6 @@ %% file: The name of the crashdump currently viewed. %% dump_vsn: The version number of the crashdump %% wordsize: 4 | 8, the number of bytes in a word. -%% binaries: a gb_tree containing binaries or links to binaries in the dump %% %% User API @@ -124,7 +123,7 @@ -define(visible_node,visible_node). --record(state,{file,dump_vsn,wordsize=4,num_atoms="unknown",binaries}). +-record(state,{file,dump_vsn,wordsize=4,num_atoms="unknown"}). %%%----------------------------------------------------------------- %%% Debugging @@ -307,6 +306,7 @@ expand_binary(Pos) -> init([]) -> ets:new(cdv_dump_index_table,[ordered_set,named_table,public]), ets:new(cdv_reg_proc_table,[ordered_set,named_table,public]), + ets:new(cdv_binary_index_table,[ordered_set,named_table,public]), {ok, #state{}}. 
%%-------------------------------------------------------------------- @@ -350,9 +350,9 @@ handle_call(procs_summary,_From,State=#state{file=File,wordsize=WS}) -> Procs = procs_summary(File,WS), {reply,{ok,Procs,TW},State}; handle_call({proc_details,Pid},_From, - State=#state{file=File,wordsize=WS,dump_vsn=DumpVsn,binaries=B})-> + State=#state{file=File,wordsize=WS,dump_vsn=DumpVsn})-> Reply = - case get_proc_details(File,Pid,WS,DumpVsn,B) of + case get_proc_details(File,Pid,WS,DumpVsn) of {ok,Proc,TW} -> {ok,Proc,TW}; Other -> @@ -464,9 +464,9 @@ handle_call(schedulers,_From,State=#state{file=File}) -> %%-------------------------------------------------------------------- handle_cast({read_file,File}, _State) -> case do_read_file(File) of - {ok,Binaries,DumpVsn} -> + {ok,DumpVsn} -> observer_lib:report_progress({ok,done}), - {noreply, #state{file=File,binaries=Binaries,dump_vsn=DumpVsn}}; + {noreply, #state{file=File,dump_vsn=DumpVsn}}; Error -> end_progress(Error), {noreply, #state{}} @@ -793,18 +793,17 @@ do_read_file(File) -> {Tag,Id,Rest,N1} = tag(Fd,TagAndRest,1), case Tag of ?erl_crash_dump -> - reset_index_table(), + reset_tables(), insert_index(Tag,Id,N1+1), put_last_tag(Tag,""), - indexify(Fd,Rest,N1), + DumpVsn = [list_to_integer(L) || + L<-string:tokens(Id,".")], + AddrAdj = get_bin_addr_adj(DumpVsn), + indexify(Fd,AddrAdj,Rest,N1), end_progress(), check_if_truncated(), - [{DumpVsn0,_}] = lookup_index(?erl_crash_dump), - DumpVsn = [list_to_integer(L) || - L<-string:tokens(DumpVsn0,".")], - Binaries = read_binaries(Fd,DumpVsn), close(Fd), - {ok,Binaries,DumpVsn}; + {ok,DumpVsn}; _Other -> R = io_lib:format( "~ts is not an Erlang crash dump~n", @@ -832,15 +831,26 @@ do_read_file(File) -> {error,R} end. 
-indexify(Fd,Bin,N) -> +indexify(Fd,AddrAdj,Bin,N) -> case binary:match(Bin,<<"\n=">>) of {Start,Len} -> Pos = Start+Len, <<_:Pos/binary,TagAndRest/binary>> = Bin, {Tag,Id,Rest,N1} = tag(Fd,TagAndRest,N+Pos), - insert_index(Tag,Id,N1+1), % +1 to get past newline + NewPos = N1+1, % +1 to get past newline + case Tag of + ?binary -> + %% Binaries are stored in a separate table in + %% order to minimize lookup time. Key is the + %% translated address. + {HexAddr,_} = get_hex(Id), + Addr = HexAddr bor AddrAdj, + insert_binary_index(Addr,NewPos); + _ -> + insert_index(Tag,Id,NewPos) + end, put_last_tag(Tag,Id), - indexify(Fd,Rest,N1); + indexify(Fd,AddrAdj,Rest,N1); nomatch -> case progress_read(Fd) of {ok,Chunk0} when is_binary(Chunk0) -> @@ -851,7 +861,7 @@ indexify(Fd,Bin,N) -> _ -> {Chunk0,N+byte_size(Bin)} end, - indexify(Fd,Chunk,N1); + indexify(Fd,AddrAdj,Chunk,N1); eof -> eof end @@ -1040,14 +1050,14 @@ procs_summary(File,WS) -> %%----------------------------------------------------------------- %% Page with one process -get_proc_details(File,Pid,WS,DumpVsn,Binaries) -> +get_proc_details(File,Pid,WS,DumpVsn) -> case lookup_index(?proc,Pid) of [{_,Start}] -> Fd = open(File), {{Stack,MsgQ,Dict},TW} = case truncated_warning([{?proc,Pid}]) of [] -> - {expand_memory(Fd,Pid,DumpVsn,Binaries),[]}; + {expand_memory(Fd,Pid,DumpVsn),[]}; TW0 -> {{[],[],[]},TW0} end, @@ -1365,10 +1375,10 @@ maybe_other_node2(Channel) -> end. -expand_memory(Fd,Pid,DumpVsn,Binaries) -> +expand_memory(Fd,Pid,DumpVsn) -> BinAddrAdj = get_bin_addr_adj(DumpVsn), put(fd,Fd), - Dict = read_heap(Fd,Pid,BinAddrAdj,Binaries), + Dict = read_heap(Fd,Pid,BinAddrAdj,gb_trees:empty()), Expanded = {read_stack_dump(Fd,Pid,BinAddrAdj,Dict), read_messages(Fd,Pid,BinAddrAdj,Dict), read_dictionary(Fd,Pid,BinAddrAdj,Dict)}, @@ -1386,25 +1396,6 @@ get_bin_addr_adj(_) -> 0. %%% -%%% Read binaries. 
-%%% -read_binaries(Fd,DumpVsn) -> - AllBinaries = lookup_index(?binary), - AddrAdj = get_bin_addr_adj(DumpVsn), - Fun = fun({Addr0,Pos},Dict0) -> - pos_bof(Fd,Pos), - {HexAddr,_} = get_hex(Addr0), - Addr = HexAddr bor AddrAdj, - Bin = - case line_head(Fd) of - {eof,_} -> '#CDVTruncatedBinary'; - _Size -> {'#CDVBin',Pos} - end, - gb_trees:enter(Addr,Bin,Dict0) - end, - progress_foldl("Processing binaries",Fun,gb_trees:empty(),AllBinaries). - -%%% %%% Read top level section. %%% @@ -2564,9 +2555,9 @@ parse_heap_term("Yc"++Line0, Addr, BinAddrAdj, D0) -> %Reference-counted binary. {Offset,":"++Line2} = get_hex(Line1), {Sz,Line} = get_hex(Line2), Binp = Binp0 bor BinAddrAdj, - Term = case gb_trees:lookup(Binp, D0) of - {value,Bin} -> cdvbin(Offset,Sz,Bin); - none -> '#CDVNonexistingBinary' + Term = case lookup_binary_index(Binp) of + [{_,Start}] -> cdvbin(Offset,Sz,{'#CDVBin',Start}); + [] -> '#CDVNonexistingBinary' end, D = gb_trees:insert(Addr, Term, D0), {Term,Line,D}; @@ -2575,15 +2566,14 @@ parse_heap_term("Ys"++Line0, Addr, BinAddrAdj, D0) -> %Sub binary. {Offset,":"++Line2} = get_hex(Line1), {Sz,Line} = get_hex(Line2), Binp = Binp0 bor BinAddrAdj, - Term = case gb_trees:lookup(Binp, D0) of - {value,Bin} -> cdvbin(Offset,Sz,Bin); - none when Binp0=/=Binp -> + Term = case lookup_binary_index(Binp) of + [{_,Start}] -> cdvbin(Offset,Sz,{'#CDVBin',Start}); + [] -> %% Might it be on the heap? - case gb_trees:lookup(Binp0, D0) of + case gb_trees:lookup(Binp, D0) of {value,Bin} -> cdvbin(Offset,Sz,Bin); none -> '#CDVNonexistingBinary' - end; - none -> '#CDVNonexistingBinary' + end end, D = gb_trees:insert(Addr, Term, D0), {Term,Line,D}. @@ -2739,12 +2729,20 @@ get_label([H|T], Acc) -> get_label(T, [H|Acc]). get_binary(Line0) -> - {N,":"++Line} = get_hex(Line0), - do_get_binary(N, Line, []). + case get_hex(Line0) of + {N,":"++Line} -> + do_get_binary(N, Line, []); + _ -> + {'#CDVTruncatedBinary',[]} + end. 
get_binary(Offset,Size,Line0) -> - {_N,":"++Line} = get_hex(Line0), - do_get_binary(Size, lists:sublist(Line,(Offset*2)+1,Size*2), []). + case get_hex(Line0) of + {_N,":"++Line} -> + do_get_binary(Size, lists:sublist(Line,(Offset*2)+1,Size*2), []); + _ -> + {'#CDVTruncatedBinary',[]} + end. do_get_binary(0, Line, Acc) -> {list_to_binary(lists:reverse(Acc)),Line}; @@ -2759,12 +2757,16 @@ cdvbin(Offset,Size,{'#CDVBin',Pos}) -> cdvbin(Offset,Size,['#CDVBin',_,_,Pos]) -> ['#CDVBin',Offset,Size,Pos]; cdvbin(_,_,'#CDVTruncatedBinary') -> - '#CDVTruncatedBinary'. + '#CDVTruncatedBinary'; +cdvbin(_,_,'#CDVNonexistingBinary') -> + '#CDVNonexistingBinary'. %%----------------------------------------------------------------- -%% Functions for accessing the cdv_dump_index_table -reset_index_table() -> - ets:delete_all_objects(cdv_dump_index_table). +%% Functions for accessing tables +reset_tables() -> + ets:delete_all_objects(cdv_dump_index_table), + ets:delete_all_objects(cdv_reg_proc_table), + ets:delete_all_objects(cdv_binary_index_table). insert_index(Tag,Id,Pos) -> ets:insert(cdv_dump_index_table,{{Tag,Pos},Id}). @@ -2779,6 +2781,11 @@ lookup_index(Tag,Id) -> count_index(Tag) -> ets:select_count(cdv_dump_index_table,[{{{Tag,'_'},'_'},[],[true]}]). +insert_binary_index(Addr,Pos) -> + ets:insert(cdv_binary_index_table,{Addr,Pos}). + +lookup_binary_index(Addr) -> + ets:lookup(cdv_binary_index_table,Addr). %%----------------------------------------------------------------- %% Convert tags read from crashdump to atoms used as first part of key @@ -2849,23 +2856,6 @@ to_value_list(Record) -> Values. %%%----------------------------------------------------------------- -%%% Fold over List and report progress in percent. -%%% Report is the text to be presented in the progress dialog. -%%% Acc0 is the initial accumulator and will be passed to Fun as the -%%% second arguement, i.e. Fun = fun(Item,Acc) -> NewAcc end. 
-progress_foldl(Report,Fun,Acc0,List) -> - init_progress(Report, length(List)), - progress_foldl1(Fun,Acc0,List). - -progress_foldl1(Fun,Acc,[H|T]) -> - update_progress(), - progress_foldl1(Fun,Fun(H,Acc),T); -progress_foldl1(_Fun,Acc,[]) -> - end_progress(), - Acc. - - -%%%----------------------------------------------------------------- %%% Map over List and report progress in percent. %%% Report is the text to be presented in the progress dialog. %%% Distribute the load over a number of processes, and File is opened diff --git a/lib/observer/src/observer_html_lib.erl b/lib/observer/src/observer_html_lib.erl index 3dfcc42ada..a85808a472 100644 --- a/lib/observer/src/observer_html_lib.erl +++ b/lib/observer/src/observer_html_lib.erl @@ -337,17 +337,24 @@ href_proc_bin(From, T, Acc, LTB) -> Size = list_to_integer(SizeStr), PreviewSize = min(Size,10), Id = {list_to_integer(Offset),PreviewSize,list_to_integer(Pos)}, - {ok,PreviewBin} = crashdump_viewer:expand_binary(Id), - PreviewStr = preview_string(Size, PreviewBin), - if LTB -> - href("TARGET=\"expanded\"", - ["#Binary?offset="++Offset++ - "&size="++SizeStr++ - "&pos="++Pos], - PreviewStr); - true -> - PreviewStr - end; + case crashdump_viewer:expand_binary(Id) of + {ok, '#CDVTruncatedBinary'} -> + lists:flatten( + "<FONT COLOR=\"#FF0000\">" + "<<...(Truncated Binary)>>" + "</FONT>"); + {ok, PreviewBin} -> + PreviewStr = preview_string(Size, PreviewBin), + if LTB -> + href("TARGET=\"expanded\"", + ["#Binary?offset="++Offset++ + "&size="++SizeStr++ + "&pos="++Pos], + PreviewStr); + true -> + PreviewStr + end + end; [PreviewIntStr,SizeStr,Md5] when From =:= obs -> Size = list_to_integer(SizeStr), PreviewInt = list_to_integer(PreviewIntStr), diff --git a/lib/observer/test/crashdump_viewer_SUITE.erl b/lib/observer/test/crashdump_viewer_SUITE.erl index 4b0127bcfb..4449ec54d1 100644 --- a/lib/observer/test/crashdump_viewer_SUITE.erl +++ b/lib/observer/test/crashdump_viewer_SUITE.erl @@ -76,7 +76,7 @@ 
end_per_testcase(Case, Config) -> end, ok. -suite() -> [{ct_hooks,[ts_install_cth]}]. +suite() -> []. all() -> [start_stop, @@ -416,19 +416,90 @@ special(File,Procs) -> old_attrib=undefined, old_comp_info=undefined}=Mod2, ok; - %% ".strangemodname" -> - %% {ok,Mods,[]} = crashdump_viewer:loaded_modules(), - %% lookat_all_mods(Mods), - %% ok; - %% ".sort" -> - %% %% sort ports, atoms and modules ???? - %% ok; - %% ".trunc" -> - %% %% ???? - %% ok; - ".trunc.bytes" -> + ".trunc_bin1" -> + %% This is 'full_dist' truncated after the first + %% "=binary:" + %% i.e. no binary exist in the dump + [#proc{pid=Pid0}|_Rest] = lists:keysort(#proc.name,Procs), + Pid = pid_to_list(Pid0), + {ok,ProcDetails=#proc{},[]} = crashdump_viewer:proc_details(Pid), + io:format(" process details ok",[]), + + #proc{dict=Dict} = ProcDetails, + + '#CDVNonexistingBinary' = proplists:get_value(bin,Dict), + '#CDVNonexistingBinary' = proplists:get_value(sub_bin,Dict), + + io:format(" nonexisting binaries ok",[]), + ok; + ".trunc_bin2" -> + %% This is 'full_dist' truncated after the first + %% "=binary:Addr\n + %% Size" + %% i.e. binaries are truncated + [#proc{pid=Pid0}|_Rest] = lists:keysort(#proc.name,Procs), + Pid = pid_to_list(Pid0), + {ok,ProcDetails=#proc{},[]} = crashdump_viewer:proc_details(Pid), + io:format(" process details ok",[]), + + #proc{dict=Dict} = ProcDetails, + + ['#CDVBin',Offset,Size,Pos] = proplists:get_value(bin,Dict), + {ok,'#CDVTruncatedBinary'} = + crashdump_viewer:expand_binary({Offset,Size,Pos}), + ['#CDVBin',SOffset,SSize,SPos] = proplists:get_value(sub_bin,Dict), + {ok,'#CDVTruncatedBinary'} = + crashdump_viewer:expand_binary({SOffset,SSize,SPos}), + + io:format(" expand truncated binary ok",[]), + ok; + ".trunc_bin3" -> + %% This is 'full_dist' truncated after the first + %% "=binary:Addr\n + %% Size:" + %% i.e. 
same as 'trunc_bin2', except the colon exists also + [#proc{pid=Pid0}|_Rest] = lists:keysort(#proc.name,Procs), + Pid = pid_to_list(Pid0), + {ok,ProcDetails=#proc{},[]} = crashdump_viewer:proc_details(Pid), + io:format(" process details ok",[]), + + #proc{dict=Dict} = ProcDetails, + + ['#CDVBin',Offset,Size,Pos] = proplists:get_value(bin,Dict), + {ok,'#CDVTruncatedBinary'} = + crashdump_viewer:expand_binary({Offset,Size,Pos}), + ['#CDVBin',SOffset,SSize,SPos] = proplists:get_value(sub_bin,Dict), + {ok,'#CDVTruncatedBinary'} = + crashdump_viewer:expand_binary({SOffset,SSize,SPos}), + + io:format(" expand truncated binary ok",[]), + ok; + ".trunc_bin4" -> + %% This is 'full_dist' truncated after the first + %% "=binary:Addr\n + %% Size:BinaryMissinOneByte" + %% i.e. the full binary is truncated, but the sub binary is complete + [#proc{pid=Pid0}|_Rest] = lists:keysort(#proc.name,Procs), + Pid = pid_to_list(Pid0), + {ok,ProcDetails=#proc{},[]} = crashdump_viewer:proc_details(Pid), + io:format(" process details ok",[]), + + #proc{dict=Dict} = ProcDetails, + + ['#CDVBin',Offset,Size,Pos] = proplists:get_value(bin,Dict), + {ok,'#CDVTruncatedBinary'} = + crashdump_viewer:expand_binary({Offset,Size,Pos}), + io:format(" expand truncated binary ok",[]), + ['#CDVBin',SOffset,SSize,SPos] = proplists:get_value(sub_bin,Dict), + {ok,<<_:SSize/binary>>} = + crashdump_viewer:expand_binary({SOffset,SSize,SPos}), + io:format(" expand complete sub binary ok",[]), + + ok; + ".trunc_bytes" -> {ok,_,[TW]} = crashdump_viewer:general_info(), {match,_} = re:run(TW,"CRASH DUMP SIZE LIMIT REACHED"), + io:format(" size limit information ok",[]), ok; ".unicode" -> #proc{pid=Pid0} = @@ -504,15 +575,41 @@ do_create_dumps(DataDir,Rel) -> CD3 = dump_with_args(DataDir,Rel,"instr","+Mim true"), CD4 = dump_with_strange_module_name(DataDir,Rel,"strangemodname"), Bytes = rand:uniform(300000) + 100, - CD5 = dump_with_args(DataDir,Rel,"trunc.bytes", + CD5 = dump_with_args(DataDir,Rel,"trunc_bytes", "-env 
ERL_CRASH_DUMP_BYTES " ++ integer_to_list(Bytes)), CD6 = dump_with_unicode_atoms(DataDir,Rel,"unicode"), - {[CD1,CD2,CD3,CD4,CD5,CD6], DosDump}; + TruncatedDumps = truncate_dump(CD1), + {[CD1,CD2,CD3,CD4,CD5,CD6|TruncatedDumps], DosDump}; _ -> {[CD1,CD2], DosDump} end. +truncate_dump(File) -> + {ok,Bin} = file:read_file(File), + BinTag = <<"\n=binary:">>, + Colon = <<":">>, + NewLine = case os:type() of + {win32,_} -> <<"\r\n">>; + _ -> <<"\n">> + end, + [StartBin,AfterTag] = binary:split(Bin,BinTag), + [AddrAndSize,BinaryAndRest] = binary:split(AfterTag,Colon), + [Binary,_Rest] = binary:split(BinaryAndRest,NewLine), + TruncSize = byte_size(Binary) - 2, + <<TruncBinary:TruncSize/binary,_/binary>> = Binary, + TruncName = filename:rootname(File) ++ ".trunc_bin", + write_trunc_files(TruncName,StartBin, + [BinTag,AddrAndSize,Colon,TruncBinary],1). + +write_trunc_files(TruncName0,Bin,[Part|Parts],N) -> + TruncName = TruncName0++integer_to_list(N), + Bin1 = <<Bin/binary,Part/binary>>, + ok = file:write_file(TruncName,Bin1), + [TruncName|write_trunc_files(TruncName0,Bin1,Parts,N+1)]; +write_trunc_files(_,_,[],_) -> + []. + %% Create a dump which has three visible nodes, one hidden and one %% not connected node, and with monitors and links between nodes. |