diff options
| -rw-r--r-- | lib/tools/doc/src/cover.xml | 5 | ||||
| -rw-r--r-- | lib/tools/src/cover.erl | 139 | ||||
| -rw-r--r-- | lib/tools/test/cover_SUITE.erl | 116 | ||||
| -rw-r--r-- | lib/tools/test/cover_SUITE_data/f.erl | 11 | 
4 files changed, 222 insertions, 49 deletions
diff --git a/lib/tools/doc/src/cover.xml b/lib/tools/doc/src/cover.xml index be0fd5bd47..a2444ec947 100644 --- a/lib/tools/doc/src/cover.xml +++ b/lib/tools/doc/src/cover.xml @@ -106,6 +106,11 @@        from all nodes.</p>      <p>To only collect data from remote nodes without stopping        <c>cover</c> on those nodes, use <c>cover:flush/1</c></p> +    <p>If the connection to a remote node goes down, the main node +      will mark it as lost. If the node comes back it will be added +      again. If the remote node was alive during the disconnected +      periode, cover data from before and during this periode will be +      included in the analysis.</p>    </description>    <funcs>      <func> diff --git a/lib/tools/src/cover.erl b/lib/tools/src/cover.erl index 2154a21ed6..10f14b0a49 100644 --- a/lib/tools/src/cover.erl +++ b/lib/tools/src/cover.erl @@ -26,12 +26,17 @@  %% ARCHITECTURE  %% The coverage tool consists of one process on each node involved in  %% coverage analysis. The process is registered as 'cover_server' -%% (?SERVER).  All cover_servers in the distributed system are linked -%% together.  The cover_server on the 'main' node is in charge, and it -%% traps exits so it can detect nodedown or process crashes on the -%% remote nodes. This process is implemented by the functions -%% init_main/1 and main_process_loop/1. The cover_server on the remote -%% nodes are implemented by the functions init_remote/2 and +%% (?SERVER).  The cover_server on the 'main' node is in charge, and +%% it monitors the cover_servers on all remote nodes. When it gets a +%% 'DOWN' message for another cover_server, it marks the node as +%% 'lost'. If a nodeup is received for a lost node the main node +%% ensures that the cover compiled modules are loaded again. If the +%% remote node was alive during the disconnected periode, cover data +%% for this periode will also be included in the analysis. +%% +%% The cover_server process on the main node is implemented by the +%% functions init_main/1 and main_process_loop/1. The cover_server on +%% the remote nodes are implemented by the functions init_remote/2 and  %% remote_process_loop/1.  %%  %% TABLES @@ -90,7 +95,8 @@  -record(main_state, {compiled=[],           % [{Module,File}]  		     imported=[],           % [{Module,File,ImportFile}]  		     stopper,               % undefined | pid() -		     nodes=[]}).            % [Node] +		     nodes=[],              % [Node] +		     lost_nodes=[]}).       % [Node]  -record(remote_state, {compiled=[],         % [{Module,File}]  		       main_node}).         % atom() @@ -586,39 +592,14 @@ init_main(Starter) ->      ets:new(?BINARY_TABLE, [set, named_table]),      ets:new(?COLLECTION_TABLE, [set, public, named_table]),      ets:new(?COLLECTION_CLAUSE_TABLE, [set, public, named_table]), +    net_kernel:monitor_nodes(true),      Starter ! {?SERVER,started},      main_process_loop(#main_state{}).  main_process_loop(State) ->      receive  	{From, {start_nodes,Nodes}} -> -	    ThisNode = node(), -	    StartedNodes =  -		lists:foldl( -		  fun(Node,Acc) -> -			  case rpc:call(Node,cover,remote_start,[ThisNode]) of -			      {ok,_RPid} -> -				  erlang:monitor(process,{?SERVER,Node}), -				  [Node|Acc]; -			      Error -> -				  io:format("Could not start cover on ~w: ~p\n", -					    [Node,Error]), -				  Acc -			  end -		  end, -		  [], -		  Nodes), - -	    %% In case some of the compiled modules have been unloaded they  -	    %% should not be loaded on the new node. -	    {_LoadedModules,Compiled} =  -		get_compiled_still_loaded(State#main_state.nodes, -					  State#main_state.compiled), -	    remote_load_compiled(StartedNodes,Compiled), -	     -	    State1 =  -		State#main_state{nodes = State#main_state.nodes ++ StartedNodes, -				 compiled = Compiled}, +	    {StartedNodes,State1} = do_start_nodes(Nodes, State),  	    reply(From, {ok,StartedNodes}),  	    main_process_loop(State1); @@ -810,9 +791,24 @@ main_process_loop(State) ->  	    main_process_loop(S);		      	{'DOWN', _MRef, process, {?SERVER,Node}, _Info} -> -	    %% A remote cover_server is down, remove node from list. -	    Nodes1 = State#main_state.nodes--[Node], -	    main_process_loop(State#main_state{nodes=Nodes1}); +	    %% A remote cover_server is down, mark as lost +	    Nodes = State#main_state.nodes--[Node], +	    Lost = [Node|State#main_state.lost_nodes], +	    main_process_loop(State#main_state{nodes=Nodes,lost_nodes=Lost}); + +	{nodeup,Node} -> +	    State1 = +		case lists:member(Node,State#main_state.lost_nodes) of +		    true -> +			sync_compiled(Node,State); +		    false -> +			State +	    end, +	    main_process_loop(State1); + +	{nodedown,_} -> +	    %% Will be taken care of when 'DOWN' message arrives +	    main_process_loop(State);  	{From, get_main_node} ->  	    reply(From, node()), @@ -874,8 +870,13 @@ remote_process_loop(State) ->              unregister(?SERVER),  	    ok; % not replying since 'DOWN' message will be received anyway +	{remote,get_compiled} -> +	    remote_reply(State#remote_state.main_node, +			 State#remote_state.compiled), +	    remote_process_loop(State); +  	{From, get_main_node} -> -	    reply(From, State#remote_state.main_node), +	    remote_reply(From, State#remote_state.main_node),  	    remote_process_loop(State);  	get_status -> @@ -987,6 +988,36 @@ unload([]) ->  %%%--Handling of remote nodes-------------------------------------------- +do_start_nodes(Nodes, State) -> +    ThisNode = node(), +    StartedNodes = +	lists:foldl( +	  fun(Node,Acc) -> +		  case rpc:call(Node,cover,remote_start,[ThisNode]) of +		      {ok,_RPid} -> +			  erlang:monitor(process,{?SERVER,Node}), +			  [Node|Acc]; +		      Error -> +			  io:format("Could not start cover on ~w: ~p\n", +				    [Node,Error]), +			  Acc +		  end +	  end, +	  [], +	  Nodes), + +    %% In case some of the compiled modules have been unloaded they +    %% should not be loaded on the new node. +    {_LoadedModules,Compiled} = +	get_compiled_still_loaded(State#main_state.nodes, +				  State#main_state.compiled), +    remote_load_compiled(StartedNodes,Compiled), + +    State1 = +	State#main_state{nodes = State#main_state.nodes ++ StartedNodes, +			 compiled = Compiled}, +    {StartedNodes, State1}. +  %% start the cover_server on a remote node  remote_start(MainNode) ->      case whereis(?SERVER) of @@ -1010,6 +1041,30 @@ remote_start(MainNode) ->  	    {error,{already_started,Pid}}      end. +%% If a lost node comes back, ensure that main and remote node has the +%% same cover compiled modules. Note that no action is taken if the +%% same {Mod,File} eksists on both, i.e. code change is not handled! +sync_compiled(Node,State) -> +    #main_state{compiled=Compiled0,nodes=Nodes,lost_nodes=Lost}=State, +    State1 = +	case remote_call(Node,{remote,get_compiled}) of +	    {error,node_dead} -> +		{_,S} = do_start_nodes([Node],State), +		S; +	    {error,_} -> +		State; +	    RemoteCompiled -> +		{_,Compiled} =  get_compiled_still_loaded(Nodes,Compiled0), +		Unload = [UM || {UM,_}=U <- RemoteCompiled, +			       false == lists:member(U,Compiled)], +		remote_unload([Node],Unload), +		Load = [L || L <- Compiled, +			     false == lists:member(L,RemoteCompiled)], +		remote_load_compiled([Node],Load), +		State#main_state{compiled=Compiled, nodes=[Node|Nodes]} +	end, +    State1#main_state{lost_nodes=Lost--[Node]}. +  %% Load a set of cover compiled modules on remote nodes,  %% We do it ?MAX_MODS modules at a time so that we don't  %% run out of memory on the cover_server node.  @@ -2279,7 +2334,13 @@ do_reset2([]) ->  do_clear(Module) ->      ets:match_delete(?COVER_CLAUSE_TABLE, {Module,'_'}),      ets:match_delete(?COVER_TABLE, {#bump{module=Module},'_'}), -    ets:match_delete(?COLLECTION_TABLE, {#bump{module=Module},'_'}). +    case lists:member(?COLLECTION_TABLE, ets:all()) of +	true -> +	    %% We're on the main node +	    ets:match_delete(?COLLECTION_TABLE, {#bump{module=Module},'_'}); +	false -> +	    ok +    end.  not_loaded(Module, unloaded, State) ->      do_clear(Module), diff --git a/lib/tools/test/cover_SUITE.erl b/lib/tools/test/cover_SUITE.erl index 950c4ddbcb..3bf1b44af8 100644 --- a/lib/tools/test/cover_SUITE.erl +++ b/lib/tools/test/cover_SUITE.erl @@ -23,7 +23,7 @@  	 init_per_group/2,end_per_group/2]).  -export([start/1, compile/1, analyse/1, misc/1, stop/1,  -	 distribution/1, export_import/1, +	 distribution/1, reconnect/1, die_and_reconnect/1, export_import/1,  	 otp_5031/1, eif/1, otp_5305/1, otp_5418/1, otp_6115/1, otp_7095/1,           otp_8188/1, otp_8270/1, otp_8273/1, otp_8340/1]). @@ -45,7 +45,8 @@ suite() -> [{ct_hooks,[ts_install_cth]}].  all() ->       case whereis(cover_server) of  	undefined -> -	    [start, compile, analyse, misc, stop, distribution, +	    [start, compile, analyse, misc, stop, +	     distribution, reconnect, die_and_reconnect,  	     export_import, otp_5031, eif, otp_5305, otp_5418,  	     otp_6115, otp_7095, otp_8188, otp_8270, otp_8273,  	     otp_8340]; @@ -408,20 +409,117 @@ distribution(Config) when is_list(Config) ->      ?line true = is_unloaded(LocalBeam),      ?line true = is_unloaded(N2Beam), -    %% Check that cover_server on remote node dies if main node dies +    %% Check that cover_server on remote node does not die if main node dies      ?line {ok,[N1]} = cover:start(N1), -    ?line true = is_pid(rpc:call(N1,erlang,whereis,[cover_server])), +    ?line true = is_pid(N1Server = rpc:call(N1,erlang,whereis,[cover_server])),      ?line exit(whereis(cover_server),kill), -    ?line timer:sleep(10), -    ?line undefined = rpc:call(N1,erlang,whereis,[cover_server]),     -     +    ?line timer:sleep(100), +    ?line N1Server = rpc:call(N1,erlang,whereis,[cover_server]), +      %% Cleanup      ?line Files = lsfiles(),      ?line remove(files(Files, ".beam")),      ?line ?t:stop_node(N1),      ?line ?t:stop_node(N2). -     +%% Test that a lost node is reconnected +reconnect(Config) -> +    DataDir = ?config(data_dir, Config), +    ok = file:set_cwd(DataDir), + +    {ok,a} = compile:file(a), +    {ok,b} = compile:file(b), +    {ok,f} = compile:file(f), + +    {ok,N1} = ?t:start_node(cover_SUITE_reconnect,peer, +			    [{args," -pa " ++ DataDir},{start_cover,false}]), +    {ok,a} = cover:compile(a), +    {ok,f} = cover:compile(f), +    {ok,[N1]} = cover:start(nodes()), + +    %% Some calls to check later +    rpc:call(N1,f,f1,[]), +    cover:flush(N1), +    rpc:call(N1,f,f1,[]), + +    %% This will cause a call to f:f2() when nodes()==[] on N1 +    rpc:cast(N1,f,call_f2_when_isolated,[]), + +    %% Disconnect and check that node is removed from main cover node +    net_kernel:disconnect(N1), +    [] = cover:which_nodes(), +    timer:sleep(500), % allow some time for the f:f2() call + +    %% Do some add one module (b) and remove one module (a) +    code:purge(a), +    {module,a} = code:load_file(a), +    {ok,b} = cover:compile(b), +    cover_compiled = code:which(b), + +    [] = cover:which_nodes(), +    check_f_calls(1,0), % only the first call - before the flush + +    %% Reconnect the node and check that b and f are cover compiled but not a +    net_kernel:connect_node(N1), +    timer:sleep(100), +    [N1] = cover:which_nodes(), % we are reconnected +    cover_compiled = rpc:call(N1,code,which,[b]), +    cover_compiled = rpc:call(N1,code,which,[f]), +    ABeam = rpc:call(N1,code,which,[a]), +    false = (cover_compiled==ABeam), + +    %% Ensure that we have: +    %% * one f1 call from before the flush, +    %% * one f1 call from after the flush but before disconnect +    %% * one f2 call when disconnected +    check_f_calls(2,1), + +    cover:stop(), +    ?t:stop_node(N1), +    ok. + +%% Test that a lost node is reconnected - also if it has been dead +die_and_reconnect(Config) -> +    DataDir = ?config(data_dir, Config), +    ok = file:set_cwd(DataDir), + +    {ok,f} = compile:file(f), + +    NodeName = cover_SUITE_die_and_reconnect, +    {ok,N1} = ?t:start_node(NodeName,peer, +			    [{args," -pa " ++ DataDir},{start_cover,false}]), +    %% {ok,a} = cover:compile(a), +    {ok,f} = cover:compile(f), +    {ok,[N1]} = cover:start(nodes()), + +    %% Some calls to check later +    rpc:call(N1,f,f1,[]), +    cover:flush(N1), +    rpc:call(N1,f,f1,[]), + +    %% Kill the node +    rpc:call(N1,erlang,halt,[]), +    [] = cover:which_nodes(), + +    check_f_calls(1,0), % only the first call - before the flush + +    %% Restart the node and check that cover reconnects +    {ok,N1} = ?t:start_node(NodeName,peer, +			    [{args," -pa " ++ DataDir},{start_cover,false}]), +    timer:sleep(100), +    [N1] = cover:which_nodes(), % we are reconnected +    cover_compiled = rpc:call(N1,code,which,[f]), + +    %% One more call... +    rpc:call(N1,f,f1,[]), + +    %% Ensure that no more calls are counted +    check_f_calls(2,0), + +    cover:stop(), +    ?t:stop_node(N1), +    ok. +  export_import(suite) -> [];  export_import(Config) when is_list(Config) ->      ?line DataDir = ?config(data_dir, Config), @@ -1253,4 +1351,4 @@ is_unloaded(What) ->      end.  check_f_calls(F1,F2) -> -    {ok,[{{f,f1,0},F1},{{f,f2,0},F2}]} = cover:analyse(f,calls,function). +    {ok,[{{f,f1,0},F1},{{f,f2,0},F2}|_]} = cover:analyse(f,calls,function). diff --git a/lib/tools/test/cover_SUITE_data/f.erl b/lib/tools/test/cover_SUITE_data/f.erl index 1ef8bbdb49..ce2963014a 100644 --- a/lib/tools/test/cover_SUITE_data/f.erl +++ b/lib/tools/test/cover_SUITE_data/f.erl @@ -1,5 +1,5 @@  -module(f). --export([f1/0,f2/0]). +-export([f1/0,f2/0,call_f2_when_isolated/0]).  f1() ->      f1_line1, @@ -8,3 +8,12 @@ f1() ->  f2() ->      f2_line1,      f2_line2. + +call_f2_when_isolated() -> +    case nodes() of +	[] -> +	    f2(); +	_ -> +	    timer:sleep(100), +	    call_f2_when_isolated() +    end.  | 
