aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/tools/doc/src/cover.xml5
-rw-r--r--lib/tools/src/cover.erl139
-rw-r--r--lib/tools/test/cover_SUITE.erl116
-rw-r--r--lib/tools/test/cover_SUITE_data/f.erl11
4 files changed, 222 insertions, 49 deletions
diff --git a/lib/tools/doc/src/cover.xml b/lib/tools/doc/src/cover.xml
index be0fd5bd47..a2444ec947 100644
--- a/lib/tools/doc/src/cover.xml
+++ b/lib/tools/doc/src/cover.xml
@@ -106,6 +106,11 @@
from all nodes.</p>
<p>To only collect data from remote nodes without stopping
<c>cover</c> on those nodes, use <c>cover:flush/1</c></p>
+ <p>If the connection to a remote node goes down, the main node
+ will mark it as lost. If the node comes back it will be added
+ again. If the remote node was alive during the disconnected
+ periode, cover data from before and during this periode will be
+ included in the analysis.</p>
</description>
<funcs>
<func>
diff --git a/lib/tools/src/cover.erl b/lib/tools/src/cover.erl
index 2154a21ed6..10f14b0a49 100644
--- a/lib/tools/src/cover.erl
+++ b/lib/tools/src/cover.erl
@@ -26,12 +26,17 @@
%% ARCHITECTURE
%% The coverage tool consists of one process on each node involved in
%% coverage analysis. The process is registered as 'cover_server'
-%% (?SERVER). All cover_servers in the distributed system are linked
-%% together. The cover_server on the 'main' node is in charge, and it
-%% traps exits so it can detect nodedown or process crashes on the
-%% remote nodes. This process is implemented by the functions
-%% init_main/1 and main_process_loop/1. The cover_server on the remote
-%% nodes are implemented by the functions init_remote/2 and
+%% (?SERVER). The cover_server on the 'main' node is in charge, and
+%% it monitors the cover_servers on all remote nodes. When it gets a
+%% 'DOWN' message for another cover_server, it marks the node as
+%% 'lost'. If a nodeup is received for a lost node the main node
+%% ensures that the cover compiled modules are loaded again. If the
+%% remote node was alive during the disconnected periode, cover data
+%% for this periode will also be included in the analysis.
+%%
+%% The cover_server process on the main node is implemented by the
+%% functions init_main/1 and main_process_loop/1. The cover_server on
+%% the remote nodes are implemented by the functions init_remote/2 and
%% remote_process_loop/1.
%%
%% TABLES
@@ -90,7 +95,8 @@
-record(main_state, {compiled=[], % [{Module,File}]
imported=[], % [{Module,File,ImportFile}]
stopper, % undefined | pid()
- nodes=[]}). % [Node]
+ nodes=[], % [Node]
+ lost_nodes=[]}). % [Node]
-record(remote_state, {compiled=[], % [{Module,File}]
main_node}). % atom()
@@ -586,39 +592,14 @@ init_main(Starter) ->
ets:new(?BINARY_TABLE, [set, named_table]),
ets:new(?COLLECTION_TABLE, [set, public, named_table]),
ets:new(?COLLECTION_CLAUSE_TABLE, [set, public, named_table]),
+ net_kernel:monitor_nodes(true),
Starter ! {?SERVER,started},
main_process_loop(#main_state{}).
main_process_loop(State) ->
receive
{From, {start_nodes,Nodes}} ->
- ThisNode = node(),
- StartedNodes =
- lists:foldl(
- fun(Node,Acc) ->
- case rpc:call(Node,cover,remote_start,[ThisNode]) of
- {ok,_RPid} ->
- erlang:monitor(process,{?SERVER,Node}),
- [Node|Acc];
- Error ->
- io:format("Could not start cover on ~w: ~p\n",
- [Node,Error]),
- Acc
- end
- end,
- [],
- Nodes),
-
- %% In case some of the compiled modules have been unloaded they
- %% should not be loaded on the new node.
- {_LoadedModules,Compiled} =
- get_compiled_still_loaded(State#main_state.nodes,
- State#main_state.compiled),
- remote_load_compiled(StartedNodes,Compiled),
-
- State1 =
- State#main_state{nodes = State#main_state.nodes ++ StartedNodes,
- compiled = Compiled},
+ {StartedNodes,State1} = do_start_nodes(Nodes, State),
reply(From, {ok,StartedNodes}),
main_process_loop(State1);
@@ -810,9 +791,24 @@ main_process_loop(State) ->
main_process_loop(S);
{'DOWN', _MRef, process, {?SERVER,Node}, _Info} ->
- %% A remote cover_server is down, remove node from list.
- Nodes1 = State#main_state.nodes--[Node],
- main_process_loop(State#main_state{nodes=Nodes1});
+ %% A remote cover_server is down, mark as lost
+ Nodes = State#main_state.nodes--[Node],
+ Lost = [Node|State#main_state.lost_nodes],
+ main_process_loop(State#main_state{nodes=Nodes,lost_nodes=Lost});
+
+ {nodeup,Node} ->
+ State1 =
+ case lists:member(Node,State#main_state.lost_nodes) of
+ true ->
+ sync_compiled(Node,State);
+ false ->
+ State
+ end,
+ main_process_loop(State1);
+
+ {nodedown,_} ->
+ %% Will be taken care of when 'DOWN' message arrives
+ main_process_loop(State);
{From, get_main_node} ->
reply(From, node()),
@@ -874,8 +870,13 @@ remote_process_loop(State) ->
unregister(?SERVER),
ok; % not replying since 'DOWN' message will be received anyway
+ {remote,get_compiled} ->
+ remote_reply(State#remote_state.main_node,
+ State#remote_state.compiled),
+ remote_process_loop(State);
+
{From, get_main_node} ->
- reply(From, State#remote_state.main_node),
+ remote_reply(From, State#remote_state.main_node),
remote_process_loop(State);
get_status ->
@@ -987,6 +988,36 @@ unload([]) ->
%%%--Handling of remote nodes--------------------------------------------
+do_start_nodes(Nodes, State) ->
+ ThisNode = node(),
+ StartedNodes =
+ lists:foldl(
+ fun(Node,Acc) ->
+ case rpc:call(Node,cover,remote_start,[ThisNode]) of
+ {ok,_RPid} ->
+ erlang:monitor(process,{?SERVER,Node}),
+ [Node|Acc];
+ Error ->
+ io:format("Could not start cover on ~w: ~p\n",
+ [Node,Error]),
+ Acc
+ end
+ end,
+ [],
+ Nodes),
+
+ %% In case some of the compiled modules have been unloaded they
+ %% should not be loaded on the new node.
+ {_LoadedModules,Compiled} =
+ get_compiled_still_loaded(State#main_state.nodes,
+ State#main_state.compiled),
+ remote_load_compiled(StartedNodes,Compiled),
+
+ State1 =
+ State#main_state{nodes = State#main_state.nodes ++ StartedNodes,
+ compiled = Compiled},
+ {StartedNodes, State1}.
+
%% start the cover_server on a remote node
remote_start(MainNode) ->
case whereis(?SERVER) of
@@ -1010,6 +1041,30 @@ remote_start(MainNode) ->
{error,{already_started,Pid}}
end.
+%% If a lost node comes back, ensure that main and remote node has the
+%% same cover compiled modules. Note that no action is taken if the
+%% same {Mod,File} eksists on both, i.e. code change is not handled!
+sync_compiled(Node,State) ->
+ #main_state{compiled=Compiled0,nodes=Nodes,lost_nodes=Lost}=State,
+ State1 =
+ case remote_call(Node,{remote,get_compiled}) of
+ {error,node_dead} ->
+ {_,S} = do_start_nodes([Node],State),
+ S;
+ {error,_} ->
+ State;
+ RemoteCompiled ->
+ {_,Compiled} = get_compiled_still_loaded(Nodes,Compiled0),
+ Unload = [UM || {UM,_}=U <- RemoteCompiled,
+ false == lists:member(U,Compiled)],
+ remote_unload([Node],Unload),
+ Load = [L || L <- Compiled,
+ false == lists:member(L,RemoteCompiled)],
+ remote_load_compiled([Node],Load),
+ State#main_state{compiled=Compiled, nodes=[Node|Nodes]}
+ end,
+ State1#main_state{lost_nodes=Lost--[Node]}.
+
%% Load a set of cover compiled modules on remote nodes,
%% We do it ?MAX_MODS modules at a time so that we don't
%% run out of memory on the cover_server node.
@@ -2279,7 +2334,13 @@ do_reset2([]) ->
do_clear(Module) ->
ets:match_delete(?COVER_CLAUSE_TABLE, {Module,'_'}),
ets:match_delete(?COVER_TABLE, {#bump{module=Module},'_'}),
- ets:match_delete(?COLLECTION_TABLE, {#bump{module=Module},'_'}).
+ case lists:member(?COLLECTION_TABLE, ets:all()) of
+ true ->
+ %% We're on the main node
+ ets:match_delete(?COLLECTION_TABLE, {#bump{module=Module},'_'});
+ false ->
+ ok
+ end.
not_loaded(Module, unloaded, State) ->
do_clear(Module),
diff --git a/lib/tools/test/cover_SUITE.erl b/lib/tools/test/cover_SUITE.erl
index 950c4ddbcb..3bf1b44af8 100644
--- a/lib/tools/test/cover_SUITE.erl
+++ b/lib/tools/test/cover_SUITE.erl
@@ -23,7 +23,7 @@
init_per_group/2,end_per_group/2]).
-export([start/1, compile/1, analyse/1, misc/1, stop/1,
- distribution/1, export_import/1,
+ distribution/1, reconnect/1, die_and_reconnect/1, export_import/1,
otp_5031/1, eif/1, otp_5305/1, otp_5418/1, otp_6115/1, otp_7095/1,
otp_8188/1, otp_8270/1, otp_8273/1, otp_8340/1]).
@@ -45,7 +45,8 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
all() ->
case whereis(cover_server) of
undefined ->
- [start, compile, analyse, misc, stop, distribution,
+ [start, compile, analyse, misc, stop,
+ distribution, reconnect, die_and_reconnect,
export_import, otp_5031, eif, otp_5305, otp_5418,
otp_6115, otp_7095, otp_8188, otp_8270, otp_8273,
otp_8340];
@@ -408,20 +409,117 @@ distribution(Config) when is_list(Config) ->
?line true = is_unloaded(LocalBeam),
?line true = is_unloaded(N2Beam),
- %% Check that cover_server on remote node dies if main node dies
+ %% Check that cover_server on remote node does not die if main node dies
?line {ok,[N1]} = cover:start(N1),
- ?line true = is_pid(rpc:call(N1,erlang,whereis,[cover_server])),
+ ?line true = is_pid(N1Server = rpc:call(N1,erlang,whereis,[cover_server])),
?line exit(whereis(cover_server),kill),
- ?line timer:sleep(10),
- ?line undefined = rpc:call(N1,erlang,whereis,[cover_server]),
-
+ ?line timer:sleep(100),
+ ?line N1Server = rpc:call(N1,erlang,whereis,[cover_server]),
+
%% Cleanup
?line Files = lsfiles(),
?line remove(files(Files, ".beam")),
?line ?t:stop_node(N1),
?line ?t:stop_node(N2).
-
+%% Test that a lost node is reconnected
+reconnect(Config) ->
+ DataDir = ?config(data_dir, Config),
+ ok = file:set_cwd(DataDir),
+
+ {ok,a} = compile:file(a),
+ {ok,b} = compile:file(b),
+ {ok,f} = compile:file(f),
+
+ {ok,N1} = ?t:start_node(cover_SUITE_reconnect,peer,
+ [{args," -pa " ++ DataDir},{start_cover,false}]),
+ {ok,a} = cover:compile(a),
+ {ok,f} = cover:compile(f),
+ {ok,[N1]} = cover:start(nodes()),
+
+ %% Some calls to check later
+ rpc:call(N1,f,f1,[]),
+ cover:flush(N1),
+ rpc:call(N1,f,f1,[]),
+
+ %% This will cause a call to f:f2() when nodes()==[] on N1
+ rpc:cast(N1,f,call_f2_when_isolated,[]),
+
+ %% Disconnect and check that node is removed from main cover node
+ net_kernel:disconnect(N1),
+ [] = cover:which_nodes(),
+ timer:sleep(500), % allow some time for the f:f2() call
+
+ %% Do some add one module (b) and remove one module (a)
+ code:purge(a),
+ {module,a} = code:load_file(a),
+ {ok,b} = cover:compile(b),
+ cover_compiled = code:which(b),
+
+ [] = cover:which_nodes(),
+ check_f_calls(1,0), % only the first call - before the flush
+
+ %% Reconnect the node and check that b and f are cover compiled but not a
+ net_kernel:connect_node(N1),
+ timer:sleep(100),
+ [N1] = cover:which_nodes(), % we are reconnected
+ cover_compiled = rpc:call(N1,code,which,[b]),
+ cover_compiled = rpc:call(N1,code,which,[f]),
+ ABeam = rpc:call(N1,code,which,[a]),
+ false = (cover_compiled==ABeam),
+
+ %% Ensure that we have:
+ %% * one f1 call from before the flush,
+ %% * one f1 call from after the flush but before disconnect
+ %% * one f2 call when disconnected
+ check_f_calls(2,1),
+
+ cover:stop(),
+ ?t:stop_node(N1),
+ ok.
+
+%% Test that a lost node is reconnected - also if it has been dead
+die_and_reconnect(Config) ->
+ DataDir = ?config(data_dir, Config),
+ ok = file:set_cwd(DataDir),
+
+ {ok,f} = compile:file(f),
+
+ NodeName = cover_SUITE_die_and_reconnect,
+ {ok,N1} = ?t:start_node(NodeName,peer,
+ [{args," -pa " ++ DataDir},{start_cover,false}]),
+ %% {ok,a} = cover:compile(a),
+ {ok,f} = cover:compile(f),
+ {ok,[N1]} = cover:start(nodes()),
+
+ %% Some calls to check later
+ rpc:call(N1,f,f1,[]),
+ cover:flush(N1),
+ rpc:call(N1,f,f1,[]),
+
+ %% Kill the node
+ rpc:call(N1,erlang,halt,[]),
+ [] = cover:which_nodes(),
+
+ check_f_calls(1,0), % only the first call - before the flush
+
+ %% Restart the node and check that cover reconnects
+ {ok,N1} = ?t:start_node(NodeName,peer,
+ [{args," -pa " ++ DataDir},{start_cover,false}]),
+ timer:sleep(100),
+ [N1] = cover:which_nodes(), % we are reconnected
+ cover_compiled = rpc:call(N1,code,which,[f]),
+
+ %% One more call...
+ rpc:call(N1,f,f1,[]),
+
+ %% Ensure that no more calls are counted
+ check_f_calls(2,0),
+
+ cover:stop(),
+ ?t:stop_node(N1),
+ ok.
+
export_import(suite) -> [];
export_import(Config) when is_list(Config) ->
?line DataDir = ?config(data_dir, Config),
@@ -1253,4 +1351,4 @@ is_unloaded(What) ->
end.
check_f_calls(F1,F2) ->
- {ok,[{{f,f1,0},F1},{{f,f2,0},F2}]} = cover:analyse(f,calls,function).
+ {ok,[{{f,f1,0},F1},{{f,f2,0},F2}|_]} = cover:analyse(f,calls,function).
diff --git a/lib/tools/test/cover_SUITE_data/f.erl b/lib/tools/test/cover_SUITE_data/f.erl
index 1ef8bbdb49..ce2963014a 100644
--- a/lib/tools/test/cover_SUITE_data/f.erl
+++ b/lib/tools/test/cover_SUITE_data/f.erl
@@ -1,5 +1,5 @@
-module(f).
--export([f1/0,f2/0]).
+-export([f1/0,f2/0,call_f2_when_isolated/0]).
f1() ->
f1_line1,
@@ -8,3 +8,12 @@ f1() ->
f2() ->
f2_line1,
f2_line2.
+
+call_f2_when_isolated() ->
+ case nodes() of
+ [] ->
+ f2();
+ _ ->
+ timer:sleep(100),
+ call_f2_when_isolated()
+ end.