From c89f88ab3ec77ebc1cef180e1ae5c9917b4c7b06 Mon Sep 17 00:00:00 2001 From: Siri Hansen Date: Tue, 18 Dec 2012 18:19:29 +0100 Subject: [cover] Don't mark stopped node as lost Nodes that were stopped with cover:stop/1 were marked as lost and would be reconnected if a nodeup was later received for a node with the same name. This has been corrected. --- lib/tools/src/cover.erl | 11 ++++++++-- lib/tools/test/cover_SUITE.erl | 47 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/tools/src/cover.erl b/lib/tools/src/cover.erl index 10f14b0a49..e51763b6ee 100644 --- a/lib/tools/src/cover.erl +++ b/lib/tools/src/cover.erl @@ -792,8 +792,15 @@ main_process_loop(State) -> {'DOWN', _MRef, process, {?SERVER,Node}, _Info} -> %% A remote cover_server is down, mark as lost - Nodes = State#main_state.nodes--[Node], - Lost = [Node|State#main_state.lost_nodes], + {Nodes,Lost} = + case lists:member(Node,State#main_state.nodes) of + true -> + N = State#main_state.nodes--[Node], + L = [Node|State#main_state.lost_nodes], + {N,L}; + false -> % node stopped + {State#main_state.nodes,State#main_state.lost_nodes} + end, main_process_loop(State#main_state{nodes=Nodes,lost_nodes=Lost}); {nodeup,Node} -> diff --git a/lib/tools/test/cover_SUITE.erl b/lib/tools/test/cover_SUITE.erl index 3bf1b44af8..fc30b7453a 100644 --- a/lib/tools/test/cover_SUITE.erl +++ b/lib/tools/test/cover_SUITE.erl @@ -23,7 +23,8 @@ init_per_group/2,end_per_group/2]). -export([start/1, compile/1, analyse/1, misc/1, stop/1, - distribution/1, reconnect/1, die_and_reconnect/1, export_import/1, + distribution/1, reconnect/1, die_and_reconnect/1, + dont_reconnect_after_stop/1, export_import/1, otp_5031/1, eif/1, otp_5305/1, otp_5418/1, otp_6115/1, otp_7095/1, otp_8188/1, otp_8270/1, otp_8273/1, otp_8340/1]). @@ -47,6 +48,7 @@ all() -> undefined -> [start, compile, analyse, misc, stop, distribution, reconnect, die_and_reconnect, + dont_reconnect_after_stop, export_import, otp_5031, eif, otp_5305, otp_5418, otp_6115, otp_7095, otp_8188, otp_8270, otp_8273, otp_8340]; @@ -520,6 +522,49 @@ die_and_reconnect(Config) -> ?t:stop_node(N1), ok. +%% Test that a stopped node is not marked as lost, i.e. that it is not +%% reconnected if it is restarted (OTP-10638) +dont_reconnect_after_stop(Config) -> + DataDir = ?config(data_dir, Config), + ok = file:set_cwd(DataDir), + + {ok,f} = compile:file(f), + + NodeName = cover_SUITE_dont_reconnect_after_stop, + {ok,N1} = ?t:start_node(NodeName,peer, + [{args," -pa " ++ DataDir},{start_cover,false}]), + {ok,f} = cover:compile(f), + {ok,[N1]} = cover:start(nodes()), + + %% A call to check later + rpc:call(N1,f,f1,[]), + + %% Stop cover on the node, then terminate the node + cover:stop(N1), + rpc:call(N1,erlang,halt,[]), + [] = cover:which_nodes(), + + check_f_calls(1,0), + + %% Restart the node and check that cover does not reconnect + {ok,N1} = ?t:start_node(NodeName,peer, + [{args," -pa " ++ DataDir},{start_cover,false}]), + timer:sleep(300), + [] = cover:which_nodes(), + Beam = rpc:call(N1,code,which,[f]), + false = (Beam==cover_compiled), + + %% One more call... + rpc:call(N1,f,f1,[]), + cover:flush(N1), + + %% Ensure that the last call is not counted + check_f_calls(1,0), + + cover:stop(), + ?t:stop_node(N1), + ok. + export_import(suite) -> []; export_import(Config) when is_list(Config) -> ?line DataDir = ?config(data_dir, Config), -- cgit v1.2.3 From c1ecd14be890ec4f2e8a25f00525412a381fcc72 Mon Sep 17 00:00:00 2001 From: Siri Hansen Date: Wed, 19 Dec 2012 12:25:07 +0100 Subject: [cover] Remove stopped node also from lost_nodes list A nodes that was stopped with cover:stop/1 while marked as lost would not be removed from the list of lost nodes. Therefore, if a nodeup was later received for a node with the same name, it would be reconnected. This has been corrected. --- lib/tools/src/cover.erl | 4 +++- lib/tools/test/cover_SUITE.erl | 50 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 51 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/tools/src/cover.erl b/lib/tools/src/cover.erl index e51763b6ee..943eee20d2 100644 --- a/lib/tools/src/cover.erl +++ b/lib/tools/src/cover.erl @@ -705,7 +705,9 @@ main_process_loop(State) -> remote_collect('_',Nodes,true), reply(From, ok), Nodes1 = State#main_state.nodes--Nodes, - main_process_loop(State#main_state{nodes=Nodes1}); + LostNodes1 = State#main_state.lost_nodes--Nodes, + main_process_loop(State#main_state{nodes=Nodes1, + lost_nodes=LostNodes1}); {From, {flush,Nodes}} -> remote_collect('_',Nodes,false), diff --git a/lib/tools/test/cover_SUITE.erl b/lib/tools/test/cover_SUITE.erl index fc30b7453a..9d2b449460 100644 --- a/lib/tools/test/cover_SUITE.erl +++ b/lib/tools/test/cover_SUITE.erl @@ -24,7 +24,8 @@ -export([start/1, compile/1, analyse/1, misc/1, stop/1, distribution/1, reconnect/1, die_and_reconnect/1, - dont_reconnect_after_stop/1, export_import/1, + dont_reconnect_after_stop/1, stop_node_after_disconnect/1, + export_import/1, otp_5031/1, eif/1, otp_5305/1, otp_5418/1, otp_6115/1, otp_7095/1, otp_8188/1, otp_8270/1, otp_8273/1, otp_8340/1]). @@ -48,7 +49,7 @@ all() -> undefined -> [start, compile, analyse, misc, stop, distribution, reconnect, die_and_reconnect, - dont_reconnect_after_stop, + dont_reconnect_after_stop, stop_node_after_disconnect, export_import, otp_5031, eif, otp_5305, otp_5418, otp_6115, otp_7095, otp_8188, otp_8270, otp_8273, otp_8340]; @@ -565,6 +566,51 @@ dont_reconnect_after_stop(Config) -> ?t:stop_node(N1), ok. +%% Test that a node which is stopped while it is marked as lost is not +%% reconnected if it is restarted (OTP-10638) +stop_node_after_disconnect(Config) -> + DataDir = ?config(data_dir, Config), + ok = file:set_cwd(DataDir), + + {ok,f} = compile:file(f), + + NodeName = cover_SUITE_stop_node_after_disconnect, + {ok,N1} = ?t:start_node(NodeName,peer, + [{args," -pa " ++ DataDir},{start_cover,false}]), + {ok,f} = cover:compile(f), + {ok,[N1]} = cover:start(nodes()), + + %% A call to check later + rpc:call(N1,f,f1,[]), + + %% Flush the node, then terminate the node to make it marked as lost + cover:flush(N1), + rpc:call(N1,erlang,halt,[]), + + check_f_calls(1,0), + + %% Stop cover on node + cover:stop(N1), + + %% Restart the node and check that cover does not reconnect + {ok,N1} = ?t:start_node(NodeName,peer, + [{args," -pa " ++ DataDir},{start_cover,false}]), + timer:sleep(300), + [] = cover:which_nodes(), + Beam = rpc:call(N1,code,which,[f]), + false = (Beam==cover_compiled), + + %% One more call... + rpc:call(N1,f,f1,[]), + cover:flush(N1), + + %% Ensure that the last call is not counted + check_f_calls(1,0), + + cover:stop(), + ?t:stop_node(N1), + ok. + export_import(suite) -> []; export_import(Config) when is_list(Config) -> ?line DataDir = ?config(data_dir, Config), -- cgit v1.2.3 From f6eb49df376b56f746c2e74d8cf159261aee5464 Mon Sep 17 00:00:00 2001 From: Siri Hansen Date: Wed, 19 Dec 2012 14:07:25 +0100 Subject: [cover] Fix timing dependent bug in cover_SUITE:reconnect Adding a timer:sleep between these two lines: net_kernel:disconnect(N1), [] = cover:which_nodes(), This is to make sure the disconnect is detected by cover before checking that the node is gone. --- lib/tools/test/cover_SUITE.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/tools/test/cover_SUITE.erl b/lib/tools/test/cover_SUITE.erl index 9d2b449460..ef3daef173 100644 --- a/lib/tools/test/cover_SUITE.erl +++ b/lib/tools/test/cover_SUITE.erl @@ -450,8 +450,8 @@ reconnect(Config) -> %% Disconnect and check that node is removed from main cover node net_kernel:disconnect(N1), + timer:sleep(500), % allow some to detect disconnect and for f:f2() call [] = cover:which_nodes(), - timer:sleep(500), % allow some time for the f:f2() call %% Do some add one module (b) and remove one module (a) code:purge(a), -- cgit v1.2.3 From 8100f5b8156a446081813fc8167059171d90b817 Mon Sep 17 00:00:00 2001 From: Siri Hansen Date: Wed, 19 Dec 2012 16:02:18 +0100 Subject: [test_server] Stop cover on node after node is terminated Before terminating slave nodes, test_server calls cover:flush/1 to fetch data from the node without actually stopping cover on this node. If cover is not stopped for the node and a new node with the same name is started, then cover will be started on the new node. To avoid this test_server now calls cover:stop/1 after the slave node is terminated. --- lib/test_server/src/test_server.erl | 69 ++++++++++++++++++--------- lib/test_server/test/test_server_test_lib.erl | 19 +++++--- 2 files changed, 60 insertions(+), 28 deletions(-) (limited to 'lib') diff --git a/lib/test_server/src/test_server.erl b/lib/test_server/src/test_server.erl index 14cdfd391a..9f06ac450b 100644 --- a/lib/test_server/src/test_server.erl +++ b/lib/test_server/src/test_server.erl @@ -317,14 +317,21 @@ pmap(Fun,List) -> do_cover_for_node(Node,CoverFunc) -> + do_cover_for_node(Node,CoverFunc,true). +do_cover_for_node(Node,CoverFunc,StickUnstick) -> %% In case a slave node is starting another slave node! I.e. this %% function is executed on a slave node - then the cover function %% must be executed on the master node. This is for instance the %% case in test_server's own tests. MainCoverNode = cover:get_main_node(), - Sticky = unstick_all_sticky(MainCoverNode,Node), + Sticky = + if StickUnstick -> unstick_all_sticky(MainCoverNode,Node); + true -> ok + end, rpc:call(MainCoverNode,cover,CoverFunc,[Node]), - stick_all_sticky(Node,Sticky). + if StickUnstick -> stick_all_sticky(Node,Sticky); + true -> ok + end. unstick_all_sticky(Node) -> unstick_all_sticky(node(),Node). @@ -2186,12 +2193,9 @@ start_node(Name, Type, Options) -> %% Cannot run cover on shielded node or on a node started %% by a shielded node. - Cover = case is_cover() of + Cover = case is_cover(Node) of true -> - not is_shielded(Name) - andalso same_version(Node) - andalso proplists:get_value(start_cover,Options, - true); + proplists:get_value(start_cover,Options,true); false -> false end, @@ -2230,15 +2234,8 @@ wait_for_node(Slave) -> Result = receive {sync_result,R} -> R end, case Result of ok -> - Cover = case is_cover() of - true -> - not is_shielded(Slave) andalso same_version(Slave); - false -> - false - end, - net_adm:ping(Slave), - case Cover of + case is_cover(Slave) of true -> do_cover_for_node(Slave,start); _ -> @@ -2256,12 +2253,9 @@ wait_for_node(Slave) -> %% Kills a (remote) node. %% Also inform test_server_ctrl so it can clean up! stop_node(Slave) -> - Nocover = is_shielded(Slave) orelse not same_version(Slave), - case is_cover() of - true when not Nocover -> - do_cover_for_node(Slave,flush); - _ -> - ok + Cover = is_cover(Slave), + if Cover -> do_cover_for_node(Slave,flush,false); + true -> ok end, group_leader() ! {sync_apply,self(),{test_server_ctrl,stop_node,[Slave]}}, Result = receive {sync_result,R} -> R end, @@ -2273,10 +2267,15 @@ stop_node(Slave) -> {nodedown, Slave} -> format(minor, "Stopped slave node: ~p", [Slave]), format(major, "=node_stop ~p", [Slave]), + if Cover -> do_cover_for_node(Slave,stop,false); + true -> ok + end, true after 30000 -> format("=== WARNING: Node ~p does not seem to terminate.", [Slave]), + erlang:monitor_node(Slave, false), + receive {nodedown, Slave} -> ok after 0 -> ok end, false end; {error, _Reason} -> @@ -2288,9 +2287,27 @@ stop_node(Slave) -> [Slave]), case net_adm:ping(Slave)of pong -> + erlang:monitor_node(Slave, true), slave:stop(Slave), - true; + receive + {nodedown, Slave} -> + format(minor, "Stopped slave node: ~p", [Slave]), + format(major, "=node_stop ~p", [Slave]), + if Cover -> do_cover_for_node(Slave,stop,false); + true -> ok + end, + true + after 30000 -> + format("=== WARNING: Node ~p does not seem to terminate.", + [Slave]), + erlang:monitor_node(Slave, false), + receive {nodedown, Slave} -> ok after 0 -> ok end, + false + end; pang -> + if Cover -> do_cover_for_node(Slave,stop,false); + true -> ok + end, false end end. @@ -2377,6 +2394,14 @@ same_version(Name) -> OtherVersion = rpc:call(Name, erlang, system_info, [version]), ThisVersion =:= OtherVersion. +is_cover(Name) -> + case is_cover() of + true -> + not is_shielded(Name) andalso same_version(Name); + false -> + false + end. + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% temp_name(Stem) -> string() %% Stem = string() diff --git a/lib/test_server/test/test_server_test_lib.erl b/lib/test_server/test/test_server_test_lib.erl index 4e89abf308..d466aa0110 100644 --- a/lib/test_server/test/test_server_test_lib.erl +++ b/lib/test_server/test/test_server_test_lib.erl @@ -83,14 +83,21 @@ start_slave(Config,_Level) -> post_end_per_testcase(_TC, Config, Return, State) -> Node = proplists:get_value(node, Config), - case test_server:is_cover() of - true -> - cover:flush(Node); - false -> - ok + Cover = test_server:is_cover(), + if Cover-> cover:flush(Node); + true -> ok end, + erlang:monitor_node(Node, true), slave:stop(Node), - + receive + {nodedown, Node} -> + if Cover -> cover:stop(Node); + true -> ok + end + after 5000 -> + erlang:monitor_node(Node, false), + receive {nodedown, Node} -> ok after 0 -> ok end %flush + end, {Return, State}. %% Parse an .suite log file -- cgit v1.2.3 From 6e9038f942dbe5f7968c2f6467f3d85a7fe458a2 Mon Sep 17 00:00:00 2001 From: Siri Hansen Date: Wed, 19 Dec 2012 16:05:08 +0100 Subject: [common_test] Stop cover on slave node after node is terminated Before terminating slave nodes, common_test calls cover:flush/1 to fetch data from the node without actually stopping cover on this node. If cover is not stopped for the node and a new node with the same name is started, then cover will be started on the new node. To avoid this common_test now calls cover:stop/1 after the slave node is terminated. --- lib/common_test/src/ct_slave.erl | 28 +++++++++++++++++++++------- lib/common_test/test/ct_test_support.erl | 31 +++++++++++++++++++++---------- 2 files changed, 42 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/common_test/src/ct_slave.erl b/lib/common_test/src/ct_slave.erl index 58633b7de6..1fd8c04f8b 100644 --- a/lib/common_test/src/ct_slave.erl +++ b/lib/common_test/src/ct_slave.erl @@ -449,15 +449,29 @@ wait_for_node_alive(Node, N) -> % call init:stop on a remote node do_stop(ENode) -> - case test_server:is_cover() of - true -> - MainCoverNode = cover:get_main_node(), - rpc:call(MainCoverNode,cover,flush,[ENode]); - false -> - ok + {Cover,MainCoverNode} = + case test_server:is_cover() of + true -> + Main = cover:get_main_node(), + rpc:call(Main,cover,flush,[ENode]), + {true,Main}; + false -> + {false,undefined} end, spawn(ENode, init, stop, []), - wait_for_node_dead(ENode, 5). + case wait_for_node_dead(ENode, 5) of + {ok,ENode} -> + if Cover -> + %% To avoid that cover is started again if a node + %% with the same name is started later. + rpc:call(MainCoverNode,cover,stop,[ENode]); + true -> + ok + end, + {ok,ENode}; + Error -> + Error + end. % wait N seconds until node is disconnected wait_for_node_dead(Node, 0) -> diff --git a/lib/common_test/test/ct_test_support.erl b/lib/common_test/test/ct_test_support.erl index e5e2e68fcb..fc572aa82f 100644 --- a/lib/common_test/test/ct_test_support.erl +++ b/lib/common_test/test/ct_test_support.erl @@ -117,11 +117,7 @@ end_per_suite(Config) -> CTNode = proplists:get_value(ct_node, Config), PrivDir = proplists:get_value(priv_dir, Config), true = rpc:call(CTNode, code, del_path, [filename:join(PrivDir,"")]), - case test_server:is_cover() of - true -> cover:flush(CTNode); - false -> ok - end, - slave:stop(CTNode), + slave_stop(CTNode), ok. %%%----------------------------------------------------------------- @@ -152,11 +148,7 @@ end_per_testcase(_TestCase, Config) -> case wait_for_ct_stop(CTNode) of %% Common test was not stopped to we restart node. false -> - case test_server:is_cover() of - true -> cover:flush(CTNode); - false -> ok - end, - slave:stop(CTNode), + slave_stop(CTNode), start_slave(Config,proplists:get_value(trace_level,Config)), {fail, "Could not stop common_test"}; true -> @@ -1274,3 +1266,22 @@ rm_files([F | Fs]) -> rm_files([]) -> ok. +%%%----------------------------------------------------------------- +%%% +slave_stop(Node) -> + Cover = test_server:is_cover(), + if Cover-> cover:flush(Node); + true -> ok + end, + erlang:monitor_node(Node, true), + slave:stop(Node), + receive + {nodedown, Node} -> + if Cover -> cover:stop(Node); + true -> ok + end + after 5000 -> + erlang:monitor_node(Node, false), + receive {nodedown, Node} -> ok after 0 -> ok end %flush + end, + ok. -- cgit v1.2.3 From 06ad426384ab6c5f5966c4f5ae87be0fb01f346c Mon Sep 17 00:00:00 2001 From: Siri Hansen Date: Fri, 21 Dec 2012 12:16:39 +0100 Subject: [cover] Cleanup by stopping cover between tests If one test failed, the next would sometimes also fail since cover was not stopped. --- lib/tools/test/cover_SUITE.erl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/tools/test/cover_SUITE.erl b/lib/tools/test/cover_SUITE.erl index ef3daef173..57260a3869 100644 --- a/lib/tools/test/cover_SUITE.erl +++ b/lib/tools/test/cover_SUITE.erl @@ -89,8 +89,11 @@ init_per_testcase(TC, Config) when TC =:= misc; init_per_testcase(_TestCase, Config) -> Config. -end_per_testcase(_TestCase, _Config) -> - %cover:stop(), +end_per_testcase(TestCase, _Config) -> + case lists:member(TestCase,[start,compile,analyse,misc]) of + true -> ok; + false -> cover:stop() + end, ok. start(suite) -> []; @@ -999,7 +1002,7 @@ otp_8270(Config) when is_list(Config) -> ok. otp_8273(doc) -> - ["OTP-8270. Bug."]; + ["OTP-8273. Bug."]; otp_8273(suite) -> []; otp_8273(Config) when is_list(Config) -> Test = <<"-module(t). -- cgit v1.2.3