diff options
Diffstat (limited to 'lib/kernel/test/heart_SUITE.erl')
-rw-r--r-- | lib/kernel/test/heart_SUITE.erl | 273 |
1 files changed, 183 insertions, 90 deletions
diff --git a/lib/kernel/test/heart_SUITE.erl b/lib/kernel/test/heart_SUITE.erl index 320b23bea1..45032faf6d 100644 --- a/lib/kernel/test/heart_SUITE.erl +++ b/lib/kernel/test/heart_SUITE.erl @@ -1,24 +1,25 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2012. All Rights Reserved. +%% Copyright Ericsson AB 1996-2016. All Rights Reserved. %% -%% The contents of this file are subject to the Erlang Public License, -%% Version 1.1, (the "License"); you may not use this file except in -%% compliance with the License. You should have received a copy of the -%% Erlang Public License along with this software. If not, it can be -%% retrieved online at http://www.erlang.org/. +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at %% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See -%% the License for the specific language governing rights and limitations -%% under the License. +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. %% %% %CopyrightEnd% %% -module(heart_SUITE). --include_lib("test_server/include/test_server.hrl"). +-include_lib("common_test/include/ct.hrl"). -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, init_per_group/2,end_per_group/2, start/1, restart/1, @@ -26,46 +27,55 @@ node_start_immediately_after_crash/1, node_start_soon_after_crash/1, set_cmd/1, clear_cmd/1, get_cmd/1, - dont_drop/1, kill_pid/1]). + callback_api/1, + options_api/1, + dont_drop/1, kill_pid/1, heart_no_kill/1]). -export([init_per_testcase/2, end_per_testcase/2]). --export([start_heart_stress/1, mangle/1, suicide_by_heart/0]). +-export([start_heart_stress/1, mangle/1, suicide_by_heart/0, non_suicide_by_heart/0]). -define(DEFAULT_TIMEOUT_SECS, 120). +-define(UNIQ_NODE_NAME, + list_to_atom(?MODULE_STRING ++ "__" ++ + atom_to_list(?FUNCTION_NAME) ++ "_" ++ + integer_to_list(erlang:unique_integer([positive])))). + init_per_testcase(_Func, Config) -> - Dog=test_server:timetrap(test_server:seconds(?DEFAULT_TIMEOUT_SECS)), - [{watchdog, Dog}|Config]. + Config. -end_per_testcase(_Func, Config) -> +end_per_testcase(_Func, _Config) -> Nodes = nodes(), lists:foreach(fun(X) -> NNam = list_to_atom(hd(string:tokens(atom_to_list(X),"@"))), case NNam of heart_test -> - ?t:format(1, "WARNING: Killed ~p~n", [X]), + ct:pal(?HI_VERBOSITY, "WARNING: Killed ~p~n", [X]), rpc:cast(X, erlang, halt, []); _ -> ok end - end, Nodes), - Dog=?config(watchdog, Config), - test_server:timetrap_cancel(Dog). + end, Nodes). %%----------------------------------------------------------------- %% Test suite for heart. %% Should be started in a CC view with: %% erl -sname master -rsh ctrsh %%----------------------------------------------------------------- -suite() -> [{ct_hooks,[ts_install_cth]}]. +suite() -> + [{ct_hooks,[ts_install_cth]}, + {timetrap,{minutes,2}}]. all() -> [ start, restart, reboot, node_start_immediately_after_crash, node_start_soon_after_crash, set_cmd, clear_cmd, get_cmd, - kill_pid + callback_api, + options_api, + kill_pid, + heart_no_kill ]. groups() -> @@ -92,42 +102,40 @@ end_per_suite(Config) when is_list(Config) -> start_check(Type, Name) -> start_check(Type, Name, []). start_check(Type, Name, Envs) -> - Args = case ?t:os_type() of - {win32,_} -> - "-heart " ++ env_encode([{"HEART_COMMAND", no_reboot}|Envs]); + Args = case test_server:os_type() of + {win32,_} -> + "+t50000 -heart " ++ env_encode([{"HEART_COMMAND", no_reboot}|Envs]); _ -> - "-heart " ++ env_encode(Envs) + "+t50000 -heart " ++ env_encode(Envs) end, {ok, Node} = case Type of loose -> loose_node:start(Name, Args, ?DEFAULT_TIMEOUT_SECS); _ -> - ?t:start_node(Name, Type, [{args, Args}]) + test_server:start_node(Name, Type, [{args, Args}]) end, erlang:monitor_node(Node, true), case rpc:call(Node, erlang, whereis, [heart]) of Pid when is_pid(Pid) -> ok; _ -> - test_server:fail(heart_not_started) + ct:fail(heart_not_started) end, {ok, Node}. -start(doc) -> []; -start(suite) -> {req, [{time, 10}]}; start(Config) when is_list(Config) -> - {ok, Node} = start_check(slave, heart_test), + {ok, Node} = start_check(slave, ?UNIQ_NODE_NAME), rpc:call(Node, init, reboot, []), receive {nodedown, Node} -> ok - after 2000 -> test_server:fail(node_not_closed) + after 2000 -> ct:fail(node_not_closed) end, - test_server:sleep(5000), + timer:sleep(5000), case net_adm:ping(Node) of pang -> ok; _ -> - test_server:fail(node_rebooted) + ct:fail(node_rebooted) end, test_server:stop_node(Node). @@ -140,34 +148,24 @@ start(Config) when is_list(Config) -> %% restart %% Purpose: %% Check that a node is up and running after a init:restart/0 -restart(doc) -> []; -restart(suite) -> - case ?t:os_type() of - {Fam, _} when Fam == unix; Fam == win32 -> - {req, [{time,10}]}; - _ -> - {skip, "Only run on unix and win32"} - end; restart(Config) when is_list(Config) -> - {ok, Node} = start_check(loose, heart_test), + {ok, Node} = start_check(loose, ?UNIQ_NODE_NAME), rpc:call(Node, init, restart, []), receive {nodedown, Node} -> ok - after 2000 -> - test_server:fail(node_not_closed) + after 5000 -> + ct:fail(node_not_closed) end, - test_server:sleep(5000), + timer:sleep(5000), node_check_up_down(Node, 2000), loose_node:stop(Node). %% reboot %% Purpose: %% Check that a node is up and running after a init:reboot/0 -reboot(doc) -> []; -reboot(suite) -> {req, [{time, 10}]}; reboot(Config) when is_list(Config) -> - {ok, Node} = start_check(slave, heart_test), + {ok, Node} = start_check(slave, ?UNIQ_NODE_NAME), ok = rpc:call(Node, heart, set_cmd, [atom_to_list(lib:progname()) ++ @@ -177,9 +175,9 @@ reboot(Config) when is_list(Config) -> {nodedown, Node} -> ok after 2000 -> - test_server:fail(node_not_closed) + ct:fail(node_not_closed) end, - test_server:sleep(5000), + timer:sleep(5000), node_check_up_down(Node, 2000), ok. @@ -191,7 +189,6 @@ reboot(Config) when is_list(Config) -> %% May currently dump core in beam debug build due to lock-order violation %% This should be removed when a non-lockad information retriever is implemented %% for crash dumps -node_start_immediately_after_crash(suite) -> {req, [{time, 10}]}; node_start_immediately_after_crash(Config) when is_list(Config) -> Config2 = ignore_cores:setup(?MODULE, node_start_immediately_after_crash, Config, true), try @@ -202,7 +199,8 @@ node_start_immediately_after_crash(Config) when is_list(Config) -> node_start_immediately_after_crash_test(Config) when is_list(Config) -> - {ok, Node} = start_check(loose, heart_test_imm, [{"ERL_CRASH_DUMP_SECONDS", "0"}]), + {ok, Node} = start_check(loose, ?UNIQ_NODE_NAME, + [{"ERL_CRASH_DUMP_SECONDS", "0"}]), ok = rpc:call(Node, heart, set_cmd, [atom_to_list(lib:progname()) ++ @@ -224,13 +222,13 @@ node_start_immediately_after_crash_test(Config) when is_list(Config) -> T0 = now(), receive {nodedown, Node} -> - test_server:format("Took ~.2f s. for node to go down~n", [timer:now_diff(now(), T0)/1000000]), + io:format("Took ~.2f s. for node to go down~n", [timer:now_diff(now(), T0)/1000000]), ok %% timeout is very liberal here. nodedown is received in about 1 s. on linux (palantir) %% and in about 10 s. on solaris (carcharoth) - after (15000*test_server:timetrap_scale_factor()) -> test_server:fail(node_not_closed) + after (15000*test_server:timetrap_scale_factor()) -> ct:fail(node_not_closed) end, - test_server:sleep(3000), + timer:sleep(3000), node_check_up_down(Node, 2000), loose_node:stop(Node). @@ -243,7 +241,6 @@ node_start_immediately_after_crash_test(Config) when is_list(Config) -> %% May currently dump core in beam debug build due to lock-order violation %% This should be removed when a non-lockad information retriever is implemented %% for crash dumps -node_start_soon_after_crash(suite) -> {req, [{time, 10}]}; node_start_soon_after_crash(Config) when is_list(Config) -> Config2 = ignore_cores:setup(?MODULE, node_start_soon_after_crash, Config, true), try @@ -253,7 +250,8 @@ node_start_soon_after_crash(Config) when is_list(Config) -> end. node_start_soon_after_crash_test(Config) when is_list(Config) -> - {ok, Node} = start_check(loose, heart_test_soon, [{"ERL_CRASH_DUMP_SECONDS", "10"}]), + {ok, Node} = start_check(loose, ?UNIQ_NODE_NAME, + [{"ERL_CRASH_DUMP_SECONDS", "10"}]), ok = rpc:call(Node, heart, set_cmd, [atom_to_list(lib:progname()) ++ @@ -273,9 +271,9 @@ node_start_soon_after_crash_test(Config) when is_list(Config) -> rpc:cast(Node, Mod, do, []), receive {nodedown, Node} -> ok - after (15000*test_server:timetrap_scale_factor()) -> test_server:fail(node_not_closed) + after (15000*test_server:timetrap_scale_factor()) -> ct:fail(node_not_closed) end, - test_server:sleep(20000), + timer:sleep(20000), node_check_up_down(Node, 15000), loose_node:stop(Node). @@ -288,16 +286,15 @@ node_check_up_down(Node, Tmo) -> receive {nodedown, Node} -> ok after Tmo -> - test_server:fail(node_not_closed2) + ct:fail(node_not_closed2) end; _ -> - test_server:fail(node_not_rebooted) + ct:fail(node_not_rebooted) end. %% Only tests bad command, correct behaviour is tested in reboot/1. -set_cmd(suite) -> []; set_cmd(Config) when is_list(Config) -> - {ok, Node} = start_check(slave, heart_test), + {ok, Node} = start_check(slave, ?UNIQ_NODE_NAME), Cmd = wrong_atom, {error, {bad_cmd, Cmd}} = rpc:call(Node, heart, set_cmd, [Cmd]), Cmd1 = lists:duplicate(2047, $a), @@ -309,9 +306,8 @@ set_cmd(Config) when is_list(Config) -> stop_node(Node), ok. -clear_cmd(suite) -> {req,[{time,15}]}; clear_cmd(Config) when is_list(Config) -> - {ok, Node} = start_check(slave, heart_test), + {ok, Node} = start_check(slave, ?UNIQ_NODE_NAME), ok = rpc:call(Node, heart, set_cmd, [atom_to_list(lib:progname()) ++ " -noshell -heart " ++ name(Node) ++ "&"]), @@ -320,14 +316,14 @@ clear_cmd(Config) when is_list(Config) -> {nodedown, Node} -> ok after 2000 -> - test_server:fail(node_not_closed) + ct:fail(node_not_closed) end, - test_server:sleep(5000), + timer:sleep(5000), case net_adm:ping(Node) of pong -> erlang:monitor_node(Node, true); _ -> - test_server:fail(node_not_rebooted) + ct:fail(node_not_rebooted) end, ok = rpc:call(Node, heart, set_cmd, ["erl -noshell -heart " ++ name(Node) ++ "&"]), @@ -337,35 +333,95 @@ clear_cmd(Config) when is_list(Config) -> {nodedown, Node} -> ok after 2000 -> - test_server:fail(node_not_closed) + ct:fail(node_not_closed) end, - test_server:sleep(5000), + timer:sleep(5000), case net_adm:ping(Node) of pang -> ok; _ -> - test_server:fail(node_rebooted) + ct:fail(node_rebooted) end, ok. -get_cmd(suite) -> []; get_cmd(Config) when is_list(Config) -> - {ok, Node} = start_check(slave, heart_test), + {ok, Node} = start_check(slave, ?UNIQ_NODE_NAME), Cmd = "test", ok = rpc:call(Node, heart, set_cmd, [Cmd]), {ok, Cmd} = rpc:call(Node, heart, get_cmd, []), stop_node(Node), ok. -dont_drop(suite) -> +callback_api(Config) when is_list(Config) -> + {ok, Node} = start_check(slave, ?UNIQ_NODE_NAME), + none = rpc:call(Node, heart, get_callback, []), + M0 = self(), + F0 = ok, + {error, {bad_callback, {M0,F0}}} = rpc:call(Node, heart, set_callback, [M0,F0]), + none = rpc:call(Node, heart, get_callback, []), + M1 = lists:duplicate(28, $a), + F1 = lists:duplicate(28, $b), + {error, {bad_callback, {M1,F1}}} = rpc:call(Node, heart, set_callback, [M1,F1]), + none = rpc:call(Node, heart, get_callback, []), + + M2 = heart_check_module, + F2 = cb_ok, + F3 = cb_error, + Code0 = generate(M2, [], [ + atom_to_list(F2) ++ "() -> ok.", + atom_to_list(F3) ++ "() -> exit(\"callback_error (as intended)\")." + ]), + {module, M2} = rpc:call(Node, erlang, load_module, [M2, Code0]), + ok = rpc:call(Node, M2, F2, []), + ok = rpc:call(Node, heart, set_callback, [M2,F2]), + {ok, {M2,F2}} = rpc:call(Node, heart, get_callback, []), + ok = rpc:call(Node, heart, clear_callback, []), + none = rpc:call(Node, heart, get_callback, []), + ok = rpc:call(Node, heart, set_callback, [M2,F2]), + {ok, {M2,F2}} = rpc:call(Node, heart, get_callback, []), + ok = rpc:call(Node, heart, set_callback, [M2,F3]), + receive {nodedown, Node} -> ok + after 5000 -> ct:fail(node_not_killed) + end, + stop_node(Node), + ok. + +options_api(Config) when is_list(Config) -> + {ok, Node} = start_check(slave, ?UNIQ_NODE_NAME), + none = rpc:call(Node, heart, get_options, []), + M0 = self(), + F0 = ok, + {error, {bad_options, {M0,F0}}} = rpc:call(Node, heart, set_options, [{M0,F0}]), + none = rpc:call(Node, heart, get_options, []), + Ls = lists:duplicate(28, $b), + {error, {bad_options, Ls}} = rpc:call(Node, heart, set_options, [Ls]), + none = rpc:call(Node, heart, get_options, []), + + ok = rpc:call(Node, heart, set_options, [[check_schedulers]]), + {ok, [check_schedulers]} = rpc:call(Node, heart, get_options, []), + ok = rpc:call(Node, heart, set_options, [[]]), + none = rpc:call(Node, heart, get_options, []), + + ok = rpc:call(Node, heart, set_options, [[check_schedulers]]), + {ok, [check_schedulers]} = rpc:call(Node, heart, get_options, []), + {error, {bad_options, Ls}} = rpc:call(Node, heart, set_options, [Ls]), + {ok, [check_schedulers]} = rpc:call(Node, heart, get_options, []), + + receive after 3000 -> ok end, %% wait 3 secs + + ok = rpc:call(Node, heart, set_options, [[]]), + none = rpc:call(Node, heart, get_options, []), + stop_node(Node), + ok. + + %%% Removed as it may crash epmd/distribution in colourful %%% ways. While we ARE finding out WHY, it would %%% be nice for others to be able to run the kernel test suite -%%% without "exploding machines", so thats why I removed it for now. - []; -dont_drop(doc) -> - ["Tests that the heart command does not get dropped when ", - "set just before halt on very high I/O load."]; +%%% without "exploding machines", so that's why I removed it for now. + +%% Tests that the heart command does not get dropped when +%% set just before halt on very high I/O load.. dont_drop(Config) when is_list(Config) -> %%% Have to do it some times to make it happen... [ok,ok,ok,ok,ok,ok,ok,ok,ok,ok] = do_dont_drop(Config,10), @@ -387,7 +443,7 @@ do_dont_drop(Config,N) -> Env = [{"HEART_COMMAND", FirstCmd}], Func = "start_heart_stress", Arg = NN3 ++ "@" ++ Host ++ " " ++ - filename:join(?config(data_dir, Config), "simple_echo"), + filename:join(proplists:get_value(data_dir, Config), "simple_echo"), start_node_run(Name,Env,Func,Arg), case wait_for_any_of(list_to_atom(NN2 ++ "@" ++ Host), list_to_atom(NN3 ++ "@" ++ Host)) of @@ -420,16 +476,13 @@ wait_for_any_of(N1,N2,Times) -> end. -kill_pid(suite) -> - []; -kill_pid(doc) -> - ["Tests that heart kills the old erlang node before executing ", - "heart command."]; +%% Tests that heart kills the old erlang node before executing +%% heart command. kill_pid(Config) when is_list(Config) -> ok = do_kill_pid(Config). do_kill_pid(_Config) -> - Name = heart_test, + Name = ?UNIQ_NODE_NAME, Env = [{"HEART_COMMAND", "nickeNyfikenFarEttJobb"}], {ok,Node} = start_node_run(Name,Env,suicide_by_heart,[]), ok = wait_for_node(Node,15), @@ -439,6 +492,30 @@ do_kill_pid(_Config) -> false end. + +heart_no_kill(suite) -> + []; +heart_no_kill(doc) -> + ["Tests that heart doesn't kill the old erlang node when ", + "HEART_NO_KILL is set."]; +heart_no_kill(Config) when is_list(Config) -> + ok = do_no_kill(Config). + +do_no_kill(_Config) -> + Name = heart_test, + {ok,Node} = start_node_run(Name,[],non_suicide_by_heart,[]), + io:format("Node is ~p~n", [Node]), + ok = wait_for_node(Node,15), + io:format("wait_for_node is ~p~n", [ok]), + erlang:monitor_node(Node, true), + receive {nodedown,Node} -> false + after 30000 -> + io:format("Node didn't die..\n"), + rpc:call(Node,init,stop,[]), + io:format("done init:stop..\n"), + ok + end. + wait_for_node(_,0) -> false; wait_for_node(Node,N) -> @@ -557,18 +634,34 @@ suicide_by_heart() -> sallad end. +non_suicide_by_heart() -> + P = open_port({spawn,"heart -ht 11 -pid "++os:getpid()}, + [exit_status, {env, [{"HEART_NO_KILL", "TRUE"}]}, + {packet,2}]), + receive X -> X end, + %% Just hang and wait for heart to timeout + receive + {P,{exit_status,_}} -> + ok + after + 20000 -> + exit(timeout) + end. + %% generate a module from binary generate(Module, Attributes, FunStrings) -> FunForms = function_forms(FunStrings), Forms = [ - {attribute,1,module,Module}, - {attribute,2,export,[FA || {FA,_} <- FunForms]} - ] ++ [{attribute, 3, A, V}|| {A, V} <- Attributes] ++ + {attribute,a(1),module,Module}, + {attribute,a(2),export,[FA || {FA,_} <- FunForms]} + ] ++ [{attribute, a(3), A, V}|| {A, V} <- Attributes] ++ [ Function || {_, Function} <- FunForms], {ok, Module, Bin} = compile:forms(Forms), Bin. +a(L) -> + erl_anno:new(L). function_forms([]) -> []; function_forms([S|Ss]) -> |