diff options
Diffstat (limited to 'lib/snmp/test/snmp_agent_test_lib.erl')
-rw-r--r-- | lib/snmp/test/snmp_agent_test_lib.erl | 326 |
1 files changed, 245 insertions, 81 deletions
diff --git a/lib/snmp/test/snmp_agent_test_lib.erl b/lib/snmp/test/snmp_agent_test_lib.erl index 6defdadb5a..615d6774b9 100644 --- a/lib/snmp/test/snmp_agent_test_lib.erl +++ b/lib/snmp/test/snmp_agent_test_lib.erl @@ -66,7 +66,7 @@ ]). %% Internal exports --export([wait/5, run/4]). +-export([tc_wait/5, tc_run/4]). -include_lib("kernel/include/file.hrl"). -include_lib("common_test/include/ct.hrl"). @@ -276,87 +276,186 @@ init_case(Config) when is_list(Config) -> %%% configuration. %%%-------------------------------------------------- -try_test(Mod, Func) -> - call(get(mgr_node), ?MODULE, run, [Mod, Func, [], []]). - -try_test(Mod, Func, A) -> - call(get(mgr_node), ?MODULE, run, [Mod, Func, A, []]). - -try_test(Mod, Func, A, Opts) -> - call(get(mgr_node), ?MODULE, run, [Mod, Func, A, Opts]). - -call(N,M,F,A) -> - ?DBG("call -> entry with~n" - " N: ~p~n" - " M: ~p~n" - " F: ~p~n" - " A: ~p~n" - " when~n" - " get(): ~p", - [N,M,F,A,get()]), - spawn(N, ?MODULE, wait, [self(),get(),M,F,A]), +try_test(TcRunMod, TcRunFunc) -> + try_test(TcRunMod, TcRunFunc, []). + +try_test(TcRunMod, TcRunFunc, TcRunArgs) -> + try_test(TcRunMod, TcRunFunc, TcRunArgs, []). + +try_test(TcRunMod, TcRunFunc, TcRunArgs, TcRunOpts) -> + Node = get(mgr_node), + Mod = ?MODULE, + Func = tc_run, + Args = [TcRunMod, TcRunFunc, TcRunArgs, TcRunOpts], + tc_try(Node, Mod, Func, Args). + +%% We spawn a test case runner process on the manager node. +%% The assumption is that the manager shall do something, but +%% not all test cases have the manager perform actions. +%% In some cases we make a rpc call back to the agent node directly +%% and call something in the agent... (for example the info_test +%% test case). +%% We should use link (instead of monitor) in order for the test case +%% timeout cleanup (kills) should have effect on the test case runner +%% process as well. + +tc_try(N, M, F, A) -> + ?PRINT2("tc_try -> entry with" + "~n N: ~p" + "~n M: ~p" + "~n F: ~p" + "~n A: ~p" + "~n when" + "~n get(): ~p" + "~n", [N, + M, F, A, + get()]), + case net_adm:ping(N) of + pong -> + ?PRINT2("tc_try -> ~p still running - start runner~n", [N]), + OldFlag = trap_exit(true), % Make sure we catch it + Runner = spawn_link(N, ?MODULE, tc_wait, [self(), get(), M, F, A]), + await_tc_runner_started(Runner, OldFlag), + await_tc_runner_done(Runner, OldFlag); + pang -> + ?EPRINT2("tc_try -> ~p *not* running~n", [N]), + skip({node_not_running, N}) + end. + +await_tc_runner_started(Runner, OldFlag) -> + ?PRINT2("await tc-runner (~p) start ack~n", [Runner]), receive - {done, {'EXIT', Rn}, Loc} -> - ?DBG("call -> done with exit: " - "~n Rn: ~p" - "~n Loc: ~p", [Rn, Loc]), + {'EXIT', Runner, Reason} -> + ?EPRINT2("TC runner start failed: " + "~n ~p~n", [Reason]), + exit({tx_runner_start_failed, Reason}); + {tc_runner_started, Runner} -> + ?PRINT2("TC runner start acknowledged~n"), + ok + after 10000 -> %% We should *really* not have to wait this long, but... + trap_exit(OldFlag), + unlink_and_flush_exit(Runner), + RunnerInfo = process_info(Runner), + ?EPRINT2("TC runner start timeout: " + "~n ~p", [RunnerInfo]), + %% If we don't get a start ack within 10 seconds, we are f*ed + exit(Runner, kill), + exit({tc_runner_start, timeout, RunnerInfo}) + end. + +await_tc_runner_done(Runner, OldFlag) -> + receive + {'EXIT', Runner, Reason} -> + %% This is not a normal (tc) failure (that is the clause below). + %% Instead the tc runner process crashed, for some reason. So + %% check if have got any system events, and if so, skip. + SysEvs = snmp_test_global_sys_monitor:events(), + if + (SysEvs =:= []) -> + ?EPRINT2("TC runner failed: " + "~n ~p~n", [Reason]), + exit({tx_runner_failed, Reason}); + true -> + ?EPRINT2("TC runner failed when we got system events: " + "~n Reason: ~p" + "~n Sys Events: ~p" + "~n", [Reason, SysEvs]), + skip([{reason, Reason}, {system_events, SysEvs}]) + end; + {tc_runner_done, Runner, {'EXIT', {skip, Reason}}, Loc} -> + ?PRINT2("call -> done with skip: " + "~n Reason: ~p" + "~n Loc: ~p" + "~n", [Reason, Loc]), + trap_exit(OldFlag), + unlink_and_flush_exit(Runner), + put(test_server_loc, Loc), + skip(Reason); + {tc_runner_done, Runner, {'EXIT', Rn}, Loc} -> + ?PRINT2("call -> done with exit: " + "~n Rn: ~p" + "~n Loc: ~p" + "~n", [Rn, Loc]), + trap_exit(OldFlag), + unlink_and_flush_exit(Runner), put(test_server_loc, Loc), exit(Rn); - {done, Ret, _Zed} -> + {tc_runner_done, Runner, Ret, _Zed} -> ?DBG("call -> done:" "~n Ret: ~p" "~n Zed: ~p", [Ret, _Zed]), + trap_exit(OldFlag), + unlink_and_flush_exit(Runner), case Ret of {error, Reason} -> exit(Reason); + {skip, Reason} -> + skip(Reason); OK -> OK end end. -wait(From, Env, M, F, A) -> - ?DBG("wait -> entry with" - "~n From: ~p" - "~n Env: ~p" - "~n M: ~p" - "~n F: ~p" - "~n A: ~p", [From, Env, M, F, A]), +trap_exit(Flag) when is_boolean(Flag) -> + erlang:process_flag(trap_exit, Flag). + +unlink_and_flush_exit(Pid) -> + unlink(Pid), + receive + {'EXIT', Pid, _} -> + ok + after 0 -> + ok + end. + +tc_wait(From, Env, M, F, A) -> + ?PRINT2("tc_wait -> entry with" + "~n From: ~p" + "~n Env: ~p" + "~n M: ~p" + "~n F: ~p" + "~n A: ~p", [From, Env, M, F, A]), + From ! {tc_runner_started, self()}, lists:foreach(fun({K,V}) -> put(K,V) end, Env), - Rn = (catch apply(M, F, A)), - ?DBG("wait -> Rn: ~n~p", [Rn]), - From ! {done, Rn, get(test_server_loc)}, - exit(Rn). - -run(Mod, Func, Args, Opts) -> - ?DBG("run -> entry with" - "~n Mod: ~p" - "~n Func: ~p" - "~n Args: ~p" - "~n Opts: ~p", [Mod, Func, Args, Opts]), - M = get(mib_dir), - Dir = get(mgr_dir), - User = snmp_misc:get_option(user, Opts, "all-rights"), - SecLevel = snmp_misc:get_option(sec_level, Opts, noAuthNoPriv), - EngineID = snmp_misc:get_option(engine_id, Opts, "agentEngine"), + ?PRINT2("tc_wait -> env set - now run tc~n"), + Res = (catch apply(M, F, A)), + ?PRINT2("tc_wait -> tc run done: " + "~n ~p" + "~n", [Res]), + From ! {tc_runner_done, self(), Res, get(test_server_loc)}, + exit(Res). + +tc_run(Mod, Func, Args, Opts) -> + ?PRINT2("tc_run -> entry with" + "~n Mod: ~p" + "~n Func: ~p" + "~n Args: ~p" + "~n Opts: ~p" + "~n", [Mod, Func, Args, Opts]), + (catch snmp_test_mgr:stop()), % If we had a running mgr from a failed case + M = get(mib_dir), + Dir = get(mgr_dir), + User = snmp_misc:get_option(user, Opts, "all-rights"), + SecLevel = snmp_misc:get_option(sec_level, Opts, noAuthNoPriv), + EngineID = snmp_misc:get_option(engine_id, Opts, "agentEngine"), CtxEngineID = snmp_misc:get_option(context_engine_id, Opts, EngineID), - Community = snmp_misc:get_option(community, Opts, "all-rights"), - ?DBG("run -> start crypto app",[]), - _CryptoRes = ?CRYPTO_START(), - ?DBG("run -> Crypto: ~p", [_CryptoRes]), - catch snmp_test_mgr:stop(), % If we had a running mgr from a failed case - StdM = join(code:priv_dir(snmp), "mibs") ++ "/", - Vsn = get(vsn), - ?DBG("run -> config:" - "~n M: ~p" - "~n Vsn: ~p" - "~n Dir: ~p" - "~n User: ~p" - "~n SecLevel: ~p" - "~n EngineID: ~p" - "~n CtxEngineID: ~p" - "~n Community: ~p" - "~n StdM: ~p", - [M,Vsn,Dir,User,SecLevel,EngineID,CtxEngineID,Community,StdM]), + Community = snmp_misc:get_option(community, Opts, "all-rights"), + ?DBG("tc_run -> start crypto app",[]), + _CryptoRes = ?CRYPTO_START(), + ?DBG("tc_run -> Crypto: ~p", [_CryptoRes]), + StdM = join(code:priv_dir(snmp), "mibs") ++ "/", + Vsn = get(vsn), + ?PRINT2("tc_run -> config:" + "~n M: ~p" + "~n Vsn: ~p" + "~n Dir: ~p" + "~n User: ~p" + "~n SecLevel: ~p" + "~n EngineID: ~p" + "~n CtxEngineID: ~p" + "~n Community: ~p" + "~n StdM: ~p" + "~n", [M,Vsn,Dir,User,SecLevel,EngineID,CtxEngineID,Community,StdM]), case snmp_test_mgr:start([%% {agent, snmp_test_lib:hostname()}, {packet_server_debug, true}, {debug, true}, @@ -376,24 +475,45 @@ run(Mod, Func, Args, Opts) -> {mibs, mibs(StdM, M)}]) of {ok, _Pid} -> case (catch apply(Mod, Func, Args)) of + {'EXIT', {skip, Reason}} -> + ?EPRINT2("apply skip detected: " + "~n ~p", [Reason]), + (catch snmp_test_mgr:stop()), + ?SKIP(Reason); {'EXIT', Reason} -> - catch snmp_test_mgr:stop(), - ?FAIL({apply_failed, {Mod, Func, Args}, Reason}); + %% We have hosts (mostly *very* slooow VMs) that + %% can timeout anything. Since we are basically + %% testing communication, we therefor must check + %% for system events at every failure. Grrr! + SysEvs = snmp_test_global_sys_monitor:events(), + (catch snmp_test_mgr:stop()), + if + (SysEvs =:= []) -> + ?EPRINT2("TC runner failed: " + "~n ~p~n", [Reason]), + ?FAIL({apply_failed, {Mod, Func, Args}, Reason}); + true -> + ?EPRINT2("apply exit catched when we got system events: " + "~n Reason: ~p" + "~n Sys Events: ~p" + "~n", [Reason, SysEvs]), + ?SKIP([{reason, Reason}, {system_events, SysEvs}]) + end; Res -> - catch snmp_test_mgr:stop(), + (catch snmp_test_mgr:stop()), Res end; {error, Reason} -> ?EPRINT2("Failed starting (test) manager: " "~n ~p", [Reason]), - catch snmp_test_mgr:stop(), + (catch snmp_test_mgr:stop()), ?line ?FAIL({mgr_start_error, Reason}); Err -> ?EPRINT2("Failed starting (test) manager: " "~n ~p", [Err]), - catch snmp_test_mgr:stop(), + (catch snmp_test_mgr:stop()), ?line ?FAIL({mgr_start_failure, Err}) end. @@ -907,10 +1027,22 @@ expect2(Mod, Line, F) -> %% ---------------------------------------------------------------------- -get_timeout() -> - get_timeout(os:type()). +-define(BASE_REQ_TIMEOUT, 3500). -get_timeout(_) -> 3500. +get_timeout() -> + %% Try to figure out how "fast" a machine is. + %% We assume that the number of schedulers + %% (which depends on the number of core:s) + %% effect the performance of the host... + %% This is obviously not enough. The network + %% also matterns, clock freq or the CPU, ... + %% But its better than what we had before... + case erlang:system_info(schedulers) of + N when is_integer(N) -> + ?BASE_REQ_TIMEOUT + timer:seconds(10 div N); + _ -> + ?BASE_REQ_TIMEOUT + end. receive_pdu(To) -> receive @@ -1083,6 +1215,18 @@ do_expect(trap, Enterp, Generic, Specific, ExpVBs, To) -> {PureE, Generic, Specific, ExpVBs}, {Ent2, G2, Spec2, VBs}}}; + {error, timeout} = Error -> + SysEvs = snmp_test_global_sys_monitor:events(), + io_format_expect("[expecting trap] got timeout when system events:" + "~n ~p", [SysEvs]), + if + (SysEvs =:= []) -> + Error; + true -> + skip({system_events, SysEvs}) + end; + + Error -> Error end. @@ -1184,7 +1328,7 @@ do_expect2(Check, Type, Err, Idx, ExpVBs, To) io_format_expect("received unexpected pdu with (11) " "~n Type: ~p" "~n ReqId: ~p" - "~n Errot status: ~p" + "~n Error status: ~p" "~n Error index: ~p", [Type2, ReqId, Err2, Idx2]), {error, @@ -1247,7 +1391,7 @@ do_expect2(Check, Type, Err, Idx, ExpVBs, To) io_format_expect("received unexpected pdu with (15) " "~n Type: ~p" "~n ReqId: ~p" - "~n Errot status: ~p" + "~n Error status: ~p" "~n Error index: ~p" "~n Varbinds: ~p", [Type2, ReqId, Err2, Idx2, VBs2]), @@ -1257,10 +1401,23 @@ do_expect2(Check, Type, Err, Idx, ExpVBs, To) {Type2, Err2, Idx2, VBs2}, ReqId}}; - Error -> - io_format_expect("received error (16): " + + {error, timeout} = Error -> + SysEvs = snmp_test_global_sys_monitor:events(), + io_format_expect("got timeout (16) when system events:" + "~n ~p", [SysEvs]), + if + (SysEvs =:= []) -> + Error; + true -> + skip({system_events, SysEvs}) + end; + + + Error -> + io_format_expect("received error (17): " "~n Error: ~p", [Error]), - Error + Error end. @@ -1378,12 +1535,15 @@ start_node(Name) -> "" end, %% Do not use start_link!!! (the proc that calls this one is tmp) - ?DBG("start_node -> Args: ~p~n",[Args]), - A = Args ++ " -pa " ++ Pa, + ?DBG("start_node -> Args: ~p~n", [Args]), + A = Args ++ " -pa " ++ Pa ++ + " -s " ++ atom_to_list(snmp_test_sys_monitor) ++ " start" ++ + " -s global sync", case (catch ?START_NODE(Name, A)) of {ok, Node} -> %% Tell the test_server to not clean up things it never started. ?DBG("start_node -> Node: ~p",[Node]), + global:sync(), {ok, Node}; Else -> ?ERR("start_node -> failed with(other): Else: ~p",[Else]), @@ -1701,6 +1861,10 @@ rpc(Node, F, A) -> join(Dir, File) -> filename:join(Dir, File). + +skip(R) -> + exit({skip, R}). + %% await_pdu(To) -> %% await_response(To, pdu). %% |