diff options
| author | Björn-Egil Dahlberg <[email protected]> | 2016-02-25 15:27:29 +0100 | 
|---|---|---|
| committer | Björn-Egil Dahlberg <[email protected]> | 2016-02-25 15:27:29 +0100 | 
| commit | 93d2038e91854d4bc4fd75dac7005719a79949cd (patch) | |
| tree | 6e56bd00815e289a2792bbc2d21ca02ea4791ee7 | |
| parent | 87ed3fe4d7ddbc3c64d182032e9fe054600cf5ba (diff) | |
| parent | 0ca09ee90d6384e74d1b18ab0e05f2c05fc03905 (diff) | |
| download | otp-93d2038e91854d4bc4fd75dac7005719a79949cd.tar.gz otp-93d2038e91854d4bc4fd75dac7005719a79949cd.tar.bz2 otp-93d2038e91854d4bc4fd75dac7005719a79949cd.zip  | |
Merge branch 'egil/heart-callback/OTP-13250' into maint
| -rw-r--r-- | erts/emulator/beam/atom.names | 1 | ||||
| -rw-r--r-- | erts/emulator/beam/bif.tab | 2 | ||||
| -rw-r--r-- | erts/emulator/beam/erl_alloc.types | 2 | ||||
| -rw-r--r-- | erts/emulator/beam/erl_bif_info.c | 15 | ||||
| -rw-r--r-- | erts/emulator/beam/erl_process.c | 97 | ||||
| -rw-r--r-- | erts/emulator/beam/erl_process.h | 1 | ||||
| -rw-r--r-- | erts/etc/common/heart.c | 4 | ||||
| -rw-r--r-- | erts/preloaded/ebin/erts_internal.beam | bin | 5960 -> 6516 bytes | |||
| -rw-r--r-- | erts/preloaded/src/erts_internal.erl | 20 | ||||
| -rw-r--r-- | lib/kernel/doc/src/heart.xml | 63 | ||||
| -rw-r--r-- | lib/kernel/src/heart.erl | 182 | ||||
| -rw-r--r-- | lib/kernel/test/heart_SUITE.erl | 67 | 
12 files changed, 414 insertions, 40 deletions
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 07f6492948..cb6d294a41 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -269,6 +269,7 @@ atom getenv  atom gather_gc_info_result  atom gather_io_bytes  atom gather_sched_wall_time_result +atom gather_system_check_result  atom getting_linked  atom getting_unlinked  atom global diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 4f0656d174..3177d5dae7 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -174,6 +174,8 @@ bif erts_internal:time_unit/0  bif erts_internal:is_system_process/1 +bif erts_internal:system_check/1 +  # inet_db support  bif erlang:port_set_data/2  bif erlang:port_get_data/1 diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types index 1ecebdeb07..7738531142 100644 --- a/erts/emulator/beam/erl_alloc.types +++ b/erts/emulator/beam/erl_alloc.types @@ -369,6 +369,7 @@ type	AINFO_REQ	STANDARD_LOW	SYSTEM		alloc_info_request  type	SCHED_WTIME_REQ	STANDARD_LOW	SYSTEM		sched_wall_time_request  type	GC_INFO_REQ	STANDARD_LOW	SYSTEM		gc_info_request  type	PORT_DATA_HEAP	STANDARD_LOW	SYSTEM		port_data_heap +type	SYS_CHECK_REQ	STANDARD_LOW	SYSTEM		system_check_request  +else # "fullword" @@ -389,6 +390,7 @@ type	AINFO_REQ	SHORT_LIVED	SYSTEM		alloc_info_request  type	SCHED_WTIME_REQ	SHORT_LIVED	SYSTEM		sched_wall_time_request  type	GC_INFO_REQ	SHORT_LIVED	SYSTEM		gc_info_request  type	PORT_DATA_HEAP	STANDARD	SYSTEM		port_data_heap +type	SYS_CHECK_REQ	SHORT_LIVED	SYSTEM		system_check_request  +endif diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index 414ff6711a..8bf6877bea 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -65,6 +65,7 @@ static Export* gather_io_bytes_trap = NULL;  static Export *gather_sched_wall_time_res_trap;  static Export *gather_gc_info_res_trap; +static Export *gather_system_check_res_trap;  #define DECL_AM(S) Eterm AM_ ## S = am_atom_put(#S, sizeof(#S) - 1) @@ -3784,6 +3785,18 @@ BIF_RETTYPE erts_internal_is_system_process_1(BIF_ALIST_1)      BIF_ERROR(BIF_P, BADARG);  } +BIF_RETTYPE erts_internal_system_check_1(BIF_ALIST_1) +{ +    Eterm res; +    if (ERTS_IS_ATOM_STR("schedulers", BIF_ARG_1)) { +	res = erts_system_check_request(BIF_P); +	if (is_non_value(res)) +	    BIF_RET(am_undefined); +	BIF_TRAP1(gather_system_check_res_trap, BIF_P, res); +    } + +    BIF_ERROR(BIF_P, BADARG); +}  static erts_smp_atomic_t hipe_test_reschedule_flag; @@ -4391,6 +4404,8 @@ erts_bif_info_init(void)  	= erts_export_put(am_erlang, am_gather_gc_info_result, 1);      gather_io_bytes_trap  	= erts_export_put(am_erts_internal, am_gather_io_bytes, 2); +    gather_system_check_res_trap +	= erts_export_put(am_erts_internal, am_gather_system_check_result, 1);      process_info_init();      os_info_init();  } diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index adea745471..99a4011b8f 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -967,11 +967,25 @@ typedef struct {      erts_smp_atomic32_t refc;  } ErtsSchedWallTimeReq; +typedef struct { +    Process *proc; +    Eterm ref; +    Eterm ref_heap[REF_THING_SIZE]; +    Uint req_sched; +    erts_smp_atomic32_t refc; +} ErtsSystemCheckReq; + +  #if !HALFWORD_HEAP  ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(swtreq,  				 ErtsSchedWallTimeReq,  				 5,  				 ERTS_ALC_T_SCHED_WTIME_REQ) + +ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(screq, +				 ErtsSystemCheckReq, +				 5, +				 ERTS_ALC_T_SYS_CHECK_REQ)  #else  static ERTS_INLINE ErtsSchedWallTimeReq *  swtreq_alloc(void) @@ -985,6 +999,19 @@ swtreq_free(ErtsSchedWallTimeReq *ptr)  {      erts_free(ERTS_ALC_T_SCHED_WTIME_REQ, ptr);  } + +static ERTS_INLINE ErtsSystemCheckReq * +screq_alloc(void) +{ +    return erts_alloc(ERTS_ALC_T_SYS_CHECK_REQ, +		      sizeof(ErtsSystemCheckReq)); +} + +static ERTS_INLINE void +screq_free(ErtsSystemCheckReq *ptr) +{ +    erts_free(ERTS_ALC_T_SYS_CHECK_REQ, ptr); +}  #endif  static void @@ -1118,6 +1145,75 @@ erts_sched_wall_time_request(Process *c_p, int set, int enable)      return ref;  } +static void +reply_system_check(void *vscrp) +{ +    ErtsSchedulerData *esdp = erts_get_scheduler_data(); +    ErtsSystemCheckReq *scrp = (ErtsSystemCheckReq *) vscrp; +    ErtsProcLocks rp_locks = (scrp->req_sched == esdp->no ? ERTS_PROC_LOCK_MAIN : 0); +    Process *rp = scrp->proc; +    Eterm msg; +    Eterm *hp = NULL; +    Eterm **hpp; +    Uint sz; +    ErlOffHeap *ohp = NULL; +    ErlHeapFragment *bp = NULL; + +    ASSERT(esdp); +#ifdef ERTS_DIRTY_SCHEDULERS +    ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp)); +#endif + +    sz = REF_THING_SIZE; +    hp = erts_alloc_message_heap(sz, &bp, &ohp, rp, &rp_locks); +    hpp = &hp; +    msg = STORE_NC(hpp, ohp, scrp->ref); + +    erts_queue_message(rp, &rp_locks, bp, msg, NIL); + +    if (scrp->req_sched == esdp->no) +	rp_locks &= ~ERTS_PROC_LOCK_MAIN; + +    if (rp_locks) +	erts_smp_proc_unlock(rp, rp_locks); + +    erts_proc_dec_refc(rp); + +    if (erts_smp_atomic32_dec_read_nob(&scrp->refc) == 0) +	screq_free(vscrp); +} + + +Eterm erts_system_check_request(Process *c_p) { +    ErtsSchedulerData *esdp = ERTS_PROC_GET_SCHDATA(c_p); +    Eterm ref; +    ErtsSystemCheckReq *scrp; +    Eterm *hp; + +    scrp = screq_alloc(); +    ref = erts_make_ref(c_p); +    hp = &scrp->ref_heap[0]; + +    scrp->proc = c_p; +    scrp->ref = STORE_NC(&hp, NULL, ref); +    scrp->req_sched = esdp->no; +    erts_smp_atomic32_init_nob(&scrp->refc, (erts_aint32_t) erts_no_schedulers); + +    erts_proc_add_refc(c_p, (Sint) erts_no_schedulers); + +#ifdef ERTS_SMP +    if (erts_no_schedulers > 1) +	erts_schedule_multi_misc_aux_work(1, +					  erts_no_schedulers, +					  reply_system_check, +					  (void *) scrp); +#endif + +    reply_system_check((void *) scrp); + +    return ref; +} +  static ERTS_INLINE ErtsProcList *  proclist_create(Process *p)  { @@ -5792,6 +5888,7 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online      init_misc_aux_work();  #if !HALFWORD_HEAP      init_swtreq_alloc(); +    init_screq_alloc();  #endif      erts_atomic32_init_nob(&debug_wait_completed_count, 0); /* debug only */ diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index cbc2696eab..ee8a2e1b7b 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -1483,6 +1483,7 @@ void erts_init_scheduling(int, int  int erts_set_gc_state(Process *c_p, int enable);  Eterm erts_sched_wall_time_request(Process *c_p, int set, int enable); +Eterm erts_system_check_request(Process *c_p);  Eterm erts_gc_info_request(Process *c_p);  Uint64 erts_get_proc_interval(void);  Uint64 erts_ensure_later_proc_interval(Uint64); diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c index 9571b83ffd..1a826221fb 100644 --- a/erts/etc/common/heart.c +++ b/erts/etc/common/heart.c @@ -472,10 +472,6 @@ message_loop(erlin_fd, erlout_fd)  			switch (mp->op) {  			case HEART_BEAT:  				timestamp(&last_received); -#ifdef USE_WATCHDOG -				/* reset the hardware watchdog timer */ -				wd_reset(); -#endif  				break;  			case SHUT_DOWN:  				return R_SHUT_DOWN; diff --git a/erts/preloaded/ebin/erts_internal.beam b/erts/preloaded/ebin/erts_internal.beam Binary files differindex 32d5d70122..21dde7f257 100644 --- a/erts/preloaded/ebin/erts_internal.beam +++ b/erts/preloaded/ebin/erts_internal.beam diff --git a/erts/preloaded/src/erts_internal.erl b/erts/preloaded/src/erts_internal.erl index 7ed4efea4b..6db77a8482 100644 --- a/erts/preloaded/src/erts_internal.erl +++ b/erts/preloaded/src/erts_internal.erl @@ -35,6 +35,9 @@  -export([port_command/3, port_connect/2, port_close/1,  	 port_control/3, port_call/3, port_info/1, port_info/2]). +-export([system_check/1, +         gather_system_check_result/1]). +  -export([request_system_task/3]).  -export([check_process_code/2]). @@ -197,6 +200,23 @@ request_system_task(_Pid, _Prio, _Request) ->  check_process_code(_Module, _OptionList) ->      erlang:nif_error(undefined). +-spec system_check(Type) -> 'ok' when +      Type :: 'schedulers'. + +system_check(_Type) -> +    erlang:nif_error(undefined). + +gather_system_check_result(Ref) when is_reference(Ref) -> +    gather_system_check_result(Ref, erlang:system_info(schedulers)). + +gather_system_check_result(_Ref, 0) -> +    ok; +gather_system_check_result(Ref, N) -> +    receive +        Ref -> +            gather_system_check_result(Ref, N - 1) +    end. +  %% term compare where integer() < float() = true  -spec cmp_term(A,B) -> Result when diff --git a/lib/kernel/doc/src/heart.xml b/lib/kernel/doc/src/heart.xml index b9fad17ce1..9da4773f2d 100644 --- a/lib/kernel/doc/src/heart.xml +++ b/lib/kernel/doc/src/heart.xml @@ -118,6 +118,13 @@      <p>In the following descriptions, all function fails with reason        <c>badarg</c> if <c>heart</c> is not started.</p>    </description> + +  <datatypes> +      <datatype> +          <name name="heart_option"/> +      </datatype> +  </datatypes> +    <funcs>      <func>        <name name="set_cmd" arity="1"/> @@ -154,6 +161,62 @@            the empty string will be returned.</p>        </desc>      </func> + +    <func> +      <name name="set_callback" arity="2"/> +      <fsummary>Set a validation callback</fsummary> +      <desc> +          <p> This validation callback will be executed before any heartbeat sent +              to the port program. For the validation to succeed it needs to return +              with the value <c>ok</c>. +        </p> +        <p> An exception within the callback will be treated as a validation failure. </p> +        <p> The callback will be removed if the system reboots. </p> +      </desc> +    </func> +    <func> +      <name name="clear_callback" arity="0"/> +      <fsummary>Clear the validation callback</fsummary> +      <desc> +          <p>Removes the validation callback call before heartbeats.</p> +      </desc> +    </func> +    <func> +      <name name="get_callback" arity="0"/> +      <fsummary>Get the validation callback</fsummary> +      <desc> +          <p>Get the validation callback. If the callback is cleared, <c>none</c> will be returned.</p> +      </desc> +    </func> + +    <func> +        <name name="set_options" arity="1"/> +        <fsummary>Set a list of options</fsummary> +        <desc> +            <p> Valid options <c>set_options</c> are: </p> +            <taglist> +                <tag><c>check_schedulers</c></tag> +                <item> +                    <p>If enabled, a signal will be sent to each scheduler to check its +                        responsiveness. The system check occurs before any heartbeat sent +                        to the port program. If any scheduler is not responsive enough the +                        heart program will not receive its heartbeat and thus eventually terminate the node. +                    </p> +                </item> +            </taglist> +            <p> Returns with the value <c>ok</c> if the options are valid.</p> +        </desc> +    </func> +    <func> +      <name name="get_options" arity="0"/> +      <fsummary>Get the temporary reboot command</fsummary> +      <desc> +          <p>Returns <c>{ok, Options}</c> where <c>Options</c> is a list of current options enabled for heart. +              If the callback is cleared, <c>none</c> will be returned.</p> +      </desc> +    </func> + +    </funcs>  </erlref> diff --git a/lib/kernel/src/heart.erl b/lib/kernel/src/heart.erl index 137fad706f..eea78aabdf 100644 --- a/lib/kernel/src/heart.erl +++ b/lib/kernel/src/heart.erl @@ -34,7 +34,11 @@  %%%  %%% It recognizes the flag '-heart'  %%%-------------------------------------------------------------------- --export([start/0, init/2, set_cmd/1, clear_cmd/0, get_cmd/0, cycle/0]). +-export([start/0, init/2, +         set_cmd/1, clear_cmd/0, get_cmd/0, +         set_callback/2, clear_callback/0, get_callback/0, +         set_options/1, get_options/0, +         cycle/0]).  -define(START_ACK, 1).  -define(HEART_BEAT, 2). @@ -49,6 +53,16 @@  -define(CYCLE_TIMEOUT, 10000).  -define(HEART_PORT_NAME, heart_port). +%% valid heart options +-define(SCHEDULER_CHECK_OPT, check_schedulers). + +-type heart_option() :: ?SCHEDULER_CHECK_OPT. + +-record(state,{port :: port(), +               cmd  :: [] | binary(), +               options :: [heart_option()], +               callback :: 'undefined' | {atom(), atom()}}). +  %%---------------------------------------------------------------------  -spec start() -> 'ignore' | {'error', term()} | {'ok', pid()}. @@ -81,11 +95,11 @@ wait_for_init_ack(From) ->  init(Starter, Parent) ->      process_flag(trap_exit, true),      process_flag(priority, max), -    register(heart, self()), +    register(?MODULE, self()),      case catch start_portprogram() of  	{ok, Port} ->  	    Starter ! {ok, self()}, -	    loop(Parent, Port, ""); +	    loop(Parent, #state{port=Port, cmd=[], options=[]});  	no_heart ->  	    Starter ! {no_heart, self()};  	error -> @@ -96,33 +110,68 @@ init(Starter, Parent) ->        Cmd :: string().  set_cmd(Cmd) -> -    heart ! {self(), set_cmd, Cmd}, +    ?MODULE ! {self(), set_cmd, Cmd},      wait().  -spec get_cmd() -> {ok, Cmd} when        Cmd :: string().  get_cmd() -> -    heart ! {self(), get_cmd}, +    ?MODULE ! {self(), get_cmd},      wait().  -spec clear_cmd() -> ok.  clear_cmd() -> -    heart ! {self(), clear_cmd}, +    ?MODULE ! {self(), clear_cmd}, +    wait(). + +-spec set_callback(Module,Function) -> 'ok' | {'error', {'bad_callback', {Module, Function}}} when +      Module :: atom(), +      Function :: atom(). + +set_callback(Module, Function) -> +    ?MODULE ! {self(), set_callback, {Module,Function}}, +    wait(). + +-spec get_callback() -> {'ok', {Module, Function}} | 'none' when +      Module :: atom(), +      Function :: atom(). + +get_callback() -> +    ?MODULE ! {self(), get_callback}, +    wait(). + +-spec clear_callback() -> ok. + +clear_callback() -> +    ?MODULE ! {self(), clear_callback}, +    wait(). + +-spec set_options(Options) -> 'ok' | {'error', {'bad_options', Options}} when +      Options :: [heart_option()]. + +set_options(Options) -> +    ?MODULE ! {self(), set_options, Options},      wait(). +-spec get_options() -> {'ok', Options} | 'none' when +      Options :: [atom()]. + +get_options() -> +    ?MODULE ! {self(), get_options}, +    wait().  %%% Should be used solely by the release handler!!!!!!!  -spec cycle() -> 'ok' | {'error', term()}.  cycle() -> -    heart ! {self(), cycle}, +    ?MODULE ! {self(), cycle},      wait().  wait() ->      receive -	{heart, Res} -> +	{?MODULE, Res} ->  	    Res      end. @@ -182,8 +231,8 @@ wait_ack(Port) ->  	    {error, Reason}      end. -loop(Parent, Port, Cmd) -> -    _ = send_heart_beat(Port), +loop(Parent, #state{port=Port}=S) -> +    _ = send_heart_beat(S),      receive  	{From, set_cmd, NewCmd0} ->  	    Enc = file:native_name_encoding(), @@ -191,37 +240,72 @@ loop(Parent, Port, Cmd) ->  		NewCmd when is_binary(NewCmd), byte_size(NewCmd) < 2047 ->  		    _ = send_heart_cmd(Port, NewCmd),  		    _ = wait_ack(Port), -		    From ! {heart, ok}, -		    loop(Parent, Port, NewCmd); +		    From ! {?MODULE, ok}, +		    loop(Parent, S#state{cmd=NewCmd});  		_ -> -		    From ! {heart, {error, {bad_cmd, NewCmd0}}}, -		    loop(Parent, Port, Cmd) +		    From ! {?MODULE, {error, {bad_cmd, NewCmd0}}}, +		    loop(Parent, S)  	    end;  	{From, clear_cmd} -> -	    From ! {heart, ok}, -	    _ = send_heart_cmd(Port, ""), +	    From ! {?MODULE, ok}, +	    _ = send_heart_cmd(Port, []),  	    _ = wait_ack(Port), -	    loop(Parent, Port, ""); +	    loop(Parent, S#state{cmd = []});  	{From, get_cmd} -> -	    From ! {heart, get_heart_cmd(Port)}, -	    loop(Parent, Port, Cmd); +	    From ! {?MODULE, get_heart_cmd(Port)}, +            loop(Parent, S); +	{From, set_callback, Callback} -> +            case Callback of +                {M,F} when is_atom(M), is_atom(F) -> +                    From ! {?MODULE, ok}, +                    loop(Parent, S#state{callback=Callback}); +                _ -> +		    From ! {?MODULE, {error, {bad_callback, Callback}}}, +                    loop(Parent, S) +            end; +        {From, get_callback} -> +            Res = case S#state.callback of +                      undefined -> none; +                      Cb -> {ok, Cb} +                  end, +            From ! {?MODULE, Res}, +            loop(Parent, S); +        {From, clear_callback} -> +            From ! {?MODULE, ok}, +            loop(Parent, S#state{callback=undefined}); +	{From, set_options, Options} -> +            case validate_options(Options) of +                Validated when is_list(Validated) -> +                    From ! {?MODULE, ok}, +                    loop(Parent, S#state{options=Validated}); +                _ -> +		    From ! {?MODULE, {error, {bad_options, Options}}}, +                    loop(Parent, S) +            end; +        {From, get_options} -> +            Res = case S#state.options of +                      [] -> none; +                      Cb -> {ok, Cb} +                  end, +            From ! {?MODULE, Res}, +            loop(Parent, S);  	{From, cycle} ->  	    %% Calls back to loop -	    do_cycle_port_program(From, Parent, Port, Cmd);   +	    do_cycle_port_program(From, Parent, S);  	{'EXIT', Parent, shutdown} ->  	    no_reboot_shutdown(Port);  	{'EXIT', Parent, Reason} ->  	    exit(Port, Reason),  	    exit(Reason);  	{'EXIT', Port, badsig} ->  % we can ignore badsig-messages! -	    loop(Parent, Port, Cmd); +	    loop(Parent, S);  	{'EXIT', Port, _Reason} -> -	    exit({port_terminated, {heart, loop, [Parent, Port, Cmd]}}); +	    exit({port_terminated, {?MODULE, loop, [Parent, S]}});  	_ ->  -	    loop(Parent, Port, Cmd) +	    loop(Parent, S)      after  	?TIMEOUT -> -	    loop(Parent, Port, Cmd) +	    loop(Parent, S)      end.  -spec no_reboot_shutdown(port()) -> no_return(). @@ -233,36 +317,44 @@ no_reboot_shutdown(Port) ->  	    exit(normal)      end. -do_cycle_port_program(Caller, Parent, Port, Cmd) -> +validate_options(Opts) -> validate_options(Opts,[]). +validate_options([],Res) -> Res; +validate_options([?SCHEDULER_CHECK_OPT=Opt|Opts],Res) -> validate_options(Opts,[Opt|Res]); +validate_options(_,_) -> error. + +do_cycle_port_program(Caller, Parent, #state{port=Port} = S) ->      unregister(?HEART_PORT_NAME),      case catch start_portprogram() of  	{ok, NewPort} ->  	    _ = send_shutdown(Port),  	    receive  		{'EXIT', Port, _Reason} -> -		    _ = send_heart_cmd(NewPort, Cmd), -		    Caller ! {heart, ok}, -		    loop(Parent, NewPort, Cmd) +		    _ = send_heart_cmd(NewPort, S#state.cmd), +		    Caller ! {?MODULE, ok}, +		    loop(Parent, S#state{port=NewPort})  	    after  		?CYCLE_TIMEOUT ->  		    %% Huh! Two heart port programs running...  		    %% well, the old one has to be sick not to respond  		    %% so we'll settle for the new one... -		    _ = send_heart_cmd(NewPort, Cmd), -		    Caller ! {heart, {error, stop_error}}, -		    loop(Parent, NewPort, Cmd) +		    _ = send_heart_cmd(NewPort, S#state.cmd), +		    Caller ! {?MODULE, {error, stop_error}}, +		    loop(Parent, S#state{port=NewPort})  	    end;  	no_heart -> -	    Caller ! {heart, {error, no_heart}}, -	    loop(Parent, Port, Cmd); +	    Caller ! {?MODULE, {error, no_heart}}, +	    loop(Parent, S);  	error -> -	    Caller ! {heart, {error, start_error}}, -	    loop(Parent, Port, Cmd) +	    Caller ! {?MODULE, {error, start_error}}, +	    loop(Parent, S)      end.  %% "Beates" the heart once. -send_heart_beat(Port) -> Port ! {self(), {command, [?HEART_BEAT]}}. +send_heart_beat(#state{port=Port, callback=Cb, options=Opts}) -> +    ok = check_system(Opts), +    ok = check_callback(Cb), +    Port ! {self(), {command, [?HEART_BEAT]}}.  %% Set a new HEART_COMMAND.  -dialyzer({no_improper_lists, send_heart_cmd/2}). @@ -278,6 +370,24 @@ get_heart_cmd(Port) ->  	    {ok, Cmd}      end. +check_system([]) -> ok; +check_system([?SCHEDULER_CHECK_OPT|Opts]) -> +    ok = erts_internal:system_check(schedulers), +    check_system(Opts). + +%% validate system by performing a check before the heartbeat +%% return 'ok' if everything is alright. +%% Terminate if with reason if something is a miss. +%% It is fine to timeout in the callback, in fact that is the intention +%% if something goes wront -> no heartbeat. + +check_callback(Callback) -> +    case Callback of +        undefined -> ok; +        {M,F} -> +            erlang:apply(M,F,[]) +    end. +  %% Sends shutdown command to the port.  send_shutdown(Port) -> Port ! {self(), {command, [?SHUT_DOWN]}}. diff --git a/lib/kernel/test/heart_SUITE.erl b/lib/kernel/test/heart_SUITE.erl index 83efbb4c35..39cd29cea0 100644 --- a/lib/kernel/test/heart_SUITE.erl +++ b/lib/kernel/test/heart_SUITE.erl @@ -27,6 +27,8 @@  	 node_start_immediately_after_crash/1,  	 node_start_soon_after_crash/1,  	 set_cmd/1, clear_cmd/1, get_cmd/1, +	 callback_api/1, +         options_api/1,  	 dont_drop/1, kill_pid/1]).  -export([init_per_testcase/2, end_per_testcase/2]). @@ -66,6 +68,8 @@ all() -> [  	node_start_immediately_after_crash,  	node_start_soon_after_crash,  	set_cmd, clear_cmd, get_cmd, +	callback_api, +        options_api,  	kill_pid      ]. @@ -358,6 +362,69 @@ get_cmd(Config) when is_list(Config) ->      stop_node(Node),      ok. +callback_api(Config) when is_list(Config) -> +    {ok, Node} = start_check(slave, heart_test), +    none = rpc:call(Node, heart, get_callback, []), +    M0 = self(), +    F0 = ok, +    {error, {bad_callback, {M0,F0}}} = rpc:call(Node, heart, set_callback, [M0,F0]), +    none = rpc:call(Node, heart, get_callback, []), +    M1 = lists:duplicate(28, $a), +    F1 = lists:duplicate(28, $b), +    {error, {bad_callback, {M1,F1}}} = rpc:call(Node, heart, set_callback, [M1,F1]), +    none = rpc:call(Node, heart, get_callback, []), + +    M2 = heart_check_module, +    F2 = cb_ok, +    F3 = cb_error, +    Code0 = generate(M2, [], [ +	    atom_to_list(F2) ++ "() -> ok.", +            atom_to_list(F3) ++ "() -> exit(\"callback_error (as intended)\")." +	]), +    {module, M2} = rpc:call(Node, erlang, load_module, [M2, Code0]), +    ok = rpc:call(Node, M2, F2, []), +    ok = rpc:call(Node, heart, set_callback, [M2,F2]), +    {ok, {M2,F2}} = rpc:call(Node, heart, get_callback, []), +    ok = rpc:call(Node, heart, clear_callback, []), +    none = rpc:call(Node, heart, get_callback, []), +    ok = rpc:call(Node, heart, set_callback, [M2,F2]), +    {ok, {M2,F2}} = rpc:call(Node, heart, get_callback, []), +    ok = rpc:call(Node, heart, set_callback, [M2,F3]), +    receive {nodedown, Node} -> ok +    after 5000 -> test_server:fail(node_not_killed) +    end, +    stop_node(Node), +    ok. + +options_api(Config) when is_list(Config) -> +    {ok, Node} = start_check(slave, heart_test), +    none = rpc:call(Node, heart, get_options, []), +    M0 = self(), +    F0 = ok, +    {error, {bad_options, {M0,F0}}} = rpc:call(Node, heart, set_options, [{M0,F0}]), +    none = rpc:call(Node, heart, get_options, []), +    Ls = lists:duplicate(28, $b), +    {error, {bad_options, Ls}} = rpc:call(Node, heart, set_options, [Ls]), +    none = rpc:call(Node, heart, get_options, []), + +    ok = rpc:call(Node, heart, set_options, [[check_schedulers]]), +    {ok, [check_schedulers]} = rpc:call(Node, heart, get_options, []), +    ok = rpc:call(Node, heart, set_options, [[]]), +    none = rpc:call(Node, heart, get_options, []), + +    ok = rpc:call(Node, heart, set_options, [[check_schedulers]]), +    {ok, [check_schedulers]} = rpc:call(Node, heart, get_options, []), +    {error, {bad_options, Ls}} = rpc:call(Node, heart, set_options, [Ls]), +    {ok, [check_schedulers]} = rpc:call(Node, heart, get_options, []), + +    receive after 3000 -> ok end, %% wait 3 secs + +    ok = rpc:call(Node, heart, set_options, [[]]), +    none = rpc:call(Node, heart, get_options, []), +    stop_node(Node), +    ok. + +  dont_drop(suite) ->   %%% Removed as it may crash epmd/distribution in colourful  %%% ways. While we ARE finding out WHY, it would  | 
