aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjörn-Egil Dahlberg <[email protected]>2016-02-25 15:27:29 +0100
committerBjörn-Egil Dahlberg <[email protected]>2016-02-25 15:27:29 +0100
commit93d2038e91854d4bc4fd75dac7005719a79949cd (patch)
tree6e56bd00815e289a2792bbc2d21ca02ea4791ee7
parent87ed3fe4d7ddbc3c64d182032e9fe054600cf5ba (diff)
parent0ca09ee90d6384e74d1b18ab0e05f2c05fc03905 (diff)
downloadotp-93d2038e91854d4bc4fd75dac7005719a79949cd.tar.gz
otp-93d2038e91854d4bc4fd75dac7005719a79949cd.tar.bz2
otp-93d2038e91854d4bc4fd75dac7005719a79949cd.zip
Merge branch 'egil/heart-callback/OTP-13250' into maint
-rw-r--r--erts/emulator/beam/atom.names1
-rw-r--r--erts/emulator/beam/bif.tab2
-rw-r--r--erts/emulator/beam/erl_alloc.types2
-rw-r--r--erts/emulator/beam/erl_bif_info.c15
-rw-r--r--erts/emulator/beam/erl_process.c97
-rw-r--r--erts/emulator/beam/erl_process.h1
-rw-r--r--erts/etc/common/heart.c4
-rw-r--r--erts/preloaded/ebin/erts_internal.beambin5960 -> 6516 bytes
-rw-r--r--erts/preloaded/src/erts_internal.erl20
-rw-r--r--lib/kernel/doc/src/heart.xml63
-rw-r--r--lib/kernel/src/heart.erl182
-rw-r--r--lib/kernel/test/heart_SUITE.erl67
12 files changed, 414 insertions, 40 deletions
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names
index 07f6492948..cb6d294a41 100644
--- a/erts/emulator/beam/atom.names
+++ b/erts/emulator/beam/atom.names
@@ -269,6 +269,7 @@ atom getenv
atom gather_gc_info_result
atom gather_io_bytes
atom gather_sched_wall_time_result
+atom gather_system_check_result
atom getting_linked
atom getting_unlinked
atom global
diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab
index 4f0656d174..3177d5dae7 100644
--- a/erts/emulator/beam/bif.tab
+++ b/erts/emulator/beam/bif.tab
@@ -174,6 +174,8 @@ bif erts_internal:time_unit/0
bif erts_internal:is_system_process/1
+bif erts_internal:system_check/1
+
# inet_db support
bif erlang:port_set_data/2
bif erlang:port_get_data/1
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types
index 1ecebdeb07..7738531142 100644
--- a/erts/emulator/beam/erl_alloc.types
+++ b/erts/emulator/beam/erl_alloc.types
@@ -369,6 +369,7 @@ type AINFO_REQ STANDARD_LOW SYSTEM alloc_info_request
type SCHED_WTIME_REQ STANDARD_LOW SYSTEM sched_wall_time_request
type GC_INFO_REQ STANDARD_LOW SYSTEM gc_info_request
type PORT_DATA_HEAP STANDARD_LOW SYSTEM port_data_heap
+type SYS_CHECK_REQ STANDARD_LOW SYSTEM system_check_request
+else # "fullword"
@@ -389,6 +390,7 @@ type AINFO_REQ SHORT_LIVED SYSTEM alloc_info_request
type SCHED_WTIME_REQ SHORT_LIVED SYSTEM sched_wall_time_request
type GC_INFO_REQ SHORT_LIVED SYSTEM gc_info_request
type PORT_DATA_HEAP STANDARD SYSTEM port_data_heap
+type SYS_CHECK_REQ SHORT_LIVED SYSTEM system_check_request
+endif
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index 414ff6711a..8bf6877bea 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -65,6 +65,7 @@ static Export* gather_io_bytes_trap = NULL;
static Export *gather_sched_wall_time_res_trap;
static Export *gather_gc_info_res_trap;
+static Export *gather_system_check_res_trap;
#define DECL_AM(S) Eterm AM_ ## S = am_atom_put(#S, sizeof(#S) - 1)
@@ -3784,6 +3785,18 @@ BIF_RETTYPE erts_internal_is_system_process_1(BIF_ALIST_1)
BIF_ERROR(BIF_P, BADARG);
}
+BIF_RETTYPE erts_internal_system_check_1(BIF_ALIST_1)
+{
+ Eterm res;
+ if (ERTS_IS_ATOM_STR("schedulers", BIF_ARG_1)) {
+ res = erts_system_check_request(BIF_P);
+ if (is_non_value(res))
+ BIF_RET(am_undefined);
+ BIF_TRAP1(gather_system_check_res_trap, BIF_P, res);
+ }
+
+ BIF_ERROR(BIF_P, BADARG);
+}
static erts_smp_atomic_t hipe_test_reschedule_flag;
@@ -4391,6 +4404,8 @@ erts_bif_info_init(void)
= erts_export_put(am_erlang, am_gather_gc_info_result, 1);
gather_io_bytes_trap
= erts_export_put(am_erts_internal, am_gather_io_bytes, 2);
+ gather_system_check_res_trap
+ = erts_export_put(am_erts_internal, am_gather_system_check_result, 1);
process_info_init();
os_info_init();
}
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index adea745471..99a4011b8f 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -967,11 +967,25 @@ typedef struct {
erts_smp_atomic32_t refc;
} ErtsSchedWallTimeReq;
+typedef struct {
+ Process *proc;
+ Eterm ref;
+ Eterm ref_heap[REF_THING_SIZE];
+ Uint req_sched;
+ erts_smp_atomic32_t refc;
+} ErtsSystemCheckReq;
+
+
#if !HALFWORD_HEAP
ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(swtreq,
ErtsSchedWallTimeReq,
5,
ERTS_ALC_T_SCHED_WTIME_REQ)
+
+ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(screq,
+ ErtsSystemCheckReq,
+ 5,
+ ERTS_ALC_T_SYS_CHECK_REQ)
#else
static ERTS_INLINE ErtsSchedWallTimeReq *
swtreq_alloc(void)
@@ -985,6 +999,19 @@ swtreq_free(ErtsSchedWallTimeReq *ptr)
{
erts_free(ERTS_ALC_T_SCHED_WTIME_REQ, ptr);
}
+
+static ERTS_INLINE ErtsSystemCheckReq *
+screq_alloc(void)
+{
+ return erts_alloc(ERTS_ALC_T_SYS_CHECK_REQ,
+ sizeof(ErtsSystemCheckReq));
+}
+
+static ERTS_INLINE void
+screq_free(ErtsSystemCheckReq *ptr)
+{
+ erts_free(ERTS_ALC_T_SYS_CHECK_REQ, ptr);
+}
#endif
static void
@@ -1118,6 +1145,75 @@ erts_sched_wall_time_request(Process *c_p, int set, int enable)
return ref;
}
+static void
+reply_system_check(void *vscrp)
+{
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ ErtsSystemCheckReq *scrp = (ErtsSystemCheckReq *) vscrp;
+ ErtsProcLocks rp_locks = (scrp->req_sched == esdp->no ? ERTS_PROC_LOCK_MAIN : 0);
+ Process *rp = scrp->proc;
+ Eterm msg;
+ Eterm *hp = NULL;
+ Eterm **hpp;
+ Uint sz;
+ ErlOffHeap *ohp = NULL;
+ ErlHeapFragment *bp = NULL;
+
+ ASSERT(esdp);
+#ifdef ERTS_DIRTY_SCHEDULERS
+ ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp));
+#endif
+
+ sz = REF_THING_SIZE;
+ hp = erts_alloc_message_heap(sz, &bp, &ohp, rp, &rp_locks);
+ hpp = &hp;
+ msg = STORE_NC(hpp, ohp, scrp->ref);
+
+ erts_queue_message(rp, &rp_locks, bp, msg, NIL);
+
+ if (scrp->req_sched == esdp->no)
+ rp_locks &= ~ERTS_PROC_LOCK_MAIN;
+
+ if (rp_locks)
+ erts_smp_proc_unlock(rp, rp_locks);
+
+ erts_proc_dec_refc(rp);
+
+ if (erts_smp_atomic32_dec_read_nob(&scrp->refc) == 0)
+ screq_free(vscrp);
+}
+
+
+Eterm erts_system_check_request(Process *c_p) {
+ ErtsSchedulerData *esdp = ERTS_PROC_GET_SCHDATA(c_p);
+ Eterm ref;
+ ErtsSystemCheckReq *scrp;
+ Eterm *hp;
+
+ scrp = screq_alloc();
+ ref = erts_make_ref(c_p);
+ hp = &scrp->ref_heap[0];
+
+ scrp->proc = c_p;
+ scrp->ref = STORE_NC(&hp, NULL, ref);
+ scrp->req_sched = esdp->no;
+ erts_smp_atomic32_init_nob(&scrp->refc, (erts_aint32_t) erts_no_schedulers);
+
+ erts_proc_add_refc(c_p, (Sint) erts_no_schedulers);
+
+#ifdef ERTS_SMP
+ if (erts_no_schedulers > 1)
+ erts_schedule_multi_misc_aux_work(1,
+ erts_no_schedulers,
+ reply_system_check,
+ (void *) scrp);
+#endif
+
+ reply_system_check((void *) scrp);
+
+ return ref;
+}
+
static ERTS_INLINE ErtsProcList *
proclist_create(Process *p)
{
@@ -5792,6 +5888,7 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online
init_misc_aux_work();
#if !HALFWORD_HEAP
init_swtreq_alloc();
+ init_screq_alloc();
#endif
erts_atomic32_init_nob(&debug_wait_completed_count, 0); /* debug only */
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index cbc2696eab..ee8a2e1b7b 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -1483,6 +1483,7 @@ void erts_init_scheduling(int, int
int erts_set_gc_state(Process *c_p, int enable);
Eterm erts_sched_wall_time_request(Process *c_p, int set, int enable);
+Eterm erts_system_check_request(Process *c_p);
Eterm erts_gc_info_request(Process *c_p);
Uint64 erts_get_proc_interval(void);
Uint64 erts_ensure_later_proc_interval(Uint64);
diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c
index 9571b83ffd..1a826221fb 100644
--- a/erts/etc/common/heart.c
+++ b/erts/etc/common/heart.c
@@ -472,10 +472,6 @@ message_loop(erlin_fd, erlout_fd)
switch (mp->op) {
case HEART_BEAT:
timestamp(&last_received);
-#ifdef USE_WATCHDOG
- /* reset the hardware watchdog timer */
- wd_reset();
-#endif
break;
case SHUT_DOWN:
return R_SHUT_DOWN;
diff --git a/erts/preloaded/ebin/erts_internal.beam b/erts/preloaded/ebin/erts_internal.beam
index 32d5d70122..21dde7f257 100644
--- a/erts/preloaded/ebin/erts_internal.beam
+++ b/erts/preloaded/ebin/erts_internal.beam
Binary files differ
diff --git a/erts/preloaded/src/erts_internal.erl b/erts/preloaded/src/erts_internal.erl
index 7ed4efea4b..6db77a8482 100644
--- a/erts/preloaded/src/erts_internal.erl
+++ b/erts/preloaded/src/erts_internal.erl
@@ -35,6 +35,9 @@
-export([port_command/3, port_connect/2, port_close/1,
port_control/3, port_call/3, port_info/1, port_info/2]).
+-export([system_check/1,
+ gather_system_check_result/1]).
+
-export([request_system_task/3]).
-export([check_process_code/2]).
@@ -197,6 +200,23 @@ request_system_task(_Pid, _Prio, _Request) ->
check_process_code(_Module, _OptionList) ->
erlang:nif_error(undefined).
+-spec system_check(Type) -> 'ok' when
+ Type :: 'schedulers'.
+
+system_check(_Type) ->
+ erlang:nif_error(undefined).
+
+gather_system_check_result(Ref) when is_reference(Ref) ->
+ gather_system_check_result(Ref, erlang:system_info(schedulers)).
+
+gather_system_check_result(_Ref, 0) ->
+ ok;
+gather_system_check_result(Ref, N) ->
+ receive
+ Ref ->
+ gather_system_check_result(Ref, N - 1)
+ end.
+
%% term compare where integer() < float() = true
-spec cmp_term(A,B) -> Result when
diff --git a/lib/kernel/doc/src/heart.xml b/lib/kernel/doc/src/heart.xml
index b9fad17ce1..9da4773f2d 100644
--- a/lib/kernel/doc/src/heart.xml
+++ b/lib/kernel/doc/src/heart.xml
@@ -118,6 +118,13 @@
<p>In the following descriptions, all function fails with reason
<c>badarg</c> if <c>heart</c> is not started.</p>
</description>
+
+ <datatypes>
+ <datatype>
+ <name name="heart_option"/>
+ </datatype>
+ </datatypes>
+
<funcs>
<func>
<name name="set_cmd" arity="1"/>
@@ -154,6 +161,62 @@
the empty string will be returned.</p>
</desc>
</func>
+
+ <func>
+ <name name="set_callback" arity="2"/>
+ <fsummary>Set a validation callback</fsummary>
+ <desc>
+ <p> This validation callback will be executed before any heartbeat sent
+ to the port program. For the validation to succeed it needs to return
+ with the value <c>ok</c>.
+ </p>
+ <p> An exception within the callback will be treated as a validation failure. </p>
+ <p> The callback will be removed if the system reboots. </p>
+ </desc>
+ </func>
+ <func>
+ <name name="clear_callback" arity="0"/>
+ <fsummary>Clear the validation callback</fsummary>
+ <desc>
+ <p>Removes the validation callback call before heartbeats.</p>
+ </desc>
+ </func>
+ <func>
+ <name name="get_callback" arity="0"/>
+ <fsummary>Get the validation callback</fsummary>
+ <desc>
+ <p>Get the validation callback. If the callback is cleared, <c>none</c> will be returned.</p>
+ </desc>
+ </func>
+
+ <func>
+ <name name="set_options" arity="1"/>
+ <fsummary>Set a list of options</fsummary>
+ <desc>
+ <p> Valid options <c>set_options</c> are: </p>
+ <taglist>
+ <tag><c>check_schedulers</c></tag>
+ <item>
+ <p>If enabled, a signal will be sent to each scheduler to check its
+ responsiveness. The system check occurs before any heartbeat sent
+ to the port program. If any scheduler is not responsive enough the
+ heart program will not receive its heartbeat and thus eventually terminate the node.
+ </p>
+ </item>
+ </taglist>
+ <p> Returns with the value <c>ok</c> if the options are valid.</p>
+ </desc>
+ </func>
+ <func>
+ <name name="get_options" arity="0"/>
+ <fsummary>Get the temporary reboot command</fsummary>
+ <desc>
+ <p>Returns <c>{ok, Options}</c> where <c>Options</c> is a list of current options enabled for heart.
+ If the callback is cleared, <c>none</c> will be returned.</p>
+ </desc>
+ </func>
+
+
</funcs>
</erlref>
diff --git a/lib/kernel/src/heart.erl b/lib/kernel/src/heart.erl
index 137fad706f..eea78aabdf 100644
--- a/lib/kernel/src/heart.erl
+++ b/lib/kernel/src/heart.erl
@@ -34,7 +34,11 @@
%%%
%%% It recognizes the flag '-heart'
%%%--------------------------------------------------------------------
--export([start/0, init/2, set_cmd/1, clear_cmd/0, get_cmd/0, cycle/0]).
+-export([start/0, init/2,
+ set_cmd/1, clear_cmd/0, get_cmd/0,
+ set_callback/2, clear_callback/0, get_callback/0,
+ set_options/1, get_options/0,
+ cycle/0]).
-define(START_ACK, 1).
-define(HEART_BEAT, 2).
@@ -49,6 +53,16 @@
-define(CYCLE_TIMEOUT, 10000).
-define(HEART_PORT_NAME, heart_port).
+%% valid heart options
+-define(SCHEDULER_CHECK_OPT, check_schedulers).
+
+-type heart_option() :: ?SCHEDULER_CHECK_OPT.
+
+-record(state,{port :: port(),
+ cmd :: [] | binary(),
+ options :: [heart_option()],
+ callback :: 'undefined' | {atom(), atom()}}).
+
%%---------------------------------------------------------------------
-spec start() -> 'ignore' | {'error', term()} | {'ok', pid()}.
@@ -81,11 +95,11 @@ wait_for_init_ack(From) ->
init(Starter, Parent) ->
process_flag(trap_exit, true),
process_flag(priority, max),
- register(heart, self()),
+ register(?MODULE, self()),
case catch start_portprogram() of
{ok, Port} ->
Starter ! {ok, self()},
- loop(Parent, Port, "");
+ loop(Parent, #state{port=Port, cmd=[], options=[]});
no_heart ->
Starter ! {no_heart, self()};
error ->
@@ -96,33 +110,68 @@ init(Starter, Parent) ->
Cmd :: string().
set_cmd(Cmd) ->
- heart ! {self(), set_cmd, Cmd},
+ ?MODULE ! {self(), set_cmd, Cmd},
wait().
-spec get_cmd() -> {ok, Cmd} when
Cmd :: string().
get_cmd() ->
- heart ! {self(), get_cmd},
+ ?MODULE ! {self(), get_cmd},
wait().
-spec clear_cmd() -> ok.
clear_cmd() ->
- heart ! {self(), clear_cmd},
+ ?MODULE ! {self(), clear_cmd},
+ wait().
+
+-spec set_callback(Module,Function) -> 'ok' | {'error', {'bad_callback', {Module, Function}}} when
+ Module :: atom(),
+ Function :: atom().
+
+set_callback(Module, Function) ->
+ ?MODULE ! {self(), set_callback, {Module,Function}},
+ wait().
+
+-spec get_callback() -> {'ok', {Module, Function}} | 'none' when
+ Module :: atom(),
+ Function :: atom().
+
+get_callback() ->
+ ?MODULE ! {self(), get_callback},
+ wait().
+
+-spec clear_callback() -> ok.
+
+clear_callback() ->
+ ?MODULE ! {self(), clear_callback},
+ wait().
+
+-spec set_options(Options) -> 'ok' | {'error', {'bad_options', Options}} when
+ Options :: [heart_option()].
+
+set_options(Options) ->
+ ?MODULE ! {self(), set_options, Options},
wait().
+-spec get_options() -> {'ok', Options} | 'none' when
+ Options :: [atom()].
+
+get_options() ->
+ ?MODULE ! {self(), get_options},
+ wait().
%%% Should be used solely by the release handler!!!!!!!
-spec cycle() -> 'ok' | {'error', term()}.
cycle() ->
- heart ! {self(), cycle},
+ ?MODULE ! {self(), cycle},
wait().
wait() ->
receive
- {heart, Res} ->
+ {?MODULE, Res} ->
Res
end.
@@ -182,8 +231,8 @@ wait_ack(Port) ->
{error, Reason}
end.
-loop(Parent, Port, Cmd) ->
- _ = send_heart_beat(Port),
+loop(Parent, #state{port=Port}=S) ->
+ _ = send_heart_beat(S),
receive
{From, set_cmd, NewCmd0} ->
Enc = file:native_name_encoding(),
@@ -191,37 +240,72 @@ loop(Parent, Port, Cmd) ->
NewCmd when is_binary(NewCmd), byte_size(NewCmd) < 2047 ->
_ = send_heart_cmd(Port, NewCmd),
_ = wait_ack(Port),
- From ! {heart, ok},
- loop(Parent, Port, NewCmd);
+ From ! {?MODULE, ok},
+ loop(Parent, S#state{cmd=NewCmd});
_ ->
- From ! {heart, {error, {bad_cmd, NewCmd0}}},
- loop(Parent, Port, Cmd)
+ From ! {?MODULE, {error, {bad_cmd, NewCmd0}}},
+ loop(Parent, S)
end;
{From, clear_cmd} ->
- From ! {heart, ok},
- _ = send_heart_cmd(Port, ""),
+ From ! {?MODULE, ok},
+ _ = send_heart_cmd(Port, []),
_ = wait_ack(Port),
- loop(Parent, Port, "");
+ loop(Parent, S#state{cmd = []});
{From, get_cmd} ->
- From ! {heart, get_heart_cmd(Port)},
- loop(Parent, Port, Cmd);
+ From ! {?MODULE, get_heart_cmd(Port)},
+ loop(Parent, S);
+ {From, set_callback, Callback} ->
+ case Callback of
+ {M,F} when is_atom(M), is_atom(F) ->
+ From ! {?MODULE, ok},
+ loop(Parent, S#state{callback=Callback});
+ _ ->
+ From ! {?MODULE, {error, {bad_callback, Callback}}},
+ loop(Parent, S)
+ end;
+ {From, get_callback} ->
+ Res = case S#state.callback of
+ undefined -> none;
+ Cb -> {ok, Cb}
+ end,
+ From ! {?MODULE, Res},
+ loop(Parent, S);
+ {From, clear_callback} ->
+ From ! {?MODULE, ok},
+ loop(Parent, S#state{callback=undefined});
+ {From, set_options, Options} ->
+ case validate_options(Options) of
+ Validated when is_list(Validated) ->
+ From ! {?MODULE, ok},
+ loop(Parent, S#state{options=Validated});
+ _ ->
+ From ! {?MODULE, {error, {bad_options, Options}}},
+ loop(Parent, S)
+ end;
+ {From, get_options} ->
+ Res = case S#state.options of
+ [] -> none;
+ Cb -> {ok, Cb}
+ end,
+ From ! {?MODULE, Res},
+ loop(Parent, S);
{From, cycle} ->
%% Calls back to loop
- do_cycle_port_program(From, Parent, Port, Cmd);
+ do_cycle_port_program(From, Parent, S);
{'EXIT', Parent, shutdown} ->
no_reboot_shutdown(Port);
{'EXIT', Parent, Reason} ->
exit(Port, Reason),
exit(Reason);
{'EXIT', Port, badsig} -> % we can ignore badsig-messages!
- loop(Parent, Port, Cmd);
+ loop(Parent, S);
{'EXIT', Port, _Reason} ->
- exit({port_terminated, {heart, loop, [Parent, Port, Cmd]}});
+ exit({port_terminated, {?MODULE, loop, [Parent, S]}});
_ ->
- loop(Parent, Port, Cmd)
+ loop(Parent, S)
after
?TIMEOUT ->
- loop(Parent, Port, Cmd)
+ loop(Parent, S)
end.
-spec no_reboot_shutdown(port()) -> no_return().
@@ -233,36 +317,44 @@ no_reboot_shutdown(Port) ->
exit(normal)
end.
-do_cycle_port_program(Caller, Parent, Port, Cmd) ->
+validate_options(Opts) -> validate_options(Opts,[]).
+validate_options([],Res) -> Res;
+validate_options([?SCHEDULER_CHECK_OPT=Opt|Opts],Res) -> validate_options(Opts,[Opt|Res]);
+validate_options(_,_) -> error.
+
+do_cycle_port_program(Caller, Parent, #state{port=Port} = S) ->
unregister(?HEART_PORT_NAME),
case catch start_portprogram() of
{ok, NewPort} ->
_ = send_shutdown(Port),
receive
{'EXIT', Port, _Reason} ->
- _ = send_heart_cmd(NewPort, Cmd),
- Caller ! {heart, ok},
- loop(Parent, NewPort, Cmd)
+ _ = send_heart_cmd(NewPort, S#state.cmd),
+ Caller ! {?MODULE, ok},
+ loop(Parent, S#state{port=NewPort})
after
?CYCLE_TIMEOUT ->
%% Huh! Two heart port programs running...
%% well, the old one has to be sick not to respond
%% so we'll settle for the new one...
- _ = send_heart_cmd(NewPort, Cmd),
- Caller ! {heart, {error, stop_error}},
- loop(Parent, NewPort, Cmd)
+ _ = send_heart_cmd(NewPort, S#state.cmd),
+ Caller ! {?MODULE, {error, stop_error}},
+ loop(Parent, S#state{port=NewPort})
end;
no_heart ->
- Caller ! {heart, {error, no_heart}},
- loop(Parent, Port, Cmd);
+ Caller ! {?MODULE, {error, no_heart}},
+ loop(Parent, S);
error ->
- Caller ! {heart, {error, start_error}},
- loop(Parent, Port, Cmd)
+ Caller ! {?MODULE, {error, start_error}},
+ loop(Parent, S)
end.
%% "Beates" the heart once.
-send_heart_beat(Port) -> Port ! {self(), {command, [?HEART_BEAT]}}.
+send_heart_beat(#state{port=Port, callback=Cb, options=Opts}) ->
+ ok = check_system(Opts),
+ ok = check_callback(Cb),
+ Port ! {self(), {command, [?HEART_BEAT]}}.
%% Set a new HEART_COMMAND.
-dialyzer({no_improper_lists, send_heart_cmd/2}).
@@ -278,6 +370,24 @@ get_heart_cmd(Port) ->
{ok, Cmd}
end.
+check_system([]) -> ok;
+check_system([?SCHEDULER_CHECK_OPT|Opts]) ->
+ ok = erts_internal:system_check(schedulers),
+ check_system(Opts).
+
+%% validate system by performing a check before the heartbeat
+%% return 'ok' if everything is alright.
+%% Terminate if with reason if something is a miss.
+%% It is fine to timeout in the callback, in fact that is the intention
+%% if something goes wront -> no heartbeat.
+
+check_callback(Callback) ->
+ case Callback of
+ undefined -> ok;
+ {M,F} ->
+ erlang:apply(M,F,[])
+ end.
+
%% Sends shutdown command to the port.
send_shutdown(Port) -> Port ! {self(), {command, [?SHUT_DOWN]}}.
diff --git a/lib/kernel/test/heart_SUITE.erl b/lib/kernel/test/heart_SUITE.erl
index 83efbb4c35..39cd29cea0 100644
--- a/lib/kernel/test/heart_SUITE.erl
+++ b/lib/kernel/test/heart_SUITE.erl
@@ -27,6 +27,8 @@
node_start_immediately_after_crash/1,
node_start_soon_after_crash/1,
set_cmd/1, clear_cmd/1, get_cmd/1,
+ callback_api/1,
+ options_api/1,
dont_drop/1, kill_pid/1]).
-export([init_per_testcase/2, end_per_testcase/2]).
@@ -66,6 +68,8 @@ all() -> [
node_start_immediately_after_crash,
node_start_soon_after_crash,
set_cmd, clear_cmd, get_cmd,
+ callback_api,
+ options_api,
kill_pid
].
@@ -358,6 +362,69 @@ get_cmd(Config) when is_list(Config) ->
stop_node(Node),
ok.
+callback_api(Config) when is_list(Config) ->
+ {ok, Node} = start_check(slave, heart_test),
+ none = rpc:call(Node, heart, get_callback, []),
+ M0 = self(),
+ F0 = ok,
+ {error, {bad_callback, {M0,F0}}} = rpc:call(Node, heart, set_callback, [M0,F0]),
+ none = rpc:call(Node, heart, get_callback, []),
+ M1 = lists:duplicate(28, $a),
+ F1 = lists:duplicate(28, $b),
+ {error, {bad_callback, {M1,F1}}} = rpc:call(Node, heart, set_callback, [M1,F1]),
+ none = rpc:call(Node, heart, get_callback, []),
+
+ M2 = heart_check_module,
+ F2 = cb_ok,
+ F3 = cb_error,
+ Code0 = generate(M2, [], [
+ atom_to_list(F2) ++ "() -> ok.",
+ atom_to_list(F3) ++ "() -> exit(\"callback_error (as intended)\")."
+ ]),
+ {module, M2} = rpc:call(Node, erlang, load_module, [M2, Code0]),
+ ok = rpc:call(Node, M2, F2, []),
+ ok = rpc:call(Node, heart, set_callback, [M2,F2]),
+ {ok, {M2,F2}} = rpc:call(Node, heart, get_callback, []),
+ ok = rpc:call(Node, heart, clear_callback, []),
+ none = rpc:call(Node, heart, get_callback, []),
+ ok = rpc:call(Node, heart, set_callback, [M2,F2]),
+ {ok, {M2,F2}} = rpc:call(Node, heart, get_callback, []),
+ ok = rpc:call(Node, heart, set_callback, [M2,F3]),
+ receive {nodedown, Node} -> ok
+ after 5000 -> test_server:fail(node_not_killed)
+ end,
+ stop_node(Node),
+ ok.
+
+options_api(Config) when is_list(Config) ->
+ {ok, Node} = start_check(slave, heart_test),
+ none = rpc:call(Node, heart, get_options, []),
+ M0 = self(),
+ F0 = ok,
+ {error, {bad_options, {M0,F0}}} = rpc:call(Node, heart, set_options, [{M0,F0}]),
+ none = rpc:call(Node, heart, get_options, []),
+ Ls = lists:duplicate(28, $b),
+ {error, {bad_options, Ls}} = rpc:call(Node, heart, set_options, [Ls]),
+ none = rpc:call(Node, heart, get_options, []),
+
+ ok = rpc:call(Node, heart, set_options, [[check_schedulers]]),
+ {ok, [check_schedulers]} = rpc:call(Node, heart, get_options, []),
+ ok = rpc:call(Node, heart, set_options, [[]]),
+ none = rpc:call(Node, heart, get_options, []),
+
+ ok = rpc:call(Node, heart, set_options, [[check_schedulers]]),
+ {ok, [check_schedulers]} = rpc:call(Node, heart, get_options, []),
+ {error, {bad_options, Ls}} = rpc:call(Node, heart, set_options, [Ls]),
+ {ok, [check_schedulers]} = rpc:call(Node, heart, get_options, []),
+
+ receive after 3000 -> ok end, %% wait 3 secs
+
+ ok = rpc:call(Node, heart, set_options, [[]]),
+ none = rpc:call(Node, heart, get_options, []),
+ stop_node(Node),
+ ok.
+
+
dont_drop(suite) ->
%%% Removed as it may crash epmd/distribution in colourful
%%% ways. While we ARE finding out WHY, it would