%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2016. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%
-module(erts_code_purger).
%% Purpose : Implement system process erts_code_purger
%% to handle code module purging.
-export([start/0, purge/1, soft_purge/1, pending_purge_lambda/3,
finish_after_on_load/2]).
-spec start() -> term().
start() ->
register(erts_code_purger, self()),
process_flag(trap_exit, true),
loop().
loop() ->
_ = receive
{purge,Mod,From,Ref} when is_atom(Mod), is_pid(From) ->
Res = do_purge(Mod),
From ! {reply, purge, Res, Ref};
{soft_purge,Mod,From,Ref} when is_atom(Mod), is_pid(From) ->
Res = do_soft_purge(Mod),
From ! {reply, soft_purge, Res, Ref};
{finish_after_on_load,{Mod,Keep},From,Ref}
when is_atom(Mod), is_pid(From) ->
Res = do_finish_after_on_load(Mod, Keep),
From ! {reply, finish_after_on_load, Res, Ref};
{test_purge, Mod, From, Type, Ref} when is_atom(Mod), is_pid(From) ->
do_test_purge(Mod, From, Type, Ref);
_Other -> ignore
end,
loop().
%%
%% Processes that tries to call a fun that belongs to
%% a module that currently is being purged will end
%% up here (pending_purge_lambda) in a suspended state.
%% When the purge operation completes or aborts (soft
%% purge that failed) these processes will be resumed.
%%
pending_purge_lambda(_Module, Fun, Args) ->
%%
%% When the process is resumed, the following
%% scenarios exist:
%% * The code that the fun refers to is still
%% there due to a failed soft purge. The
%% call to the fun will succeed via apply/2.
%% * The code was purged, and a current version
%% of the module is loaded which does not
%% contain this fun. The call will result
%% in an exception being raised.
%% * The code was purged, and no current
%% version of the module is loaded. An attempt
%% to load the module (via the error_handler)
%% will be made. This may or may not succeed.
%% If the module is loaded, it may or may
%% not contain the fun. The call will
%% succeed if the error_handler was able
%% to load the module and loaded module
%% contains this fun; otherwise, an exception
%% will be raised.
%%
apply(Fun, Args).
%% purge(Module)
%% Kill all processes running code from *old* Module, and then purge the
%% module. Return {WasOld, DidKill}:
%% {false, false} there was no old module to purge
%% {true, false} module purged, no process killed
%% {true, true} module purged, at least one process killed
purge(Mod) when is_atom(Mod) ->
Ref = make_ref(),
erts_code_purger ! {purge, Mod, self(), Ref},
receive
{reply, purge, Result, Ref} ->
Result
end.
do_purge(Mod) ->
case erts_internal:purge_module(Mod, prepare) of
false ->
{false, false};
true ->
DidKill = check_proc_code(erlang:processes(), Mod, true),
true = erts_internal:purge_module(Mod, complete),
{true, DidKill}
end.
%% soft_purge(Module)
%% Purge old code only if no procs remain that run old code.
%% Return true in that case, false if procs remain (in this
%% case old code is not purged)
soft_purge(Mod) ->
Ref = make_ref(),
erts_code_purger ! {soft_purge, Mod, self(), Ref},
receive
{reply, soft_purge, Result, Ref} ->
Result
end.
do_soft_purge(Mod) ->
case erts_internal:purge_module(Mod, prepare) of
false ->
true;
true ->
Res = check_proc_code(erlang:processes(), Mod, false),
erts_internal:purge_module(Mod,
case Res of
false -> abort;
true -> complete
end)
end.
%% finish_after_on_load(Module, Keep)
%% Finish after running on_load function. If Keep is false,
%% purge the code for the on_load function.
finish_after_on_load(Mod, Keep) ->
Ref = make_ref(),
erts_code_purger ! {finish_after_on_load, {Mod,Keep}, self(), Ref},
receive
{reply, finish_after_on_load, Result, Ref} ->
Result
end.
do_finish_after_on_load(Mod, Keep) ->
erlang:finish_after_on_load(Mod, Keep),
case Keep of
true ->
ok;
false ->
case erts_internal:purge_module(Mod, prepare_on_load) of
false ->
true;
true ->
_ = check_proc_code(erlang:processes(), Mod, true),
true = erts_internal:purge_module(Mod, complete)
end
end.
%%
%% check_proc_code(Pids, Mod, Hard) - Send asynchronous
%% requests to all processes to perform a check_process_code
%% operation. Each process will check their own state and
%% reply with the result. If 'Hard' equals
%% - true, processes that refer 'Mod' will be killed. If
%% any processes were killed true is returned; otherwise,
%% false.
%% - false, and any processes refer 'Mod', false will
%% returned; otherwise, true.
%%
%% Requests will be sent to all processes identified by
%% Pids at once, but without allowing GC to be performed.
%% Check process code operations that are aborted due to
%% GC need, will be restarted allowing GC. However, only
%% ?MAX_CPC_GC_PROCS outstanding operation allowing GC at
%% a time will be allowed. This in order not to blow up
%% memory wise.
%%
%% We also only allow ?MAX_CPC_NO_OUTSTANDING_KILLS
%% outstanding kills. This both in order to avoid flooding
%% our message queue with 'DOWN' messages and limiting the
%% amount of memory used to keep references to all
%% outstanding kills.
%%
%% We maybe should allow more than two outstanding
%% GC requests, but for now we play it safe...
-define(MAX_CPC_GC_PROCS, 2).
-define(MAX_CPC_NO_OUTSTANDING_KILLS, 10).
-record(cpc_static, {hard, module, tag}).
-record(cpc_kill, {outstanding = [],
no_outstanding = 0,
waiting = [],
killed = false}).
check_proc_code(Pids, Mod, Hard) ->
Tag = erlang:make_ref(),
CpcS = #cpc_static{hard = Hard,
module = Mod,
tag = Tag},
check_proc_code(CpcS, cpc_init(CpcS, Pids, 0), 0, [], #cpc_kill{}, true).
check_proc_code(#cpc_static{hard = true}, 0, 0, [],
#cpc_kill{outstanding = [], waiting = [], killed = Killed},
true) ->
%% No outstanding requests. We did a hard check, so result is whether or
%% not we killed any processes...
Killed;
check_proc_code(#cpc_static{hard = false}, 0, 0, [], _KillState, Success) ->
%% No outstanding requests and we did a soft check...
Success;
check_proc_code(#cpc_static{hard = false, tag = Tag} = CpcS, NoReq0, NoGcReq0,
[], _KillState, false) ->
%% Failed soft check; just cleanup the remaining replies corresponding
%% to the requests we've sent...
{NoReq1, NoGcReq1} = receive
{check_process_code, {Tag, _P, GC}, _Res} ->
case GC of
false -> {NoReq0-1, NoGcReq0};
true -> {NoReq0, NoGcReq0-1}
end
end,
check_proc_code(CpcS, NoReq1, NoGcReq1, [], _KillState, false);
check_proc_code(#cpc_static{tag = Tag} = CpcS, NoReq0, NoGcReq0, NeedGC0,
KillState0, Success) ->
%% Check if we should request a GC operation
{NoGcReq1, NeedGC1} = case NoGcReq0 < ?MAX_CPC_GC_PROCS of
GcOpAllowed when GcOpAllowed == false;
NeedGC0 == [] ->
{NoGcReq0, NeedGC0};
_ ->
{NoGcReq0+1, cpc_request_gc(CpcS,NeedGC0)}
end,
%% Wait for a cpc reply or 'DOWN' message
{NoReq1, NoGcReq2, Pid, Result, KillState1} = cpc_recv(Tag,
NoReq0,
NoGcReq1,
KillState0),
%% Check the result of the reply
case Result of
aborted ->
%% Operation aborted due to the need to GC in order to
%% determine if the process is referring the module.
%% Schedule the operation for restart allowing GC...
check_proc_code(CpcS, NoReq1, NoGcReq2, [Pid|NeedGC1], KillState1,
Success);
false ->
%% Process not referring the module; done with this process...
check_proc_code(CpcS, NoReq1, NoGcReq2, NeedGC1, KillState1,
Success);
true ->
%% Process referring the module...
case CpcS#cpc_static.hard of
false ->
%% ... and soft check. The whole operation failed so
%% no point continuing; clean up and fail...
check_proc_code(CpcS, NoReq1, NoGcReq2, [], KillState1,
false);
true ->
%% ... and hard check; schedule kill of it...
check_proc_code(CpcS, NoReq1, NoGcReq2, NeedGC1,
cpc_sched_kill(Pid, KillState1), Success)
end;
'DOWN' ->
%% Handled 'DOWN' message
check_proc_code(CpcS, NoReq1, NoGcReq2, NeedGC1,
KillState1, Success)
end.
cpc_recv(Tag, NoReq, NoGcReq, #cpc_kill{outstanding = []} = KillState) ->
receive
{check_process_code, {Tag, Pid, GC}, Res} ->
cpc_handle_cpc(NoReq, NoGcReq, GC, Pid, Res, KillState)
end;
cpc_recv(Tag, NoReq, NoGcReq,
#cpc_kill{outstanding = [R0, R1, R2, R3, R4 | _]} = KillState) ->
receive
{'DOWN', R, process, _, _} when R == R0;
R == R1;
R == R2;
R == R3;
R == R4 ->
cpc_handle_down(NoReq, NoGcReq, R, KillState);
{check_process_code, {Tag, Pid, GC}, Res} ->
cpc_handle_cpc(NoReq, NoGcReq, GC, Pid, Res, KillState)
end;
cpc_recv(Tag, NoReq, NoGcReq, #cpc_kill{outstanding = [R|_]} = KillState) ->
receive
{'DOWN', R, process, _, _} ->
cpc_handle_down(NoReq, NoGcReq, R, KillState);
{check_process_code, {Tag, Pid, GC}, Res} ->
cpc_handle_cpc(NoReq, NoGcReq, GC, Pid, Res, KillState)
end.
cpc_handle_down(NoReq, NoGcReq, R, #cpc_kill{outstanding = Rs,
no_outstanding = N} = KillState) ->
{NoReq, NoGcReq, undefined, 'DOWN',
cpc_sched_kill_waiting(KillState#cpc_kill{outstanding = cpc_list_rm(R, Rs),
no_outstanding = N-1})}.
cpc_list_rm(R, [R|Rs]) ->
Rs;
cpc_list_rm(R0, [R1|Rs]) ->
[R1|cpc_list_rm(R0, Rs)].
cpc_handle_cpc(NoReq, NoGcReq, false, Pid, Res, KillState) ->
{NoReq-1, NoGcReq, Pid, Res, KillState};
cpc_handle_cpc(NoReq, NoGcReq, true, Pid, Res, KillState) ->
{NoReq, NoGcReq-1, Pid, Res, KillState}.
cpc_sched_kill_waiting(#cpc_kill{waiting = []} = KillState) ->
KillState;
cpc_sched_kill_waiting(#cpc_kill{outstanding = Rs,
no_outstanding = N,
waiting = [P|Ps]} = KillState) ->
R = erlang:monitor(process, P),
exit(P, kill),
KillState#cpc_kill{outstanding = [R|Rs],
no_outstanding = N+1,
waiting = Ps,
killed = true}.
cpc_sched_kill(Pid, #cpc_kill{no_outstanding = N, waiting = Pids} = KillState)
when N >= ?MAX_CPC_NO_OUTSTANDING_KILLS ->
KillState#cpc_kill{waiting = [Pid|Pids]};
cpc_sched_kill(Pid,
#cpc_kill{outstanding = Rs, no_outstanding = N} = KillState) ->
R = erlang:monitor(process, Pid),
exit(Pid, kill),
KillState#cpc_kill{outstanding = [R|Rs],
no_outstanding = N+1,
killed = true}.
cpc_request(#cpc_static{tag = Tag, module = Mod}, Pid, AllowGc) ->
erts_internal:check_process_code(Pid, Mod, [{async, {Tag, Pid, AllowGc}},
{allow_gc, AllowGc}]).
cpc_request_gc(CpcS, [Pid|Pids]) ->
cpc_request(CpcS, Pid, true),
Pids.
cpc_init(_CpcS, [], NoReqs) ->
NoReqs;
cpc_init(CpcS, [Pid|Pids], NoReqs) ->
cpc_request(CpcS, Pid, false),
cpc_init(CpcS, Pids, NoReqs+1).
% end of check_proc_code() implementation.
%%
%% FOR TESTING ONLY
%%
%% do_test_purge() is for testing only. The purge is done
%% as usual, but the tester can control when to enter the
%% specific phases.
%%
do_test_purge(Mod, From, Type, Ref) when Type == true; Type == false ->
Mon = erlang:monitor(process, From),
Res = case Type of
true -> do_test_hard_purge(Mod, From, Ref, Mon);
false -> do_test_soft_purge(Mod, From, Ref, Mon)
end,
From ! {test_purge, Res, Ref},
erlang:demonitor(Mon, [flush]),
ok;
do_test_purge(_, _, _, _) ->
ok.
do_test_soft_purge(Mod, From, Ref, Mon) ->
PrepRes = erts_internal:purge_module(Mod, prepare),
TestRes = test_progress(started, From, Mon, Ref, ok),
case PrepRes of
false ->
_ = test_progress(continued, From, Mon, Ref, TestRes),
true;
true ->
Res = check_proc_code(erlang:processes(), Mod, false),
_ = test_progress(continued, From, Mon, Ref, TestRes),
erts_internal:purge_module(Mod,
case Res of
false -> abort;
true -> complete
end)
end.
do_test_hard_purge(Mod, From, Ref, Mon) ->
PrepRes = erts_internal:purge_module(Mod, prepare),
TestRes = test_progress(started, From, Mon, Ref, ok),
case PrepRes of
false ->
_ = test_progress(continued, From, Mon, Ref, TestRes),
{false, false};
true ->
DidKill = check_proc_code(erlang:processes(), Mod, true),
_ = test_progress(continued, From, Mon, Ref, TestRes),
true = erts_internal:purge_module(Mod, complete),
{true, DidKill}
end.
test_progress(_State, _From, _Mon, _Ref, died) ->
%% Test process died; continue so we wont
%% leave the system in an inconsistent
%% state...
died;
test_progress(started, From, Mon, Ref, ok) ->
From ! {started, Ref},
receive
{'DOWN', Mon, process, From, _} -> died;
{continue, Ref} -> ok
end;
test_progress(continued, From, Mon, Ref, ok) ->
From ! {continued, Ref},
receive
{'DOWN', Mon, process, From, _} -> died;
{complete, Ref} -> ok
end.