%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2016. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%
-module(erts_code_purger).

%% Purpose : Implement system process erts_code_purger
%%           to handle code module purging.

-export([start/0, purge/1, soft_purge/1, pending_purge_lambda/3,
	 finish_after_on_load/2]).

-spec start() -> term().
start() ->
    register(erts_code_purger, self()),
    process_flag(trap_exit, true),
    loop().

loop() ->
    _ = receive
	{purge,Mod,From,Ref} when is_atom(Mod), is_pid(From) ->
	    Res = do_purge(Mod),
	    From ! {reply, purge, Res, Ref};

	{soft_purge,Mod,From,Ref} when is_atom(Mod), is_pid(From) ->
	    Res = do_soft_purge(Mod),
	    From ! {reply, soft_purge, Res, Ref};

	{finish_after_on_load,{Mod,Keep},From,Ref}
	      when is_atom(Mod), is_pid(From) ->
	    Res = do_finish_after_on_load(Mod, Keep),
	    From ! {reply, finish_after_on_load, Res, Ref};

	{test_purge, Mod, From, Type, Ref} when is_atom(Mod), is_pid(From) ->
	     do_test_purge(Mod, From, Type, Ref);

	_Other -> ignore
    end,
    loop().

%%
%% Processes that tries to call a fun that belongs to
%% a module that currently is being purged will end
%% up here (pending_purge_lambda) in a suspended state.
%% When the purge operation completes or aborts (soft
%% purge that failed) these processes will be resumed.
%%
pending_purge_lambda(_Module, Fun, Args) ->
    %%
    %% When the process is resumed, the following
    %% scenarios exist:
    %% * The code that the fun refers to is still
    %%   there due to a failed soft purge. The
    %%   call to the fun will succeed via apply/2.
    %% * The code was purged, and a current version
    %%   of the module is loaded which does not
    %%   contain this fun. The call will result
    %%   in an exception being raised.
    %% * The code was purged, and no current
    %%   version of the module is loaded. An attempt
    %%   to load the module (via the error_handler)
    %%   will be made. This may or may not succeed.
    %%   If the module is loaded, it may or may
    %%   not contain the fun. The call will
    %%   succeed if the error_handler was able
    %%   to load the module and loaded module
    %%   contains this fun; otherwise, an exception
    %%   will be raised.
    %%
    apply(Fun, Args).

%% purge(Module)
%%  Kill all processes running code from *old* Module, and then purge the
%%  module. Return {WasOld, DidKill}:
%%  {false, false} there was no old module to purge
%%  {true, false} module purged, no process killed
%%  {true, true} module purged, at least one process killed

purge(Mod) when is_atom(Mod) ->
    Ref = make_ref(),
    erts_code_purger ! {purge, Mod, self(), Ref},
    receive
	{reply, purge, Result, Ref} ->
	    Result
    end.

do_purge(Mod) ->
    case erts_internal:purge_module(Mod, prepare) of
	false ->
	    {false, false};
	true ->
	    DidKill = check_proc_code(erlang:processes(), Mod, true),
	    true = erts_internal:purge_module(Mod, complete),
	    {true, DidKill}
    end.

%% soft_purge(Module)
%% Purge old code only if no procs remain that run old code.
%% Return true in that case, false if procs remain (in this
%% case old code is not purged)

soft_purge(Mod) ->
    Ref = make_ref(),
    erts_code_purger ! {soft_purge, Mod, self(), Ref},
    receive
	{reply, soft_purge, Result, Ref} ->
	    Result
    end.

do_soft_purge(Mod) ->
    case erts_internal:purge_module(Mod, prepare) of
	false ->
	    true;
	true ->
	    Res = check_proc_code(erlang:processes(), Mod, false),
	    erts_internal:purge_module(Mod,
				       case Res of
					   false -> abort;
					   true -> complete
				       end)
    end.

%% finish_after_on_load(Module, Keep)
%% Finish after running on_load function. If Keep is false,
%% purge the code for the on_load function.

finish_after_on_load(Mod, Keep) ->
    Ref = make_ref(),
    erts_code_purger ! {finish_after_on_load, {Mod,Keep}, self(), Ref},
    receive
	{reply, finish_after_on_load, Result, Ref} ->
	    Result
    end.

do_finish_after_on_load(Mod, Keep) ->
    erlang:finish_after_on_load(Mod, Keep),
    case Keep of
	true ->
	    ok;
	false ->
	    case erts_internal:purge_module(Mod, prepare_on_load) of
		false ->
		    true;
		true ->
		    _ = check_proc_code(erlang:processes(), Mod, true),
		    true = erts_internal:purge_module(Mod, complete)
	    end
    end.


%%
%% check_proc_code(Pids, Mod, Hard) - Send asynchronous
%%   requests to all processes to perform a check_process_code
%%   operation. Each process will check their own state and
%%   reply with the result. If 'Hard' equals
%%   - true, processes that refer 'Mod' will be killed. If
%%     any processes were killed true is returned; otherwise,
%%     false.
%%   - false, and any processes refer 'Mod', false will
%%     returned; otherwise, true.
%%
%%   Requests will be sent to all processes identified by
%%   Pids at once, but without allowing GC to be performed.
%%   Check process code operations that are aborted due to
%%   GC need, will be restarted allowing GC. However, only
%%   ?MAX_CPC_GC_PROCS outstanding operation allowing GC at
%%   a time will be allowed. This in order not to blow up
%%   memory wise.
%%
%%   We also only allow ?MAX_CPC_NO_OUTSTANDING_KILLS
%%   outstanding kills. This both in order to avoid flooding
%%   our message queue with 'DOWN' messages and limiting the
%%   amount of memory used to keep references to all
%%   outstanding kills.
%%

%% We maybe should allow more than two outstanding
%% GC requests, but for now we play it safe...
-define(MAX_CPC_GC_PROCS, 2).
-define(MAX_CPC_NO_OUTSTANDING_KILLS, 10).

-record(cpc_static, {hard, module, tag}).

-record(cpc_kill, {outstanding = [],
		   no_outstanding = 0,
		   waiting = [],
		   killed = false}).

check_proc_code(Pids, Mod, Hard) ->
    Tag = erlang:make_ref(),
    CpcS = #cpc_static{hard = Hard,
		       module = Mod,
		       tag = Tag},
    check_proc_code(CpcS, cpc_init(CpcS, Pids, 0), 0, [], #cpc_kill{}, true).

check_proc_code(#cpc_static{hard = true}, 0, 0, [],
		#cpc_kill{outstanding = [], waiting = [], killed = Killed},
		true) ->
    %% No outstanding requests. We did a hard check, so result is whether or
    %% not we killed any processes...
    Killed;
check_proc_code(#cpc_static{hard = false}, 0, 0, [], _KillState, Success) ->
    %% No outstanding requests and we did a soft check...
    Success;
check_proc_code(#cpc_static{hard = false, tag = Tag} = CpcS, NoReq0, NoGcReq0,
		[], _KillState, false) ->
    %% Failed soft check; just cleanup the remaining replies corresponding
    %% to the requests we've sent...
    {NoReq1, NoGcReq1} = receive
			     {check_process_code, {Tag, _P, GC}, _Res} ->
				 case GC of
				     false -> {NoReq0-1, NoGcReq0};
				     true -> {NoReq0, NoGcReq0-1}
				 end
			 end,
    check_proc_code(CpcS, NoReq1, NoGcReq1, [], _KillState, false);
check_proc_code(#cpc_static{tag = Tag} = CpcS, NoReq0, NoGcReq0, NeedGC0,
		KillState0, Success) ->

    %% Check if we should request a GC operation
    {NoGcReq1, NeedGC1} = case NoGcReq0 < ?MAX_CPC_GC_PROCS of
			      GcOpAllowed when GcOpAllowed == false;
					       NeedGC0 == [] ->
				  {NoGcReq0, NeedGC0};
			      _ ->
				  {NoGcReq0+1, cpc_request_gc(CpcS,NeedGC0)}
			  end,

    %% Wait for a cpc reply or 'DOWN' message
    {NoReq1, NoGcReq2, Pid, Result, KillState1} = cpc_recv(Tag,
							   NoReq0,
							   NoGcReq1,
							   KillState0),

    %% Check the result of the reply
    case Result of
	aborted ->
	    %% Operation aborted due to the need to GC in order to
	    %% determine if the process is referring the module.
	    %% Schedule the operation for restart allowing GC...
	    check_proc_code(CpcS, NoReq1, NoGcReq2, [Pid|NeedGC1], KillState1,
			    Success);
	false ->
	    %% Process not referring the module; done with this process...
	    check_proc_code(CpcS, NoReq1, NoGcReq2, NeedGC1, KillState1,
			    Success);
	true ->
	    %% Process referring the module...
	    case CpcS#cpc_static.hard of
		false ->
		    %% ... and soft check. The whole operation failed so
		    %% no point continuing; clean up and fail...
		    check_proc_code(CpcS, NoReq1, NoGcReq2, [], KillState1,
				    false);
		true ->
		    %% ... and hard check; schedule kill of it...
		    check_proc_code(CpcS, NoReq1, NoGcReq2, NeedGC1,
				    cpc_sched_kill(Pid, KillState1), Success)
	    end;
	'DOWN' ->
	    %% Handled 'DOWN' message
	    check_proc_code(CpcS, NoReq1, NoGcReq2, NeedGC1,
			    KillState1, Success)
    end.

cpc_recv(Tag, NoReq, NoGcReq, #cpc_kill{outstanding = []} = KillState) ->
    receive
	{check_process_code, {Tag, Pid, GC}, Res} ->
	    cpc_handle_cpc(NoReq, NoGcReq, GC, Pid, Res, KillState)
    end;
cpc_recv(Tag, NoReq, NoGcReq,
	 #cpc_kill{outstanding = [R0, R1, R2, R3, R4 | _]} = KillState) ->
    receive
	{'DOWN', R, process, _, _} when R == R0;
					R == R1;
					R == R2;
					R == R3;
					R == R4 ->
	    cpc_handle_down(NoReq, NoGcReq, R, KillState);
	{check_process_code, {Tag, Pid, GC}, Res} ->
	    cpc_handle_cpc(NoReq, NoGcReq, GC, Pid, Res, KillState)
    end;
cpc_recv(Tag, NoReq, NoGcReq, #cpc_kill{outstanding = [R|_]} = KillState) ->
    receive
	{'DOWN', R, process, _, _} ->
	    cpc_handle_down(NoReq, NoGcReq, R, KillState);
	{check_process_code, {Tag, Pid, GC}, Res} ->
	    cpc_handle_cpc(NoReq, NoGcReq, GC, Pid, Res, KillState)
    end.

cpc_handle_down(NoReq, NoGcReq, R, #cpc_kill{outstanding = Rs,
					     no_outstanding = N} = KillState) ->
    {NoReq, NoGcReq, undefined, 'DOWN',
     cpc_sched_kill_waiting(KillState#cpc_kill{outstanding = cpc_list_rm(R, Rs),
					       no_outstanding = N-1})}.

cpc_list_rm(R, [R|Rs]) ->
    Rs;
cpc_list_rm(R0, [R1|Rs]) ->
    [R1|cpc_list_rm(R0, Rs)].

cpc_handle_cpc(NoReq, NoGcReq, false, Pid, Res, KillState) ->
    {NoReq-1, NoGcReq, Pid, Res, KillState};
cpc_handle_cpc(NoReq, NoGcReq, true, Pid, Res, KillState) ->
    {NoReq, NoGcReq-1, Pid, Res, KillState}.

cpc_sched_kill_waiting(#cpc_kill{waiting = []} = KillState) ->
    KillState;
cpc_sched_kill_waiting(#cpc_kill{outstanding = Rs,
				 no_outstanding = N,
				 waiting = [P|Ps]} = KillState) ->
    R = erlang:monitor(process, P),
    exit(P, kill),
    KillState#cpc_kill{outstanding = [R|Rs],
		       no_outstanding = N+1,
		       waiting = Ps,
		       killed = true}.

cpc_sched_kill(Pid, #cpc_kill{no_outstanding = N, waiting = Pids} = KillState)
  when N >= ?MAX_CPC_NO_OUTSTANDING_KILLS ->
    KillState#cpc_kill{waiting = [Pid|Pids]};
cpc_sched_kill(Pid,
	       #cpc_kill{outstanding = Rs, no_outstanding = N} = KillState) ->
    R = erlang:monitor(process, Pid),
    exit(Pid, kill),
    KillState#cpc_kill{outstanding = [R|Rs],
		       no_outstanding = N+1,
		       killed = true}.

cpc_request(#cpc_static{tag = Tag, module = Mod}, Pid, AllowGc) ->
    erts_internal:check_process_code(Pid, Mod, [{async, {Tag, Pid, AllowGc}},
						{allow_gc, AllowGc}]).

cpc_request_gc(CpcS, [Pid|Pids]) ->
    cpc_request(CpcS, Pid, true),
    Pids.

cpc_init(_CpcS, [], NoReqs) ->
    NoReqs;
cpc_init(CpcS, [Pid|Pids], NoReqs) ->
    cpc_request(CpcS, Pid, false),
    cpc_init(CpcS, Pids, NoReqs+1).

% end of check_proc_code() implementation.

%%
%% FOR TESTING ONLY
%%
%% do_test_purge() is for testing only. The purge is done
%% as usual, but the tester can control when to enter the
%% specific phases.
%%
do_test_purge(Mod, From, Type, Ref) when Type == true; Type == false ->
    Mon = erlang:monitor(process, From),
    Res = case Type of
	      true -> do_test_hard_purge(Mod, From, Ref, Mon);
	      false -> do_test_soft_purge(Mod, From, Ref, Mon)
	  end,
    From ! {test_purge, Res, Ref},
    erlang:demonitor(Mon, [flush]),
    ok;
do_test_purge(_, _, _, _) ->
    ok.

do_test_soft_purge(Mod, From, Ref, Mon) ->
    PrepRes = erts_internal:purge_module(Mod, prepare),
    TestRes = test_progress(started, From, Mon, Ref, ok),
    case PrepRes of
	false ->
	    _ = test_progress(continued, From, Mon, Ref, TestRes),
	    true;
	true ->
	    Res = check_proc_code(erlang:processes(), Mod, false),
	    _ = test_progress(continued, From, Mon, Ref, TestRes),
	    erts_internal:purge_module(Mod,
				       case Res of
					   false -> abort;
					   true -> complete
				       end)
    end.

do_test_hard_purge(Mod, From, Ref, Mon) ->
    PrepRes = erts_internal:purge_module(Mod, prepare),
    TestRes = test_progress(started, From, Mon, Ref, ok),
    case PrepRes of
	false ->
	    _ = test_progress(continued, From, Mon, Ref, TestRes),
	    {false, false};
	true ->
	    DidKill = check_proc_code(erlang:processes(), Mod, true),
	    _ = test_progress(continued, From, Mon, Ref, TestRes),
	    true = erts_internal:purge_module(Mod, complete),
	    {true, DidKill}
    end.

test_progress(_State, _From, _Mon, _Ref, died) ->
    %% Test process died; continue so we wont
    %% leave the system in an inconsistent
    %% state...
    died;
test_progress(started, From, Mon, Ref, ok) ->
    From ! {started, Ref},
    receive
	{'DOWN', Mon, process, From, _} -> died;
	{continue, Ref} -> ok
    end;
test_progress(continued, From, Mon, Ref, ok) ->
    From ! {continued, Ref},
    receive
	{'DOWN', Mon, process, From, _} -> died;
	{complete, Ref} -> ok
    end.