Parallel check_process_code when code_server purge a module

When the code_server is about to purge a module it will now issue asynchronous check_process_code requests to all processes at once instead of one at a time. These check_process_code operation can execute in parallel.
author: Rickard Green <[email protected]> 2013-11-10 23:37:49 +0100
committer: Rickard Green <[email protected]> 2013-11-18 20:12:36 +0100
commit: 9f1f0bff7f98d62f8406e5ecd76f6eb7c1a66ff3 (patch)
tree: 57dcc1b92c1e00f86c45861b2163218009e792fa /lib/kernel/src
parent: c6cb0293ba33e1671f8ed670d5add082e5ee674a (diff)
download: otp-9f1f0bff7f98d62f8406e5ecd76f6eb7c1a66ff3.tar.gz
otp-9f1f0bff7f98d62f8406e5ecd76f6eb7c1a66ff3.tar.bz2
otp-9f1f0bff7f98d62f8406e5ecd76f6eb7c1a66ff3.zip
1 files changed, 218 insertions, 27 deletions
diff --git a/lib/kernel/src/code_server.erl b/lib/kernel/src/code_server.erl
index 5d74e8620b..39b50fc219 100644
--- a/lib/kernel/src/code_server.erl
+++ b/lib/kernel/src/code_server.erl
@@ -1410,45 +1410,236 @@ absname_vr([[X, $:]|Name], _, _AbsBase) ->
 do_purge(Mod0) ->
     Mod = to_atom(Mod0),
     case erlang:check_old_code(Mod) of
-	false -> false;
-	true -> do_purge(processes(), Mod, false)
-    end.
-
-do_purge([P|Ps], Mod, Purged) ->
-    case erlang:check_process_code(P, Mod) of
+	false ->
+	    false;
 	true ->
-	    Ref = erlang:monitor(process, P),
-	    exit(P, kill),
-	    receive
-		{'DOWN',Ref,process,_Pid,_} -> ok
+	    Res = check_proc_code(erlang:processes(), Mod, true),
+	    try
+		erlang:purge_module(Mod)
+	    catch
+		_:_ -> ignore
 	    end,
-	    do_purge(Ps, Mod, true);
-	false ->
-	    do_purge(Ps, Mod, Purged)
-    end;
-do_purge([], Mod, Purged) ->
-    catch erlang:purge_module(Mod),
-    Purged.
+	    Res
+    end.
 
 %% do_soft_purge(Module)
 %% Purge old code only if no procs remain that run old code.
 %% Return true in that case, false if procs remain (in this
 %% case old code is not purged)
 
-do_soft_purge(Mod) ->
+do_soft_purge(Mod0) ->
+    Mod = to_atom(Mod0),
     case erlang:check_old_code(Mod) of
-	false -> true;
-	true -> do_soft_purge(processes(), Mod)
+	false ->
+	    true;
+	true ->
+	    case check_proc_code(erlang:processes(), Mod, false) of
+		false ->
+		    false;
+		true ->
+		    try
+			erlang:purge_module(Mod)
+		    catch
+			_:_ -> ignore
+		    end,
+		    true
+	    end
     end.
 
-do_soft_purge([P|Ps], Mod) ->
-    case erlang:check_process_code(P, Mod) of
-	true -> false;
-	false -> do_soft_purge(Ps, Mod)
+%%
+%% check_proc_code(Pids, Mod, Hard) - Send asynchronous
+%%   requests to all processes to perform a check_process_code
+%%   operation. Each process will check their own state and
+%%   reply with the result. If 'Hard' equals
+%%   - true, processes that refer 'Mod' will be killed. If
+%%     any processes were killed true is returned; otherwise,
+%%     false.
+%%   - false, and any processes refer 'Mod', false will
+%%     returned; otherwise, true.
+%%
+%%   Requests will be sent to all processes identified by
+%%   Pids at once, but without allowing GC to be performed.
+%%   Check process code operations that are aborted due to
+%%   GC need, will be restarted allowing GC. However, only
+%%   ?MAX_CPC_GC_PROCS outstanding operation allowing GC at
+%%   a time will be allowed. This in order not to blow up
+%%   memory wise.
+%%
+%%   We also only allow ?MAX_CPC_NO_OUTSTANDING_KILLS
+%%   outstanding kills. This both in order to avoid flooding
+%%   our message queue with 'DOWN' messages and limiting the
+%%   amount of memory used to keep references to all
+%%   outstanding kills.
+%%
+
+%% We maybe should allow more than two outstanding
+%% GC requests, but for now we play it safe...
+-define(MAX_CPC_GC_PROCS, 2).
+-define(MAX_CPC_NO_OUTSTANDING_KILLS, 10).
+
+-record(cpc_static, {hard, module, tag}).
+
+-record(cpc_kill, {outstanding = [],
+		   no_outstanding = 0,
+		   waiting = [],
+		   killed = false}).
+
+check_proc_code(Pids, Mod, Hard) ->
+    Tag = erlang:make_ref(),
+    CpcS = #cpc_static{hard = Hard,
+		       module = Mod,
+		       tag = Tag},
+    check_proc_code(CpcS, cpc_init(CpcS, Pids, 0), 0, [], #cpc_kill{}, true).
+
+check_proc_code(#cpc_static{hard = true}, 0, 0, [],
+		#cpc_kill{outstanding = [], waiting = [], killed = Killed},
+		true) ->
+    %% No outstanding requests. We did a hard check, so result is whether or
+    %% not we killed any processes...
+    Killed;
+check_proc_code(#cpc_static{hard = false}, 0, 0, [], _KillState, Success) ->
+    %% No outstanding requests and we did a soft check...
+    Success;
+check_proc_code(#cpc_static{hard = false, tag = Tag} = CpcS, NoReq0, NoGcReq0,
+		[], _KillState, false) ->
+    %% Failed soft check; just cleanup the remaining replies corresponding
+    %% to the requests we've sent...
+    {NoReq1, NoGcReq1} = receive
+			     {check_process_code, {Tag, _P, GC}, _Res} ->
+				 case GC of
+				     false -> {NoReq0-1, NoGcReq0};
+				     true -> {NoReq0, NoGcReq0-1}
+				 end
+			 end,
+    check_proc_code(CpcS, NoReq1, NoGcReq1, [], _KillState, false);
+check_proc_code(#cpc_static{tag = Tag} = CpcS, NoReq0, NoGcReq0, NeedGC0,
+		KillState0, Success) ->
+
+    %% Check if we should request a GC operation
+    {NoGcReq1, NeedGC1} = case NoGcReq0 < ?MAX_CPC_GC_PROCS of
+			      GcOpAllowed when GcOpAllowed == false;
+					       NeedGC0 == [] ->
+				  {NoGcReq0, NeedGC0};
+			      _ ->
+				  {NoGcReq0+1, cpc_request_gc(CpcS,NeedGC0)}
+			  end,
+
+    %% Wait for a cpc reply or 'DOWN' message
+    {NoReq1, NoGcReq2, Pid, Result, KillState1} = cpc_recv(Tag,
+							   NoReq0,
+							   NoGcReq1,
+							   KillState0),
+
+    %% Check the result of the reply
+    case Result of
+	aborted ->
+	    %% Operation aborted due to the need to GC in order to
+	    %% determine if the process is referring the module.
+	    %% Schedule the operation for restart allowing GC...
+	    check_proc_code(CpcS, NoReq1, NoGcReq2, [Pid|NeedGC1], KillState1,
+			    Success);
+	false ->
+	    %% Process not referring the module; done with this process...
+	    check_proc_code(CpcS, NoReq1, NoGcReq2, NeedGC1, KillState1,
+			    Success);
+	true ->
+	    %% Process referring the module...
+	    case CpcS#cpc_static.hard of
+		false ->
+		    %% ... and soft check. The whole operation failed so
+		    %% no point continuing; clean up and fail...
+		    check_proc_code(CpcS, NoReq1, NoGcReq2, [], KillState1,
+				    false);
+		true ->
+		    %% ... and hard check; schedule kill of it...
+		    check_proc_code(CpcS, NoReq1, NoGcReq2, NeedGC1,
+				    cpc_sched_kill(Pid, KillState1), Success)
+	    end;
+	'DOWN' ->
+	    %% Handled 'DOWN' message
+	    check_proc_code(CpcS, NoReq1, NoGcReq2, NeedGC1,
+			    KillState1, Success)
+    end.
+
+cpc_recv(Tag, NoReq, NoGcReq, #cpc_kill{outstanding = []} = KillState) ->
+    receive
+	{check_process_code, {Tag, Pid, GC}, Res} ->
+	    cpc_handle_cpc(NoReq, NoGcReq, GC, Pid, Res, KillState)
     end;
-do_soft_purge([], Mod) ->
-    catch erlang:purge_module(Mod),
-    true.
+cpc_recv(Tag, NoReq, NoGcReq,
+	 #cpc_kill{outstanding = [R0, R1, R2, R3, R4 | _]} = KillState) ->
+    receive
+	{'DOWN', R, process, _, _} when R == R0;
+					R == R1;
+					R == R2;
+					R == R3;
+					R == R4 ->
+	    cpc_handle_down(NoReq, NoGcReq, R, KillState); 
+	{check_process_code, {Tag, Pid, GC}, Res} ->
+	    cpc_handle_cpc(NoReq, NoGcReq, GC, Pid, Res, KillState)
+    end;
+cpc_recv(Tag, NoReq, NoGcReq, #cpc_kill{outstanding = [R|_]} = KillState) ->
+    receive
+	{'DOWN', R, process, _, _} ->
+	    cpc_handle_down(NoReq, NoGcReq, R, KillState); 
+	{check_process_code, {Tag, Pid, GC}, Res} ->
+	    cpc_handle_cpc(NoReq, NoGcReq, GC, Pid, Res, KillState)
+    end.
+
+cpc_handle_down(NoReq, NoGcReq, R, #cpc_kill{outstanding = Rs,
+					     no_outstanding = N} = KillState) ->
+    {NoReq, NoGcReq, undefined, 'DOWN',
+     cpc_sched_kill_waiting(KillState#cpc_kill{outstanding = cpc_list_rm(R, Rs),
+					       no_outstanding = N-1})}.
+
+cpc_list_rm(R, [R|Rs]) ->
+    Rs;
+cpc_list_rm(R0, [R1|Rs]) ->
+    [R1|cpc_list_rm(R0, Rs)].
+
+cpc_handle_cpc(NoReq, NoGcReq, false, Pid, Res, KillState) ->
+    {NoReq-1, NoGcReq, Pid, Res, KillState};
+cpc_handle_cpc(NoReq, NoGcReq, true, Pid, Res, KillState) ->
+    {NoReq, NoGcReq-1, Pid, Res, KillState}.
+
+cpc_sched_kill_waiting(#cpc_kill{waiting = []} = KillState) ->
+    KillState;
+cpc_sched_kill_waiting(#cpc_kill{outstanding = Rs,
+				 no_outstanding = N,
+				 waiting = [P|Ps]} = KillState) ->
+    R = erlang:monitor(process, P),
+    exit(P, kill),
+    KillState#cpc_kill{outstanding = [R|Rs],
+		       no_outstanding = N+1,
+		       waiting = Ps,
+		       killed = true}.
+
+cpc_sched_kill(Pid, #cpc_kill{no_outstanding = N, waiting = Pids} = KillState)
+  when N >= ?MAX_CPC_NO_OUTSTANDING_KILLS ->
+    KillState#cpc_kill{waiting = [Pid|Pids]};
+cpc_sched_kill(Pid,
+	       #cpc_kill{outstanding = Rs, no_outstanding = N} = KillState) ->
+    R = erlang:monitor(process, Pid),
+    exit(Pid, kill),
+    KillState#cpc_kill{outstanding = [R|Rs],
+		       no_outstanding = N+1,
+		       killed = true}.
+
+cpc_request(#cpc_static{tag = Tag, module = Mod}, Pid, AllowGc) ->
+    erlang:check_process_code(Pid, Mod, [{async, {Tag, Pid, AllowGc}},
+					 {allow_gc, AllowGc}]).
+
+cpc_request_gc(CpcS, [Pid|Pids]) ->
+    cpc_request(CpcS, Pid, true),
+    Pids.
+
+cpc_init(_CpcS, [], NoReqs) ->
+    NoReqs;
+cpc_init(CpcS, [Pid|Pids], NoReqs) ->
+    cpc_request(CpcS, Pid, false),
+    cpc_init(CpcS, Pids, NoReqs+1).
+
+% end of check_proc_code() implementation.
 
 is_loaded(M, Db) ->
     case ets:lookup(Db, M) of
author	Rickard Green <[email protected]>	2013-11-10 23:37:49 +0100
committer	Rickard Green <[email protected]>	2013-11-18 20:12:36 +0100
commit	9f1f0bff7f98d62f8406e5ecd76f6eb7c1a66ff3 (patch)
tree	57dcc1b92c1e00f86c45861b2163218009e792fa /lib/kernel/src
parent	c6cb0293ba33e1671f8ed670d5add082e5ee674a (diff)
download	otp-9f1f0bff7f98d62f8406e5ecd76f6eb7c1a66ff3.tar.gz otp-9f1f0bff7f98d62f8406e5ecd76f6eb7c1a66ff3.tar.bz2 otp-9f1f0bff7f98d62f8406e5ecd76f6eb7c1a66ff3.zip