From ca0425c6ff85262bc15367f5fd9cbc51cde52b20 Mon Sep 17 00:00:00 2001
From: Rickard Green <rickard@erlang.org>
Date: Wed, 2 Oct 2013 10:07:27 +0200
Subject: Execution of system tasks in context of another process

A process requesting a system task to be executed in the context of
another process will be notified by a message when the task has
executed. This message will be on the form:
  {RequestType, RequestId, Pid, Result}.

A process requesting a system task to be executed can set priority
on the system task. The requester typically set the same priority
on the task as its own process priority, and by this avoiding
priority inversion. A request for execution of a system task is
made by calling the statically linked in NIF
erts_internal:request_system_task(Pid, Prio, Request). This is an
undocumented ERTS internal function that should remain so. It
should *only* be called from BIF implementations.

Currently defined system tasks are:
* garbage_collect
* check_process_code

Further system tasks can and will be implemented in the future.

The erlang:garbage_collect/[1,2] and erlang:check_process_code/[2,3]
BIFs are now implemented using system tasks. Both the
'garbage_collect' and the 'check_process_code' operations perform
or may perform garbage_collections. By doing these via the
system task functionality all garbage collect operations in the
system will be performed solely in the context of the process
being garbage collected. This makes it possible to later implement
functionality for disabling garbage collection of a process over
context switches.

Newly introduced BIFs:

* erlang:garbage_collect/2 - The new second argument is an option
  list. Introduced option:
  * {async, RequestId} - making it possible for users to issue
    asynchronous garbage collect requests.

* erlang:check_process_code/3 -  The new third argument is an
  option list. Introduced options:
  * {async, RequestId} - making it possible for users to issue
    asynchronous check process code requests.
  * {allow_gc, boolean()} - making it possible to issue requests
    that aren't allowed to garbage collect (operation will abort
    if gc should be needed).
  These options have been introduced as a preparation for
  parallelization of check_process_code operations when the
  code_server is about to purge a module.
---
 lib/stdlib/src/erl_internal.erl | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'lib')

diff --git a/lib/stdlib/src/erl_internal.erl b/lib/stdlib/src/erl_internal.erl
index 378e629ac9..28de7205ea 100644
--- a/lib/stdlib/src/erl_internal.erl
+++ b/lib/stdlib/src/erl_internal.erl
@@ -267,6 +267,7 @@ bif(bitstring_to_list, 1) -> true;
 bif(byte_size, 1) -> true;
 bif(check_old_code, 1) -> true;
 bif(check_process_code, 2) -> true;
+bif(check_process_code, 3) -> true;
 bif(date, 0) -> true;
 bif(delete_module, 1) -> true;
 bif(demonitor, 1) -> true;
@@ -286,6 +287,7 @@ bif(float_to_binary, 1) -> true;
 bif(float_to_binary, 2) -> true;
 bif(garbage_collect, 0) -> true;
 bif(garbage_collect, 1) -> true;
+bif(garbage_collect, 2) -> true;
 bif(get, 0) -> true;
 bif(get, 1) -> true;
 bif(get_keys, 1) -> true;
-- 
cgit v1.2.3


From 9f1f0bff7f98d62f8406e5ecd76f6eb7c1a66ff3 Mon Sep 17 00:00:00 2001
From: Rickard Green <rickard@erlang.org>
Date: Sun, 10 Nov 2013 23:37:49 +0100
Subject: Parallel check_process_code when code_server purge a module

When the code_server is about to purge a module it will now issue
asynchronous check_process_code requests to all processes at once
instead of one at a time. These check_process_code operation can
execute in parallel.
---
 lib/kernel/src/code_server.erl | 245 ++++++++++++++++++++++++++++++++++++-----
 lib/kernel/test/code_SUITE.erl |  41 ++++++-
 2 files changed, 257 insertions(+), 29 deletions(-)

(limited to 'lib')

diff --git a/lib/kernel/src/code_server.erl b/lib/kernel/src/code_server.erl
index 5d74e8620b..39b50fc219 100644
--- a/lib/kernel/src/code_server.erl
+++ b/lib/kernel/src/code_server.erl
@@ -1410,45 +1410,236 @@ absname_vr([[X, $:]|Name], _, _AbsBase) ->
 do_purge(Mod0) ->
     Mod = to_atom(Mod0),
     case erlang:check_old_code(Mod) of
-	false -> false;
-	true -> do_purge(processes(), Mod, false)
-    end.
-
-do_purge([P|Ps], Mod, Purged) ->
-    case erlang:check_process_code(P, Mod) of
+	false ->
+	    false;
 	true ->
-	    Ref = erlang:monitor(process, P),
-	    exit(P, kill),
-	    receive
-		{'DOWN',Ref,process,_Pid,_} -> ok
+	    Res = check_proc_code(erlang:processes(), Mod, true),
+	    try
+		erlang:purge_module(Mod)
+	    catch
+		_:_ -> ignore
 	    end,
-	    do_purge(Ps, Mod, true);
-	false ->
-	    do_purge(Ps, Mod, Purged)
-    end;
-do_purge([], Mod, Purged) ->
-    catch erlang:purge_module(Mod),
-    Purged.
+	    Res
+    end.
 
 %% do_soft_purge(Module)
 %% Purge old code only if no procs remain that run old code.
 %% Return true in that case, false if procs remain (in this
 %% case old code is not purged)
 
-do_soft_purge(Mod) ->
+do_soft_purge(Mod0) ->
+    Mod = to_atom(Mod0),
     case erlang:check_old_code(Mod) of
-	false -> true;
-	true -> do_soft_purge(processes(), Mod)
+	false ->
+	    true;
+	true ->
+	    case check_proc_code(erlang:processes(), Mod, false) of
+		false ->
+		    false;
+		true ->
+		    try
+			erlang:purge_module(Mod)
+		    catch
+			_:_ -> ignore
+		    end,
+		    true
+	    end
     end.
 
-do_soft_purge([P|Ps], Mod) ->
-    case erlang:check_process_code(P, Mod) of
-	true -> false;
-	false -> do_soft_purge(Ps, Mod)
+%%
+%% check_proc_code(Pids, Mod, Hard) - Send asynchronous
+%%   requests to all processes to perform a check_process_code
+%%   operation. Each process will check their own state and
+%%   reply with the result. If 'Hard' equals
+%%   - true, processes that refer 'Mod' will be killed. If
+%%     any processes were killed true is returned; otherwise,
+%%     false.
+%%   - false, and any processes refer 'Mod', false will
+%%     returned; otherwise, true.
+%%
+%%   Requests will be sent to all processes identified by
+%%   Pids at once, but without allowing GC to be performed.
+%%   Check process code operations that are aborted due to
+%%   GC need, will be restarted allowing GC. However, only
+%%   ?MAX_CPC_GC_PROCS outstanding operation allowing GC at
+%%   a time will be allowed. This in order not to blow up
+%%   memory wise.
+%%
+%%   We also only allow ?MAX_CPC_NO_OUTSTANDING_KILLS
+%%   outstanding kills. This both in order to avoid flooding
+%%   our message queue with 'DOWN' messages and limiting the
+%%   amount of memory used to keep references to all
+%%   outstanding kills.
+%%
+
+%% We maybe should allow more than two outstanding
+%% GC requests, but for now we play it safe...
+-define(MAX_CPC_GC_PROCS, 2).
+-define(MAX_CPC_NO_OUTSTANDING_KILLS, 10).
+
+-record(cpc_static, {hard, module, tag}).
+
+-record(cpc_kill, {outstanding = [],
+		   no_outstanding = 0,
+		   waiting = [],
+		   killed = false}).
+
+check_proc_code(Pids, Mod, Hard) ->
+    Tag = erlang:make_ref(),
+    CpcS = #cpc_static{hard = Hard,
+		       module = Mod,
+		       tag = Tag},
+    check_proc_code(CpcS, cpc_init(CpcS, Pids, 0), 0, [], #cpc_kill{}, true).
+
+check_proc_code(#cpc_static{hard = true}, 0, 0, [],
+		#cpc_kill{outstanding = [], waiting = [], killed = Killed},
+		true) ->
+    %% No outstanding requests. We did a hard check, so result is whether or
+    %% not we killed any processes...
+    Killed;
+check_proc_code(#cpc_static{hard = false}, 0, 0, [], _KillState, Success) ->
+    %% No outstanding requests and we did a soft check...
+    Success;
+check_proc_code(#cpc_static{hard = false, tag = Tag} = CpcS, NoReq0, NoGcReq0,
+		[], _KillState, false) ->
+    %% Failed soft check; just cleanup the remaining replies corresponding
+    %% to the requests we've sent...
+    {NoReq1, NoGcReq1} = receive
+			     {check_process_code, {Tag, _P, GC}, _Res} ->
+				 case GC of
+				     false -> {NoReq0-1, NoGcReq0};
+				     true -> {NoReq0, NoGcReq0-1}
+				 end
+			 end,
+    check_proc_code(CpcS, NoReq1, NoGcReq1, [], _KillState, false);
+check_proc_code(#cpc_static{tag = Tag} = CpcS, NoReq0, NoGcReq0, NeedGC0,
+		KillState0, Success) ->
+
+    %% Check if we should request a GC operation
+    {NoGcReq1, NeedGC1} = case NoGcReq0 < ?MAX_CPC_GC_PROCS of
+			      GcOpAllowed when GcOpAllowed == false;
+					       NeedGC0 == [] ->
+				  {NoGcReq0, NeedGC0};
+			      _ ->
+				  {NoGcReq0+1, cpc_request_gc(CpcS,NeedGC0)}
+			  end,
+
+    %% Wait for a cpc reply or 'DOWN' message
+    {NoReq1, NoGcReq2, Pid, Result, KillState1} = cpc_recv(Tag,
+							   NoReq0,
+							   NoGcReq1,
+							   KillState0),
+
+    %% Check the result of the reply
+    case Result of
+	aborted ->
+	    %% Operation aborted due to the need to GC in order to
+	    %% determine if the process is referring the module.
+	    %% Schedule the operation for restart allowing GC...
+	    check_proc_code(CpcS, NoReq1, NoGcReq2, [Pid|NeedGC1], KillState1,
+			    Success);
+	false ->
+	    %% Process not referring the module; done with this process...
+	    check_proc_code(CpcS, NoReq1, NoGcReq2, NeedGC1, KillState1,
+			    Success);
+	true ->
+	    %% Process referring the module...
+	    case CpcS#cpc_static.hard of
+		false ->
+		    %% ... and soft check. The whole operation failed so
+		    %% no point continuing; clean up and fail...
+		    check_proc_code(CpcS, NoReq1, NoGcReq2, [], KillState1,
+				    false);
+		true ->
+		    %% ... and hard check; schedule kill of it...
+		    check_proc_code(CpcS, NoReq1, NoGcReq2, NeedGC1,
+				    cpc_sched_kill(Pid, KillState1), Success)
+	    end;
+	'DOWN' ->
+	    %% Handled 'DOWN' message
+	    check_proc_code(CpcS, NoReq1, NoGcReq2, NeedGC1,
+			    KillState1, Success)
+    end.
+
+cpc_recv(Tag, NoReq, NoGcReq, #cpc_kill{outstanding = []} = KillState) ->
+    receive
+	{check_process_code, {Tag, Pid, GC}, Res} ->
+	    cpc_handle_cpc(NoReq, NoGcReq, GC, Pid, Res, KillState)
     end;
-do_soft_purge([], Mod) ->
-    catch erlang:purge_module(Mod),
-    true.
+cpc_recv(Tag, NoReq, NoGcReq,
+	 #cpc_kill{outstanding = [R0, R1, R2, R3, R4 | _]} = KillState) ->
+    receive
+	{'DOWN', R, process, _, _} when R == R0;
+					R == R1;
+					R == R2;
+					R == R3;
+					R == R4 ->
+	    cpc_handle_down(NoReq, NoGcReq, R, KillState); 
+	{check_process_code, {Tag, Pid, GC}, Res} ->
+	    cpc_handle_cpc(NoReq, NoGcReq, GC, Pid, Res, KillState)
+    end;
+cpc_recv(Tag, NoReq, NoGcReq, #cpc_kill{outstanding = [R|_]} = KillState) ->
+    receive
+	{'DOWN', R, process, _, _} ->
+	    cpc_handle_down(NoReq, NoGcReq, R, KillState); 
+	{check_process_code, {Tag, Pid, GC}, Res} ->
+	    cpc_handle_cpc(NoReq, NoGcReq, GC, Pid, Res, KillState)
+    end.
+
+cpc_handle_down(NoReq, NoGcReq, R, #cpc_kill{outstanding = Rs,
+					     no_outstanding = N} = KillState) ->
+    {NoReq, NoGcReq, undefined, 'DOWN',
+     cpc_sched_kill_waiting(KillState#cpc_kill{outstanding = cpc_list_rm(R, Rs),
+					       no_outstanding = N-1})}.
+
+cpc_list_rm(R, [R|Rs]) ->
+    Rs;
+cpc_list_rm(R0, [R1|Rs]) ->
+    [R1|cpc_list_rm(R0, Rs)].
+
+cpc_handle_cpc(NoReq, NoGcReq, false, Pid, Res, KillState) ->
+    {NoReq-1, NoGcReq, Pid, Res, KillState};
+cpc_handle_cpc(NoReq, NoGcReq, true, Pid, Res, KillState) ->
+    {NoReq, NoGcReq-1, Pid, Res, KillState}.
+
+cpc_sched_kill_waiting(#cpc_kill{waiting = []} = KillState) ->
+    KillState;
+cpc_sched_kill_waiting(#cpc_kill{outstanding = Rs,
+				 no_outstanding = N,
+				 waiting = [P|Ps]} = KillState) ->
+    R = erlang:monitor(process, P),
+    exit(P, kill),
+    KillState#cpc_kill{outstanding = [R|Rs],
+		       no_outstanding = N+1,
+		       waiting = Ps,
+		       killed = true}.
+
+cpc_sched_kill(Pid, #cpc_kill{no_outstanding = N, waiting = Pids} = KillState)
+  when N >= ?MAX_CPC_NO_OUTSTANDING_KILLS ->
+    KillState#cpc_kill{waiting = [Pid|Pids]};
+cpc_sched_kill(Pid,
+	       #cpc_kill{outstanding = Rs, no_outstanding = N} = KillState) ->
+    R = erlang:monitor(process, Pid),
+    exit(Pid, kill),
+    KillState#cpc_kill{outstanding = [R|Rs],
+		       no_outstanding = N+1,
+		       killed = true}.
+
+cpc_request(#cpc_static{tag = Tag, module = Mod}, Pid, AllowGc) ->
+    erlang:check_process_code(Pid, Mod, [{async, {Tag, Pid, AllowGc}},
+					 {allow_gc, AllowGc}]).
+
+cpc_request_gc(CpcS, [Pid|Pids]) ->
+    cpc_request(CpcS, Pid, true),
+    Pids.
+
+cpc_init(_CpcS, [], NoReqs) ->
+    NoReqs;
+cpc_init(CpcS, [Pid|Pids], NoReqs) ->
+    cpc_request(CpcS, Pid, false),
+    cpc_init(CpcS, Pids, NoReqs+1).
+
+% end of check_proc_code() implementation.
 
 is_loaded(M, Db) ->
     case ets:lookup(Db, M) of
diff --git a/lib/kernel/test/code_SUITE.erl b/lib/kernel/test/code_SUITE.erl
index cd9359f2aa..17983e972d 100644
--- a/lib/kernel/test/code_SUITE.erl
+++ b/lib/kernel/test/code_SUITE.erl
@@ -23,7 +23,8 @@
 -export([all/0, suite/0,groups/0,init_per_group/2,end_per_group/2]).
 -export([set_path/1, get_path/1, add_path/1, add_paths/1, del_path/1,
 	 replace_path/1, load_file/1, load_abs/1, ensure_loaded/1,
-	 delete/1, purge/1, soft_purge/1, is_loaded/1, all_loaded/1,
+	 delete/1, purge/1, purge_many_exits/1, soft_purge/1, is_loaded/1,
+	 all_loaded/1,
 	 load_binary/1, dir_req/1, object_code/1, set_path_file/1,
 	 upgrade/1,
 	 sticky_dir/1, pa_pz_option/1, add_del_path/1,
@@ -51,7 +52,7 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 all() ->
     [set_path, get_path, add_path, add_paths, del_path,
      replace_path, load_file, load_abs, ensure_loaded,
-     delete, purge, soft_purge, is_loaded, all_loaded,
+     delete, purge, purge_many_exits, soft_purge, is_loaded, all_loaded,
      load_binary, dir_req, object_code, set_path_file,
      upgrade,
      pa_pz_option, add_del_path, dir_disappeared,
@@ -369,6 +370,42 @@ purge(Config) when is_list(Config) ->
     process_flag(trap_exit, OldFlag),
     ok.
 
+purge_many_exits(Config) when is_list(Config) ->
+    OldFlag = process_flag(trap_exit, true),
+    code:purge(code_b_test),
+    {'EXIT',_} = (catch code:purge({})),
+    false = code:purge(code_b_test),
+    TPids = lists:map(fun (_) ->
+			      {code_b_test:do_spawn(),
+			       spawn_link(fun () ->
+						  receive
+						  after infinity -> ok
+						  end
+					  end)}
+			 end,
+			 lists:seq(1, 1000)),
+    % Give them time to start...
+    receive after 1000 -> ok end,
+    true = code:delete(code_b_test),
+    lists:foreach(fun ({Pid1, Pid2}) ->
+			  true = erlang:is_process_alive(Pid1),
+			  false = code_b_test:check_exit(Pid1),
+			  true = erlang:is_process_alive(Pid2)
+		  end, TPids),
+    true = code:purge(code_b_test),
+    lists:foreach(fun ({Pid1, Pid2}) ->
+			  false = erlang:is_process_alive(Pid1),
+			  true = code_b_test:check_exit(Pid1),
+			  true = erlang:is_process_alive(Pid2),
+			  exit(Pid2, kill)
+		  end, TPids),
+    lists:foreach(fun ({_Pid1, Pid2}) ->
+			  receive {'EXIT', Pid2, _} -> ok end
+		  end, TPids),
+    process_flag(trap_exit, OldFlag),
+    ok.
+
+
 soft_purge(suite) -> [];
 soft_purge(doc) -> [];
 soft_purge(Config) when is_list(Config) ->
-- 
cgit v1.2.3