diff options
author | Rickard Green <[email protected]> | 2013-11-10 23:37:49 +0100 |
---|---|---|
committer | Rickard Green <[email protected]> | 2013-11-18 20:12:36 +0100 |
commit | 9f1f0bff7f98d62f8406e5ecd76f6eb7c1a66ff3 (patch) | |
tree | 57dcc1b92c1e00f86c45861b2163218009e792fa | |
parent | c6cb0293ba33e1671f8ed670d5add082e5ee674a (diff) | |
download | otp-9f1f0bff7f98d62f8406e5ecd76f6eb7c1a66ff3.tar.gz otp-9f1f0bff7f98d62f8406e5ecd76f6eb7c1a66ff3.tar.bz2 otp-9f1f0bff7f98d62f8406e5ecd76f6eb7c1a66ff3.zip |
Parallel check_process_code when code_server purge a module
When the code_server is about to purge a module it will now issue
asynchronous check_process_code requests to all processes at once
instead of one at a time. These check_process_code operation can
execute in parallel.
-rw-r--r-- | bootstrap/lib/kernel/ebin/code_server.beam | bin | 26384 -> 28536 bytes | |||
-rw-r--r-- | lib/kernel/src/code_server.erl | 245 | ||||
-rw-r--r-- | lib/kernel/test/code_SUITE.erl | 41 |
3 files changed, 257 insertions, 29 deletions
diff --git a/bootstrap/lib/kernel/ebin/code_server.beam b/bootstrap/lib/kernel/ebin/code_server.beam Binary files differindex 89417b095b..896e3cb61b 100644 --- a/bootstrap/lib/kernel/ebin/code_server.beam +++ b/bootstrap/lib/kernel/ebin/code_server.beam diff --git a/lib/kernel/src/code_server.erl b/lib/kernel/src/code_server.erl index 5d74e8620b..39b50fc219 100644 --- a/lib/kernel/src/code_server.erl +++ b/lib/kernel/src/code_server.erl @@ -1410,45 +1410,236 @@ absname_vr([[X, $:]|Name], _, _AbsBase) -> do_purge(Mod0) -> Mod = to_atom(Mod0), case erlang:check_old_code(Mod) of - false -> false; - true -> do_purge(processes(), Mod, false) - end. - -do_purge([P|Ps], Mod, Purged) -> - case erlang:check_process_code(P, Mod) of + false -> + false; true -> - Ref = erlang:monitor(process, P), - exit(P, kill), - receive - {'DOWN',Ref,process,_Pid,_} -> ok + Res = check_proc_code(erlang:processes(), Mod, true), + try + erlang:purge_module(Mod) + catch + _:_ -> ignore end, - do_purge(Ps, Mod, true); - false -> - do_purge(Ps, Mod, Purged) - end; -do_purge([], Mod, Purged) -> - catch erlang:purge_module(Mod), - Purged. + Res + end. %% do_soft_purge(Module) %% Purge old code only if no procs remain that run old code. %% Return true in that case, false if procs remain (in this %% case old code is not purged) -do_soft_purge(Mod) -> +do_soft_purge(Mod0) -> + Mod = to_atom(Mod0), case erlang:check_old_code(Mod) of - false -> true; - true -> do_soft_purge(processes(), Mod) + false -> + true; + true -> + case check_proc_code(erlang:processes(), Mod, false) of + false -> + false; + true -> + try + erlang:purge_module(Mod) + catch + _:_ -> ignore + end, + true + end end. -do_soft_purge([P|Ps], Mod) -> - case erlang:check_process_code(P, Mod) of - true -> false; - false -> do_soft_purge(Ps, Mod) +%% +%% check_proc_code(Pids, Mod, Hard) - Send asynchronous +%% requests to all processes to perform a check_process_code +%% operation. Each process will check their own state and +%% reply with the result. If 'Hard' equals +%% - true, processes that refer 'Mod' will be killed. If +%% any processes were killed true is returned; otherwise, +%% false. +%% - false, and any processes refer 'Mod', false will +%% returned; otherwise, true. +%% +%% Requests will be sent to all processes identified by +%% Pids at once, but without allowing GC to be performed. +%% Check process code operations that are aborted due to +%% GC need, will be restarted allowing GC. However, only +%% ?MAX_CPC_GC_PROCS outstanding operation allowing GC at +%% a time will be allowed. This in order not to blow up +%% memory wise. +%% +%% We also only allow ?MAX_CPC_NO_OUTSTANDING_KILLS +%% outstanding kills. This both in order to avoid flooding +%% our message queue with 'DOWN' messages and limiting the +%% amount of memory used to keep references to all +%% outstanding kills. +%% + +%% We maybe should allow more than two outstanding +%% GC requests, but for now we play it safe... +-define(MAX_CPC_GC_PROCS, 2). +-define(MAX_CPC_NO_OUTSTANDING_KILLS, 10). + +-record(cpc_static, {hard, module, tag}). + +-record(cpc_kill, {outstanding = [], + no_outstanding = 0, + waiting = [], + killed = false}). + +check_proc_code(Pids, Mod, Hard) -> + Tag = erlang:make_ref(), + CpcS = #cpc_static{hard = Hard, + module = Mod, + tag = Tag}, + check_proc_code(CpcS, cpc_init(CpcS, Pids, 0), 0, [], #cpc_kill{}, true). + +check_proc_code(#cpc_static{hard = true}, 0, 0, [], + #cpc_kill{outstanding = [], waiting = [], killed = Killed}, + true) -> + %% No outstanding requests. We did a hard check, so result is whether or + %% not we killed any processes... + Killed; +check_proc_code(#cpc_static{hard = false}, 0, 0, [], _KillState, Success) -> + %% No outstanding requests and we did a soft check... + Success; +check_proc_code(#cpc_static{hard = false, tag = Tag} = CpcS, NoReq0, NoGcReq0, + [], _KillState, false) -> + %% Failed soft check; just cleanup the remaining replies corresponding + %% to the requests we've sent... + {NoReq1, NoGcReq1} = receive + {check_process_code, {Tag, _P, GC}, _Res} -> + case GC of + false -> {NoReq0-1, NoGcReq0}; + true -> {NoReq0, NoGcReq0-1} + end + end, + check_proc_code(CpcS, NoReq1, NoGcReq1, [], _KillState, false); +check_proc_code(#cpc_static{tag = Tag} = CpcS, NoReq0, NoGcReq0, NeedGC0, + KillState0, Success) -> + + %% Check if we should request a GC operation + {NoGcReq1, NeedGC1} = case NoGcReq0 < ?MAX_CPC_GC_PROCS of + GcOpAllowed when GcOpAllowed == false; + NeedGC0 == [] -> + {NoGcReq0, NeedGC0}; + _ -> + {NoGcReq0+1, cpc_request_gc(CpcS,NeedGC0)} + end, + + %% Wait for a cpc reply or 'DOWN' message + {NoReq1, NoGcReq2, Pid, Result, KillState1} = cpc_recv(Tag, + NoReq0, + NoGcReq1, + KillState0), + + %% Check the result of the reply + case Result of + aborted -> + %% Operation aborted due to the need to GC in order to + %% determine if the process is referring the module. + %% Schedule the operation for restart allowing GC... + check_proc_code(CpcS, NoReq1, NoGcReq2, [Pid|NeedGC1], KillState1, + Success); + false -> + %% Process not referring the module; done with this process... + check_proc_code(CpcS, NoReq1, NoGcReq2, NeedGC1, KillState1, + Success); + true -> + %% Process referring the module... + case CpcS#cpc_static.hard of + false -> + %% ... and soft check. The whole operation failed so + %% no point continuing; clean up and fail... + check_proc_code(CpcS, NoReq1, NoGcReq2, [], KillState1, + false); + true -> + %% ... and hard check; schedule kill of it... + check_proc_code(CpcS, NoReq1, NoGcReq2, NeedGC1, + cpc_sched_kill(Pid, KillState1), Success) + end; + 'DOWN' -> + %% Handled 'DOWN' message + check_proc_code(CpcS, NoReq1, NoGcReq2, NeedGC1, + KillState1, Success) + end. + +cpc_recv(Tag, NoReq, NoGcReq, #cpc_kill{outstanding = []} = KillState) -> + receive + {check_process_code, {Tag, Pid, GC}, Res} -> + cpc_handle_cpc(NoReq, NoGcReq, GC, Pid, Res, KillState) end; -do_soft_purge([], Mod) -> - catch erlang:purge_module(Mod), - true. +cpc_recv(Tag, NoReq, NoGcReq, + #cpc_kill{outstanding = [R0, R1, R2, R3, R4 | _]} = KillState) -> + receive + {'DOWN', R, process, _, _} when R == R0; + R == R1; + R == R2; + R == R3; + R == R4 -> + cpc_handle_down(NoReq, NoGcReq, R, KillState); + {check_process_code, {Tag, Pid, GC}, Res} -> + cpc_handle_cpc(NoReq, NoGcReq, GC, Pid, Res, KillState) + end; +cpc_recv(Tag, NoReq, NoGcReq, #cpc_kill{outstanding = [R|_]} = KillState) -> + receive + {'DOWN', R, process, _, _} -> + cpc_handle_down(NoReq, NoGcReq, R, KillState); + {check_process_code, {Tag, Pid, GC}, Res} -> + cpc_handle_cpc(NoReq, NoGcReq, GC, Pid, Res, KillState) + end. + +cpc_handle_down(NoReq, NoGcReq, R, #cpc_kill{outstanding = Rs, + no_outstanding = N} = KillState) -> + {NoReq, NoGcReq, undefined, 'DOWN', + cpc_sched_kill_waiting(KillState#cpc_kill{outstanding = cpc_list_rm(R, Rs), + no_outstanding = N-1})}. + +cpc_list_rm(R, [R|Rs]) -> + Rs; +cpc_list_rm(R0, [R1|Rs]) -> + [R1|cpc_list_rm(R0, Rs)]. + +cpc_handle_cpc(NoReq, NoGcReq, false, Pid, Res, KillState) -> + {NoReq-1, NoGcReq, Pid, Res, KillState}; +cpc_handle_cpc(NoReq, NoGcReq, true, Pid, Res, KillState) -> + {NoReq, NoGcReq-1, Pid, Res, KillState}. + +cpc_sched_kill_waiting(#cpc_kill{waiting = []} = KillState) -> + KillState; +cpc_sched_kill_waiting(#cpc_kill{outstanding = Rs, + no_outstanding = N, + waiting = [P|Ps]} = KillState) -> + R = erlang:monitor(process, P), + exit(P, kill), + KillState#cpc_kill{outstanding = [R|Rs], + no_outstanding = N+1, + waiting = Ps, + killed = true}. + +cpc_sched_kill(Pid, #cpc_kill{no_outstanding = N, waiting = Pids} = KillState) + when N >= ?MAX_CPC_NO_OUTSTANDING_KILLS -> + KillState#cpc_kill{waiting = [Pid|Pids]}; +cpc_sched_kill(Pid, + #cpc_kill{outstanding = Rs, no_outstanding = N} = KillState) -> + R = erlang:monitor(process, Pid), + exit(Pid, kill), + KillState#cpc_kill{outstanding = [R|Rs], + no_outstanding = N+1, + killed = true}. + +cpc_request(#cpc_static{tag = Tag, module = Mod}, Pid, AllowGc) -> + erlang:check_process_code(Pid, Mod, [{async, {Tag, Pid, AllowGc}}, + {allow_gc, AllowGc}]). + +cpc_request_gc(CpcS, [Pid|Pids]) -> + cpc_request(CpcS, Pid, true), + Pids. + +cpc_init(_CpcS, [], NoReqs) -> + NoReqs; +cpc_init(CpcS, [Pid|Pids], NoReqs) -> + cpc_request(CpcS, Pid, false), + cpc_init(CpcS, Pids, NoReqs+1). + +% end of check_proc_code() implementation. is_loaded(M, Db) -> case ets:lookup(Db, M) of diff --git a/lib/kernel/test/code_SUITE.erl b/lib/kernel/test/code_SUITE.erl index cd9359f2aa..17983e972d 100644 --- a/lib/kernel/test/code_SUITE.erl +++ b/lib/kernel/test/code_SUITE.erl @@ -23,7 +23,8 @@ -export([all/0, suite/0,groups/0,init_per_group/2,end_per_group/2]). -export([set_path/1, get_path/1, add_path/1, add_paths/1, del_path/1, replace_path/1, load_file/1, load_abs/1, ensure_loaded/1, - delete/1, purge/1, soft_purge/1, is_loaded/1, all_loaded/1, + delete/1, purge/1, purge_many_exits/1, soft_purge/1, is_loaded/1, + all_loaded/1, load_binary/1, dir_req/1, object_code/1, set_path_file/1, upgrade/1, sticky_dir/1, pa_pz_option/1, add_del_path/1, @@ -51,7 +52,7 @@ suite() -> [{ct_hooks,[ts_install_cth]}]. all() -> [set_path, get_path, add_path, add_paths, del_path, replace_path, load_file, load_abs, ensure_loaded, - delete, purge, soft_purge, is_loaded, all_loaded, + delete, purge, purge_many_exits, soft_purge, is_loaded, all_loaded, load_binary, dir_req, object_code, set_path_file, upgrade, pa_pz_option, add_del_path, dir_disappeared, @@ -369,6 +370,42 @@ purge(Config) when is_list(Config) -> process_flag(trap_exit, OldFlag), ok. +purge_many_exits(Config) when is_list(Config) -> + OldFlag = process_flag(trap_exit, true), + code:purge(code_b_test), + {'EXIT',_} = (catch code:purge({})), + false = code:purge(code_b_test), + TPids = lists:map(fun (_) -> + {code_b_test:do_spawn(), + spawn_link(fun () -> + receive + after infinity -> ok + end + end)} + end, + lists:seq(1, 1000)), + % Give them time to start... + receive after 1000 -> ok end, + true = code:delete(code_b_test), + lists:foreach(fun ({Pid1, Pid2}) -> + true = erlang:is_process_alive(Pid1), + false = code_b_test:check_exit(Pid1), + true = erlang:is_process_alive(Pid2) + end, TPids), + true = code:purge(code_b_test), + lists:foreach(fun ({Pid1, Pid2}) -> + false = erlang:is_process_alive(Pid1), + true = code_b_test:check_exit(Pid1), + true = erlang:is_process_alive(Pid2), + exit(Pid2, kill) + end, TPids), + lists:foreach(fun ({_Pid1, Pid2}) -> + receive {'EXIT', Pid2, _} -> ok end + end, TPids), + process_flag(trap_exit, OldFlag), + ok. + + soft_purge(suite) -> []; soft_purge(doc) -> []; soft_purge(Config) when is_list(Config) -> |