From 8f4c278b69fe4d613a0b865a2edac43231cad913 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 30 Nov 2015 15:35:47 +0100 Subject: Allow erlang:finish_loading/1 to load more than one module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The BIFs prepare_loading/2 and finish_loading/1 have been designed to allow fast loading in parallel of many modules. Because of the complications with on_load functions, the initial implementation of finish_loading/1 only allowed a single element in the list of prepared modules. finish_loading/1 does not suspend other processes, but it must wait for all schedulers to pass a write barrier ("thread progress"). The time for all schedulers to pass the write barrier is highly variable, depending on what kind of code they are executing. Therefore, allowing finish_loading/1 to finish the loading for more than one module before passing the write barrier could potentially be much faster than calling finish_loading/1 multiple times. The test case many/1 run on my computer shows that with "heavy load", finish loading of 100 modules in parallel is almost 50 times faster than loading them sequentially. With "light load", the gain is still almost 10 times. Here follows an actual sample of the output from the test case on my computer (an 2012 iMac): Light load ========== Sequential: 22361 µs Parallel: 2586 µs Ratio: 9 Heavy load ========== Sequential: 254512 µs Parallel: 5246 µs Ratio: 49 --- erts/emulator/beam/atom.names | 1 + erts/emulator/beam/beam_bif_load.c | 54 ++++++--- erts/emulator/beam/beam_ranges.c | 152 ++++++++++++------------- erts/emulator/beam/code_ix.c | 4 +- erts/emulator/beam/code_ix.h | 2 +- erts/emulator/beam/global.h | 2 +- erts/emulator/beam/module.h | 1 + erts/emulator/test/Makefile | 1 + erts/emulator/test/multi_load_SUITE.erl | 190 ++++++++++++++++++++++++++++++++ 9 files changed, 303 insertions(+), 104 deletions(-) create mode 100644 erts/emulator/test/multi_load_SUITE.erl diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 6fb08ee896..a00b22eac0 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -211,6 +211,7 @@ atom dsend atom dsend_continue_trap atom dunlink atom duplicate_bag +atom duplicated atom dupnames atom einval atom elib_malloc diff --git a/erts/emulator/beam/beam_bif_load.c b/erts/emulator/beam/beam_bif_load.c index 1b4c022370..d426b6eebb 100644 --- a/erts/emulator/beam/beam_bif_load.c +++ b/erts/emulator/beam/beam_bif_load.c @@ -86,7 +86,7 @@ BIF_RETTYPE code_make_stub_module_3(BIF_ALIST_3) ASSERT(modp->curr.num_breakpoints == 0); } - erts_start_staging_code_ix(); + erts_start_staging_code_ix(1); res = erts_make_stub_module(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3); @@ -163,14 +163,13 @@ exception_list(Process* p, Eterm tag, struct m* mp, Sint exceptions) Eterm* hp = HAlloc(p, 3 + 2*exceptions); Eterm res = NIL; - mp += exceptions - 1; while (exceptions > 0) { if (mp->exception) { res = CONS(hp, mp->module, res); hp += 2; exceptions--; } - mp--; + mp++; } return TUPLE2(hp, tag, res); } @@ -202,8 +201,12 @@ finish_loading_1(BIF_ALIST_1) n = erts_list_length(BIF_ARG_1); if (n < 0) { - ERTS_BIF_PREP_ERROR(res, BIF_P, BADARG); - goto done; + badarg: + if (p) { + erts_free(ERTS_ALC_T_LOADER_TMP, p); + } + erts_release_code_write_permission(); + BIF_ERROR(BIF_P, BADARG); } p = erts_alloc(ERTS_ALC_T_LOADER_TMP, n*sizeof(struct m)); @@ -218,29 +221,32 @@ finish_loading_1(BIF_ALIST_1) ProcBin* pb; if (!ERTS_TERM_IS_MAGIC_BINARY(term)) { - ERTS_BIF_PREP_ERROR(res, BIF_P, BADARG); - goto done; + goto badarg; } pb = (ProcBin*) binary_val(term); p[i].code = pb->val; p[i].module = erts_module_for_prepared_code(p[i].code); if (p[i].module == NIL) { - ERTS_BIF_PREP_ERROR(res, BIF_P, BADARG); - goto done; + goto badarg; } BIF_ARG_1 = CDR(cons); } /* * Since we cannot handle atomic loading of a group of modules - * if one or more of them uses on_load, we will only allow one - * element in the list. This limitation is intended to be - * lifted in the future. + * if one or more of them uses on_load, we will only allow + * more than one element in the list if none of the modules + * have an on_load function. */ if (n > 1) { - ERTS_BIF_PREP_ERROR(res, BIF_P, SYSTEM_LIMIT); - goto done; + for (i = 0; i < n; i++) { + if (erts_has_code_on_load(p[i].code) == am_true) { + erts_free(ERTS_ALC_T_LOADER_TMP, p); + erts_release_code_write_permission(); + BIF_ERROR(BIF_P, SYSTEM_LIMIT); + } + } } /* @@ -252,11 +258,27 @@ finish_loading_1(BIF_ALIST_1) */ res = am_ok; - erts_start_staging_code_ix(); + erts_start_staging_code_ix(n); for (i = 0; i < n; i++) { p[i].modp = erts_put_module(p[i].module); + p[i].modp->seen = 0; + } + + exceptions = 0; + for (i = 0; i < n; i++) { + p[i].exception = 0; + if (p[i].modp->seen) { + p[i].exception = 1; + exceptions++; + } + p[i].modp->seen = 1; } + if (exceptions) { + res = exception_list(BIF_P, am_duplicated, p, exceptions); + goto done; + } + for (i = 0; i < n; i++) { if (p[i].modp->curr.num_breakpoints > 0 || p[i].modp->curr.num_traced_exports > 0 || @@ -492,7 +514,7 @@ BIF_RETTYPE delete_module_1(BIF_ALIST_1) } { - erts_start_staging_code_ix(); + erts_start_staging_code_ix(0); code_ix = erts_staging_code_ix(); modp = erts_get_module(BIF_ARG_1, code_ix); if (!modp) { diff --git a/erts/emulator/beam/beam_ranges.c b/erts/emulator/beam/beam_ranges.c index 5a2b66727a..54c337ee72 100644 --- a/erts/emulator/beam/beam_ranges.c +++ b/erts/emulator/beam/beam_ranges.c @@ -53,6 +53,7 @@ struct ranges { }; static struct ranges r[ERTS_NUM_CODE_IX]; static erts_smp_atomic_t mem_used; +static Range* write_ptr; #ifdef HARD_DEBUG static void check_consistency(struct ranges* p) @@ -72,6 +73,17 @@ static void check_consistency(struct ranges* p) # define CHECK(r) #endif /* HARD_DEBUG */ +static int +rangecompare(Range* a, Range* b) +{ + if (a->start < b->start) { + return -1; + } else if (a->start == b->start) { + return 0; + } else { + return 1; + } +} void erts_init_ranges(void) @@ -88,45 +100,70 @@ erts_init_ranges(void) } void -erts_start_staging_ranges(void) +erts_start_staging_ranges(int num_new) { + ErtsCodeIndex src = erts_active_code_ix(); ErtsCodeIndex dst = erts_staging_code_ix(); + Sint need; if (r[dst].modules) { erts_smp_atomic_add_nob(&mem_used, -r[dst].allocated); erts_free(ERTS_ALC_T_MODULE_REFS, r[dst].modules); - r[dst].modules = NULL; } + + need = r[dst].allocated = r[src].n + num_new; + erts_smp_atomic_add_nob(&mem_used, need); + write_ptr = erts_alloc(ERTS_ALC_T_MODULE_REFS, + need * sizeof(Range)); + r[dst].modules = write_ptr; } void erts_end_staging_ranges(int commit) { - ErtsCodeIndex dst = erts_staging_code_ix(); - - if (commit && r[dst].modules == NULL) { + if (commit) { Sint i; - Sint n; - - /* No modules added, just clone src and remove purged code. */ ErtsCodeIndex src = erts_active_code_ix(); + ErtsCodeIndex dst = erts_staging_code_ix(); + Range* mp; + Sint num_inserted; - erts_smp_atomic_add_nob(&mem_used, r[src].n); - r[dst].modules = erts_alloc(ERTS_ALC_T_MODULE_REFS, - r[src].n * sizeof(Range)); - r[dst].allocated = r[src].n; - n = 0; + mp = r[dst].modules; + num_inserted = write_ptr - mp; for (i = 0; i < r[src].n; i++) { Range* rp = r[src].modules+i; if (rp->start < RANGE_END(rp)) { /* Only insert a module that has not been purged. */ - r[dst].modules[n] = *rp; - n++; + write_ptr->start = rp->start; + erts_smp_atomic_init_nob(&write_ptr->end, + (erts_aint_t)(RANGE_END(rp))); + write_ptr++; + } + } + + /* + * There are num_inserted new range entries (unsorted) at the + * beginning of the modules array, followed by the old entries + * (sorted). We must now sort the entire array. + */ + + r[dst].n = write_ptr - mp; + if (num_inserted > 1) { + qsort(mp, r[dst].n, sizeof(Range), + (int (*)(const void *, const void *)) rangecompare); + } else if (num_inserted == 1) { + /* Sift the new range into place. This is faster than qsort(). */ + Range t = mp[0]; + for (i = 0; i < r[dst].n-1 && t.start > mp[i+1].start; i++) { + mp[i] = mp[i+1]; } + mp[i] = t; } - r[dst].n = n; + r[dst].modules = mp; + CHECK(&r[dst]); erts_smp_atomic_set_nob(&r[dst].mid, - (erts_aint_t) (r[dst].modules + n / 2)); + (erts_aint_t) (r[dst].modules + + r[dst].n / 2)); } } @@ -135,82 +172,29 @@ erts_update_ranges(BeamInstr* code, Uint size) { ErtsCodeIndex dst = erts_staging_code_ix(); ErtsCodeIndex src = erts_active_code_ix(); - Sint i; - Sint n; - Sint need; if (src == dst) { ASSERT(!erts_initialized); /* - * During start-up of system, the indices are the same. - * Handle this by faking a source area. + * During start-up of system, the indices are the same + * and erts_start_staging_ranges() has not been called. */ - src = (src+1) % ERTS_NUM_CODE_IX; - if (r[src].modules) { - erts_smp_atomic_add_nob(&mem_used, -r[src].allocated); - erts_free(ERTS_ALC_T_MODULE_REFS, r[src].modules); + if (r[dst].modules == NULL) { + Sint need = 128; + erts_smp_atomic_add_nob(&mem_used, need); + r[dst].modules = erts_alloc(ERTS_ALC_T_MODULE_REFS, + need * sizeof(Range)); + r[dst].allocated = need; + write_ptr = r[dst].modules; } - r[src] = r[dst]; - r[dst].modules = 0; } - CHECK(&r[src]); - - ASSERT(r[dst].modules == NULL); - need = r[dst].allocated = r[src].n + 1; - erts_smp_atomic_add_nob(&mem_used, need); - r[dst].modules = (Range *) erts_alloc(ERTS_ALC_T_MODULE_REFS, - need * sizeof(Range)); - n = 0; - for (i = 0; i < r[src].n; i++) { - Range* rp = r[src].modules+i; - if (code < rp->start) { - r[dst].modules[n].start = code; - erts_smp_atomic_init_nob(&r[dst].modules[n].end, - (erts_aint_t)(((byte *)code) + size)); - ASSERT(!n || RANGE_END(&r[dst].modules[n-1]) < code); - n++; - break; - } - if (rp->start < RANGE_END(rp)) { - /* Only insert a module that has not been purged. */ - r[dst].modules[n].start = rp->start; - erts_smp_atomic_init_nob(&r[dst].modules[n].end, - (erts_aint_t)(RANGE_END(rp))); - ASSERT(!n || RANGE_END(&r[dst].modules[n-1]) < rp->start); - n++; - } - } - - while (i < r[src].n) { - Range* rp = r[src].modules+i; - if (rp->start < RANGE_END(rp)) { - /* Only insert a module that has not been purged. */ - r[dst].modules[n].start = rp->start; - erts_smp_atomic_init_nob(&r[dst].modules[n].end, - (erts_aint_t)(RANGE_END(rp))); - ASSERT(!n || RANGE_END(&r[dst].modules[n-1]) < rp->start); - n++; - } - i++; - } - - if (n == 0 || code > r[dst].modules[n-1].start) { - r[dst].modules[n].start = code; - erts_smp_atomic_init_nob(&r[dst].modules[n].end, - (erts_aint_t)(((byte *)code) + size)); - ASSERT(!n || RANGE_END(&r[dst].modules[n-1]) < code); - n++; - } - - ASSERT(n <= r[src].n+1); - r[dst].n = n; - erts_smp_atomic_set_nob(&r[dst].mid, - (erts_aint_t) (r[dst].modules + n / 2)); - - CHECK(&r[dst]); - CHECK(&r[src]); + ASSERT(r[dst].modules); + write_ptr->start = code; + erts_smp_atomic_init_nob(&(write_ptr->end), + (erts_aint_t)(((byte *)code) + size)); + write_ptr++; } void diff --git a/erts/emulator/beam/code_ix.c b/erts/emulator/beam/code_ix.c index 209d008d18..1b0968c55c 100644 --- a/erts/emulator/beam/code_ix.c +++ b/erts/emulator/beam/code_ix.c @@ -65,12 +65,12 @@ void erts_code_ix_init(void) CIX_TRACE("init"); } -void erts_start_staging_code_ix(void) +void erts_start_staging_code_ix(int num_new) { beam_catches_start_staging(); export_start_staging(); module_start_staging(); - erts_start_staging_ranges(); + erts_start_staging_ranges(num_new); CIX_TRACE("start"); } diff --git a/erts/emulator/beam/code_ix.h b/erts/emulator/beam/code_ix.h index 5f00b409ef..7a66211a5b 100644 --- a/erts/emulator/beam/code_ix.h +++ b/erts/emulator/beam/code_ix.h @@ -100,7 +100,7 @@ void erts_release_code_write_permission(void); * Must be followed by calls to either "end" and "commit" or "abort" before * code write permission can be released. */ -void erts_start_staging_code_ix(void); +void erts_start_staging_code_ix(int num_new); /* End the staging. * Preceded by "start" and must be followed by "commit". diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index b174fa3e8a..956efa5e36 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -1027,7 +1027,7 @@ Eterm erts_make_stub_module(Process* p, Eterm Mod, Eterm Beam, Eterm Info); /* beam_ranges.c */ void erts_init_ranges(void); -void erts_start_staging_ranges(void); +void erts_start_staging_ranges(int num_new); void erts_end_staging_ranges(int commit); void erts_update_ranges(BeamInstr* code, Uint size); void erts_remove_from_ranges(BeamInstr* code); diff --git a/erts/emulator/beam/module.h b/erts/emulator/beam/module.h index e66d628ca9..b7468b0926 100644 --- a/erts/emulator/beam/module.h +++ b/erts/emulator/beam/module.h @@ -37,6 +37,7 @@ struct erl_module_instance { typedef struct erl_module { IndexSlot slot; /* Must be located at top of struct! */ int module; /* Atom index for module (not tagged). */ + int seen; /* Used by finish_loading() */ struct erl_module_instance curr; struct erl_module_instance old; /* protected by "old_code" rwlock */ diff --git a/erts/emulator/test/Makefile b/erts/emulator/test/Makefile index 987a655c3d..318db4b45e 100644 --- a/erts/emulator/test/Makefile +++ b/erts/emulator/test/Makefile @@ -74,6 +74,7 @@ MODULES= \ match_spec_SUITE \ module_info_SUITE \ monitor_SUITE \ + multi_load_SUITE \ nested_SUITE \ nif_SUITE \ node_container_SUITE \ diff --git a/erts/emulator/test/multi_load_SUITE.erl b/erts/emulator/test/multi_load_SUITE.erl new file mode 100644 index 0000000000..33e6c15f8f --- /dev/null +++ b/erts/emulator/test/multi_load_SUITE.erl @@ -0,0 +1,190 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1999-2012. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +-module(multi_load_SUITE). +-export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, + init_per_group/2,end_per_group/2, + many/1,on_load/1,errors/1]). + +-include_lib("common_test/include/ct.hrl"). + +suite() -> [{ct_hooks,[ts_install_cth]}]. + +all() -> + [many,on_load,errors]. + +groups() -> + []. + +init_per_suite(Config) -> + Config. + +end_per_suite(_Config) -> + ok. + +init_per_group(_GroupName, Config) -> + Config. + +end_per_group(_GroupName, Config) -> + Config. + +many(_Config) -> + Ms = make_modules(100, fun many_module/1), + + io:put_chars("Light load\n" + "=========="), + many_measure(Ms), + + _ = [spawn_link(fun many_worker/0) || _ <- lists:seq(1, 8)], + erlang:yield(), + io:put_chars("Heavy load\n" + "=========="), + many_measure(Ms), + + ok. + +many_module(M) -> + ["-module("++M++").", + "-compile(export_all).", + "f1() -> ok.", + "f2() -> ok.", + "f3() -> ok.", + "f4() -> ok."]. + +many_measure(Ms) -> + many_purge(Ms), + MsPrep1 = prepare_modules(Ms), + Us1 = ms(fun() -> many_load_seq(MsPrep1) end), + many_try_call(Ms), + many_purge(Ms), + MsPrep2 = prepare_modules(Ms), + Us2 = ms(fun() -> many_load_par(MsPrep2) end), + many_try_call(Ms), + io:format("# modules: ~9w\n" + "Sequential: ~9w µs\n" + "Parallel: ~9w µs\n" + "Ratio: ~9w\n", + [length(Ms),Us1,Us2,round(Us1/Us2)]), + ok. + +many_load_seq(Ms) -> + [erlang:finish_loading([M]) || M <- Ms], + ok. + +many_load_par(Ms) -> + erlang:finish_loading(Ms). + +many_purge(Ms) -> + _ = [catch erlang:purge_module(M) || {M,_} <- Ms], + ok. + +many_try_call(Ms) -> + _ = [begin + ok = M:f1(), + ok = M:f2(), + ok = M:f3(), + ok = M:f4() + end || {M,_} <- Ms], + ok. + +many_worker() -> + many_worker(lists:seq(1, 100)). + +many_worker(L) -> + N0 = length(L), + N1 = N0 * N0 * N0, + N2 = N1 div (N0 * N0), + N3 = N2 + 10, + _ = N3 - 10, + many_worker(L). + + +on_load(_Config) -> + On = make_modules(2, fun on_load_module/1), + OnPrep = prepare_modules(On), + {'EXIT',{system_limit,_}} = (catch erlang:finish_loading(OnPrep)), + + Normal = make_modules(1, fun on_load_normal/1), + Mixed = Normal ++ tl(On), + MixedPrep = prepare_modules(Mixed), + {'EXIT',{system_limit,_}} = (catch erlang:finish_loading(MixedPrep)), + + SingleOnPrep = tl(OnPrep), + {on_load,[OnLoadMod]} = erlang:finish_loading(SingleOnPrep), + ok = erlang:call_on_load_function(OnLoadMod), + + ok. + +on_load_module(M) -> + ["-module("++M++").", + "-on_load(f/0).", + "f() -> ok."]. + +on_load_normal(M) -> + ["-module("++M++")."]. + + +errors(_Config) -> + finish_loading_badarg(x), + finish_loading_badarg([x|y]), + finish_loading_badarg([x]), + finish_loading_badarg([<<>>]), + + Mods = make_modules(2, fun errors_module/1), + Ms = lists:sort([M || {M,_} <- Mods]), + Prep = prepare_modules(Mods), + {duplicated,Dups} = erlang:finish_loading(Prep ++ Prep), + Ms = lists:sort(Dups), + ok. + +finish_loading_badarg(Arg) -> + {'EXIT',{badarg,[{erlang,finish_loading,[Arg],_}|_]}} = + (catch erlang:finish_loading(Arg)). + +errors_module(M) -> + ["-module("++M++").", + "-export([f/0]).", + "f() -> ok."]. + +%%% +%%% Common utilities +%%% + +ms(Fun) -> + {Ms,ok} = timer:tc(Fun), + Ms. + +make_modules(0, _) -> + []; +make_modules(N, Fun) -> + U = erlang:unique_integer([positive]), + M0 = "m__" ++ integer_to_list(N) ++ "_" ++ integer_to_list(U), + Contents = Fun(M0), + Forms = [make_form(S) || S <- Contents], + {ok,M,Code} = compile:forms(Forms), + [{M,Code}|make_modules(N-1, Fun)]. + +make_form(S) -> + {ok,Toks,_} = erl_scan:string(S), + {ok,Form} = erl_parse:parse_form(Toks), + Form. + +prepare_modules(Ms) -> + [erlang:prepare_loading(M, Code) || {M,Code} <- Ms]. -- cgit v1.2.3