diff options
author | John Högberg <[email protected]> | 2018-03-27 13:14:40 +0200 |
---|---|---|
committer | John Högberg <[email protected]> | 2018-04-23 13:13:53 +0200 |
commit | 573a5abd9d6b1668b49376b489b187780c7125c7 (patch) | |
tree | fe720d8a93b7a5199c2ef2ffe306396a6cb13f92 /erts/emulator/beam | |
parent | 26d72d02167aed57e43f1ad669039b96aa154fb8 (diff) | |
download | otp-573a5abd9d6b1668b49376b489b187780c7125c7.tar.gz otp-573a5abd9d6b1668b49376b489b187780c7125c7.tar.bz2 otp-573a5abd9d6b1668b49376b489b187780c7125c7.zip |
erts: Rewrite memory instrumentation
This commit replaces the old memory instrumentation with a new
implementation that scans carriers instead of wrapping
erts_alloc/erts_free. The old implementation could not extract
information without halting the emulator, had considerable runtime
overhead, and the memory maps it produced were noisy and lacked
critical information.
Since the new implementation walks through existing data structures
there's no longer a need to start the emulator with special flags to
get information about carrier utilization/fragmentation. Memory
fragmentation is also easier to diagnose as it's presented on a
per-carrier basis which eliminates the need to account for "holes"
between mmap segments.
To help track allocations, each allocation can now be tagged with
what it is and who allocated it at the cost of one extra word per
allocation. This is controlled on a per-allocator basis with the
+M<S>atags option, and is enabled by default for binary_alloc and
driver_alloc (which is also used by NIFs).
Diffstat (limited to 'erts/emulator/beam')
-rw-r--r-- | erts/emulator/beam/bif.tab | 2 | ||||
-rw-r--r-- | erts/emulator/beam/break.c | 15 | ||||
-rw-r--r-- | erts/emulator/beam/erl_alloc.c | 98 | ||||
-rw-r--r-- | erts/emulator/beam/erl_alloc_util.c | 1384 | ||||
-rw-r--r-- | erts/emulator/beam/erl_alloc_util.h | 35 | ||||
-rw-r--r-- | erts/emulator/beam/erl_bif_info.c | 96 | ||||
-rw-r--r-- | erts/emulator/beam/erl_init.c | 2 | ||||
-rw-r--r-- | erts/emulator/beam/erl_instrument.c | 1257 | ||||
-rw-r--r-- | erts/emulator/beam/erl_instrument.h | 42 | ||||
-rw-r--r-- | erts/emulator/beam/erl_process.c | 4 | ||||
-rw-r--r-- | erts/emulator/beam/erl_process.h | 1 |
11 files changed, 1443 insertions, 1493 deletions
diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 687fd39d58..bcaf9277bf 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -693,3 +693,5 @@ bif erts_internal:new_connection/1 bif erts_internal:abort_connection/2 bif erts_internal:map_next/3 bif ets:whereis/1 +bif erts_internal:gather_alloc_histograms/1 +bif erts_internal:gather_carrier_info/1 diff --git a/erts/emulator/beam/break.c b/erts/emulator/beam/break.c index ba8cc5e2ba..9ff52c92b8 100644 --- a/erts/emulator/beam/break.c +++ b/erts/emulator/beam/break.c @@ -36,7 +36,6 @@ #include "hash.h" #include "atom.h" #include "beam_load.h" -#include "erl_instrument.h" #include "erl_hl_timer.h" #include "erl_thr_progress.h" #include "erl_proc_sig_queue.h" @@ -955,20 +954,6 @@ erl_crash_dump_v(char *file, int line, char* fmt, va_list args) erts_cbprintf(to, to_arg, "=atoms\n"); dump_atoms(to, to_arg); - /* Keep the instrumentation data at the end of the dump */ - if (erts_instr_memory_map || erts_instr_stat) { - erts_cbprintf(to, to_arg, "=instr_data\n"); - - if (erts_instr_stat) { - erts_cbprintf(to, to_arg, "=memory_status\n"); - erts_instr_dump_stat_to(to, to_arg, 0); - } - if (erts_instr_memory_map) { - erts_cbprintf(to, to_arg, "=memory_map\n"); - erts_instr_dump_memory_map_to(to, to_arg); - } - } - erts_cbprintf(to, to_arg, "=end\n"); if (fp) { fclose(fp); diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index 061b9df627..d99d2ea57b 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -38,7 +38,7 @@ #include "erl_db.h" #include "erl_binary.h" #include "erl_bits.h" -#include "erl_instrument.h" +#include "erl_mtrace.h" #include "erl_mseg.h" #include "erl_monitor_link.h" #include "erl_hl_timer.h" @@ -202,8 +202,6 @@ typedef struct { int top_pad; AlcUInit_t alloc_util; struct { - int stat; - int map; char *mtrace; char *nodename; } instr; @@ -428,6 +426,7 @@ set_default_binary_alloc_opts(struct au_init *ip) #endif ip->init.util.ts = ERTS_ALC_MTA_BINARY; ip->init.util.acul = ERTS_ALC_DEFAULT_ACUL; + ip->init.util.atags = 1; } static void @@ -464,6 +463,7 @@ set_default_driver_alloc_opts(struct au_init *ip) #endif ip->init.util.ts = ERTS_ALC_MTA_DRIVER; ip->init.util.acul = ERTS_ALC_DEFAULT_ACUL; + ip->init.util.atags = 1; } static void @@ -501,6 +501,7 @@ set_default_test_alloc_opts(struct au_init *ip) ip->init.util.mmbcs = 0; /* Main carrier size */ ip->init.util.ts = ERTS_ALC_MTA_TEST; ip->init.util.acul = ERTS_ALC_DEFAULT_ACUL; + ip->init.util.atags = 1; /* Use a constant minimal MBC size */ #if ERTS_SA_MB_CARRIERS @@ -906,7 +907,6 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) &test_alloc_state); erts_mtrace_install_wrapper_functions(); - extra_block_size += erts_instr_init(init.instr.stat, init.instr.map); init_aireq_alloc(); @@ -1411,7 +1411,9 @@ handle_au_arg(struct au_init *auip, } if (!strategy_support_carrier_migration(auip)) auip->init.util.acul = 0; - } + } else if (has_prefix("atags", sub_param)) { + auip->init.util.atags = get_bool_value(sub_param + 5, argv, ip); + } else goto bad_switch; break; @@ -1741,24 +1743,6 @@ handle_args(int *argc, char **argv, erts_alc_hndl_args_init_t *init) break; case 'i': switch (argv[i][3]) { - case 's': - arg = get_value(argv[i]+4, argv, &i); - if (sys_strcmp("true", arg) == 0) - init->instr.stat = 1; - else if (sys_strcmp("false", arg) == 0) - init->instr.stat = 0; - else - bad_value(param, param+3, arg); - break; - case 'm': - arg = get_value(argv[i]+4, argv, &i); - if (sys_strcmp("true", arg) == 0) - init->instr.map = 1; - else if (sys_strcmp("false", arg) == 0) - init->instr.map = 0; - else - bad_value(param, param+3, arg); - break; case 't': init->instr.mtrace = get_value(argv[i]+4, argv, &i); break; @@ -1817,9 +1801,7 @@ handle_args(int *argc, char **argv, erts_alc_hndl_args_init_t *init) case '-': if (argv[i][2] == '\0') { /* End of system flags reached */ - if (init->instr.mtrace - /* || init->instr.stat - || init->instr.map */) { + if (init->instr.mtrace) { while (i < *argc) { if(sys_strcmp(argv[i], "-sname") == 0 || sys_strcmp(argv[i], "-name") == 0) { @@ -2097,7 +2079,7 @@ erts_memory(fmtfn_t *print_to_p, void *print_to_arg, void *proc, Eterm earg) * NOTE! When updating this function, make sure to also update * erlang:memory/[0,1] in $ERL_TOP/erts/preloaded/src/erlang.erl */ -#define ERTS_MEM_NEED_ALL_ALCU (!erts_instr_stat && want_tot_or_sys) +#define ERTS_MEM_NEED_ALL_ALCU (want_tot_or_sys) struct { int total; int processes; @@ -2108,7 +2090,6 @@ erts_memory(fmtfn_t *print_to_p, void *print_to_arg, void *proc, Eterm earg) int binary; int code; int ets; - int maximum; } want = {0}; struct { UWord total; @@ -2120,7 +2101,6 @@ erts_memory(fmtfn_t *print_to_p, void *print_to_arg, void *proc, Eterm earg) UWord binary; UWord code; UWord ets; - UWord maximum; } size = {0}; Eterm atoms[sizeof(size)/sizeof(UWord)]; UWord *uintps[sizeof(size)/sizeof(UWord)]; @@ -2173,12 +2153,6 @@ erts_memory(fmtfn_t *print_to_p, void *print_to_arg, void *proc, Eterm earg) want.ets = 1; atoms[length] = am_ets; uintps[length++] = &size.ets; - - want.maximum = erts_instr_stat; - if (want.maximum) { - atoms[length] = am_maximum; - uintps[length++] = &size.maximum; - } } else { DeclareTmpHeapNoproc(tmp_heap,2); @@ -2260,18 +2234,6 @@ erts_memory(fmtfn_t *print_to_p, void *print_to_arg, void *proc, Eterm earg) uintps[length++] = &size.ets; } break; - case am_maximum: - if (erts_instr_stat) { - if (!want.maximum) { - want.maximum = 1; - atoms[length] = am_maximum; - uintps[length++] = &size.maximum; - } - } else { - UnUseTmpHeapNoproc(2); - return am_badarg; - } - break; default: UnUseTmpHeapNoproc(2); return am_badarg; @@ -2437,14 +2399,7 @@ erts_memory(fmtfn_t *print_to_p, void *print_to_arg, void *proc, Eterm earg) size.ets += erts_get_ets_misc_mem_size(); } - if (erts_instr_stat && (want_tot_or_sys || want.maximum)) { - if (want_tot_or_sys) { - size.total = erts_instr_get_total(); - size.system = size.total - size.processes; - } - size.maximum = erts_instr_get_max_total(); - } - else if (want_tot_or_sys) { + if (want_tot_or_sys) { size.system = size.total - size.processes; } @@ -2522,18 +2477,6 @@ erts_allocated_areas(fmtfn_t *print_to_p, void *print_to_arg, void *proc) i = 0; - if (erts_instr_stat) { - values[i].arity = 2; - values[i].name = "total"; - values[i].ui[0] = erts_instr_get_total(); - i++; - - values[i].arity = 2; - values[i].name = "maximum"; - values[i].ui[0] = erts_instr_get_max_total(); - i++; - } - values[i].arity = 2; values[i].name = "sys_misc"; values[i].ui[0] = erts_sys_misc_mem_sz(); @@ -2824,10 +2767,7 @@ erts_allocator_info(fmtfn_t to, void *arg) erts_alcu_au_info_options(&to, arg, NULL, NULL); erts_print(to, arg, "=allocator:instr\n"); - erts_print(to, arg, "option m: %s\n", - erts_instr_memory_map ? "true" : "false"); - erts_print(to, arg, "option s: %s\n", - erts_instr_stat ? "true" : "false"); + erts_print(to, arg, "option t: %s\n", erts_mtrace_enabled ? "true" : "false"); @@ -2933,16 +2873,12 @@ erts_allocator_options(void *proc) NULL, hpp, szp); #endif { - Eterm o[3], v[3]; - o[0] = am_atom_put("m", 1); - v[0] = erts_instr_memory_map ? am_true : am_false; - o[1] = am_atom_put("s", 1); - v[1] = erts_instr_stat ? am_true : am_false; - o[2] = am_atom_put("t", 1); - v[2] = erts_mtrace_enabled ? am_true : am_false; + Eterm o[1], v[1]; + o[0] = am_atom_put("t", 1); + v[0] = erts_mtrace_enabled ? am_true : am_false; atoms[length] = am_atom_put("instr", 5); - terms[length++] = erts_bld_2tup_list(hpp, szp, 3, o, v); + terms[length++] = erts_bld_2tup_list(hpp, szp, 1, o, v); } atoms[length] = am_atom_put("lock_physical_memory", 20); @@ -3458,8 +3394,8 @@ badarg: /* * The allocator wrapper prelocking stuff below is about the locking order. - * It only affects wrappers (erl_mtrace.c and erl_instrument.c) that keep locks - * during alloc/realloc/free. + * It only affects wrappers (erl_mtrace.c) that keep locks during + * alloc/realloc/free. * * Some query functions in erl_alloc_util.c lock the allocator mutex and then * use erts_printf that in turn may call the sys allocator through the wrappers. diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c index fed51eb200..fdf355d503 100644 --- a/erts/emulator/beam/erl_alloc_util.c +++ b/erts/emulator/beam/erl_alloc_util.c @@ -48,6 +48,8 @@ #include "erl_mseg.h" #include "erl_threads.h" #include "erl_thr_progress.h" +#include "erl_bif_unique.h" +#include "erl_nif.h" #ifdef ERTS_ENABLE_LOCK_COUNT #include "erl_lock_count.h" @@ -139,6 +141,33 @@ MBC after deallocating first block: [Carrier_t|pad|Block_t 111| udata... ] */ +/* Allocation tags ... + * + * These are added to the footer of every block when enabled. Currently they + * consist of the allocation type and an atom identifying the allocating + * driver/nif (or 'system' if that can't be determined), but the format is not + * supposed to be set in stone. + * + * The packing scheme requires that the atom values are small enough to fit + * into a word with ERTS_ALC_N_BITS to spare. Users must check for overflow + * before MAKE_ATAG(). */ + +typedef UWord alcu_atag_t; + +#define MAKE_ATAG(IdAtom, Type) \ + (ASSERT((Type) >= ERTS_ALC_N_MIN && (Type) <= ERTS_ALC_N_MAX), \ + ASSERT(atom_val(IdAtom) <= MAX_ATAG_ATOM_ID), \ + (atom_val(IdAtom) << ERTS_ALC_N_BITS) | (Type)) + +#define ATAG_ID(AT) (make_atom((AT) >> ERTS_ALC_N_BITS)) +#define ATAG_TYPE(AT) ((AT) & ERTS_ALC_N_MASK) + +#define MAX_ATAG_ATOM_ID (ERTS_UWORD_MAX >> ERTS_ALC_N_BITS) + +#define DBG_IS_VALID_ATAG(Allocator, AT) \ + (ATAG_TYPE(AT) >= ERTS_ALC_N_MIN && \ + ATAG_TYPE(AT) <= ERTS_ALC_N_MAX && \ + (Allocator)->alloc_no == ERTS_ALC_T2A(ERTS_ALC_N2T(ATAG_TYPE(AT)))) /* Blocks ... */ @@ -153,10 +182,17 @@ MBC after deallocating first block: #endif #define FBLK_FTR_SZ (sizeof(FreeBlkFtr_t)) +#define GET_BLK_ATAG(B) \ + (((alcu_atag_t *) (((char *) (B)) + (BLK_SZ(B))))[-1]) +#define SET_BLK_ATAG(B, T) \ + (((alcu_atag_t *) (((char *) (B)) + (BLK_SZ(B))))[-1] = (T)) + +#define BLK_ATAG_SZ(AP) ((AP)->atags ? sizeof(alcu_atag_t) : 0) + #define UMEMSZ2BLKSZ(AP, SZ) \ - (ABLK_HDR_SZ + (SZ) <= (AP)->min_block_size \ + (ABLK_HDR_SZ + BLK_ATAG_SZ(AP) + (SZ) <= (AP)->min_block_size \ ? (AP)->min_block_size \ - : UNIT_CEILING(ABLK_HDR_SZ + (SZ))) + : UNIT_CEILING(ABLK_HDR_SZ + BLK_ATAG_SZ(AP) + (SZ))) #define UMEM2BLK(P) ((Block_t *) (((char *) (P)) - ABLK_HDR_SZ)) #define BLK2UMEM(P) ((void *) (((char *) (P)) + ABLK_HDR_SZ)) @@ -688,6 +724,62 @@ static void destroy_carrier(Allctr_t *, Block_t *, Carrier_t **); static void mbc_free(Allctr_t *allctr, void *p, Carrier_t **busy_pcrr_pp); static void dealloc_block(Allctr_t *, void *, ErtsAlcFixList_t *, int); +static alcu_atag_t determine_alloc_tag(Allctr_t *allocator, ErtsAlcType_t type) +{ + ErtsSchedulerData *esdp; + Eterm id; + + ERTS_CT_ASSERT(_unchecked_atom_val(am_system) <= MAX_ATAG_ATOM_ID); + ASSERT(allocator->atags); + + esdp = erts_get_scheduler_data(); + id = am_system; + + if (esdp) { + if (esdp->current_nif) { + Module *mod = erts_nif_get_module((esdp->current_nif)->mod_nif); + + /* Mod can be NULL if a resource destructor allocates memory after + * the module has been unloaded. */ + if (mod) { + id = make_atom(mod->module); + } + } else if (esdp->current_port) { + Port *p = esdp->current_port; + id = (p->drv_ptr)->name_atom; + } + + /* We fall back to 'system' if we can't pack the driver/NIF name into + * the tag. This may be a bit misleading but we've made no promises + * that the information is complete. + * + * This can only happen on 32-bit emulators when a new driver/NIF has + * been loaded *after* 16 million atoms have been used, and supporting + * that fringe case is not worth an extra word. 64-bit emulators are + * unaffected since the atom cache limits atom indexes to 32 bits. */ + if(MAX_ATOM_TABLE_SIZE > MAX_ATAG_ATOM_ID) { + if (atom_val(id) > MAX_ATAG_ATOM_ID) { + id = am_system; + } + } + } + + return MAKE_ATAG(id, type); +} + +static void set_alloc_tag(Allctr_t *allocator, void *p, alcu_atag_t tag) +{ + Block_t *block; + + ASSERT(DBG_IS_VALID_ATAG(allocator, tag)); + ASSERT(allocator->atags && p); + (void)allocator; + + block = UMEM2BLK(p); + + SET_BLK_ATAG(block, tag); +} + /* internal data... */ #if 0 @@ -4242,6 +4334,7 @@ static struct { Eterm e; Eterm t; Eterm ramv; + Eterm atags; #if HAVE_ERTS_MSEG Eterm asbcst; Eterm rsbcst; @@ -4311,7 +4404,7 @@ static struct { #endif } am; -static Eterm fix_type_atoms[ERTS_ALC_NO_FIXED_SIZES]; +static Eterm alloc_type_atoms[ERTS_ALC_N_MAX + 1]; static ERTS_INLINE void atom_init(Eterm *atom, char *name) { @@ -4342,6 +4435,7 @@ init_atoms(Allctr_t *allctr) AM_INIT(e); AM_INIT(t); AM_INIT(ramv); + AM_INIT(atags); #if HAVE_ERTS_MSEG AM_INIT(asbcst); AM_INIT(rsbcst); @@ -4413,12 +4507,12 @@ init_atoms(Allctr_t *allctr) } #endif - for (ix = 0; ix < ERTS_ALC_NO_FIXED_SIZES; ix++) { - ErtsAlcType_t n = ERTS_ALC_N_MIN_A_FIXED_SIZE + ix; - char *name = (char *) ERTS_ALC_N2TD(n); - size_t len = sys_strlen(name); - fix_type_atoms[ix] = am_atom_put(name, len); - } + for (ix = ERTS_ALC_N_MIN; ix <= ERTS_ALC_N_MAX; ix++) { + const char *name = ERTS_ALC_N2TD(ix); + size_t len = sys_strlen(name); + + alloc_type_atoms[ix] = am_atom_put(name, len); + } } if (allctr && !allctr->atoms_initialized) { @@ -4531,6 +4625,7 @@ sz_info_fix(Allctr_t *allctr, ErtsAlcFixList_t *fix = &allctr->fix[ix]; UWord alloced = fix->type_size * fix->u.cpool.allocated; UWord used = fix->type_size * fix->u.cpool.used; + ErtsAlcType_t n = ERTS_ALC_N_MIN_A_FIXED_SIZE + ix; if (print_to_p) { fmtfn_t to = *print_to_p; @@ -4538,15 +4633,14 @@ sz_info_fix(Allctr_t *allctr, erts_print(to, arg, "fix type internal: %s %bpu %bpu\n", - (char *) ERTS_ALC_N2TD(ERTS_ALC_N_MIN_A_FIXED_SIZE - + ix), + (char *) ERTS_ALC_N2TD(n), alloced, used); } if (hpp || szp) { add_3tup(hpp, szp, &res, - fix_type_atoms[ix], + alloc_type_atoms[n], bld_unstable_uint(hpp, szp, alloced), bld_unstable_uint(hpp, szp, used)); } @@ -4559,6 +4653,7 @@ sz_info_fix(Allctr_t *allctr, ErtsAlcFixList_t *fix = &allctr->fix[ix]; UWord alloced = fix->type_size * fix->u.nocpool.allocated; UWord used = fix->type_size*fix->u.nocpool.used; + ErtsAlcType_t n = ERTS_ALC_N_MIN_A_FIXED_SIZE + ix; if (print_to_p) { fmtfn_t to = *print_to_p; @@ -4566,15 +4661,14 @@ sz_info_fix(Allctr_t *allctr, erts_print(to, arg, "fix type: %s %bpu %bpu\n", - (char *) ERTS_ALC_N2TD(ERTS_ALC_N_MIN_A_FIXED_SIZE - + ix), + (char *) ERTS_ALC_N2TD(n), alloced, used); } if (hpp || szp) { add_3tup(hpp, szp, &res, - fix_type_atoms[ix], + alloc_type_atoms[n], bld_unstable_uint(hpp, szp, alloced), bld_unstable_uint(hpp, szp, used)); } @@ -5000,6 +5094,7 @@ info_options(Allctr_t *allctr, "option e: true\n" "option t: %s\n" "option ramv: %s\n" + "option atags: %s\n" "option sbct: %beu\n" #if HAVE_ERTS_MSEG "option asbcst: %bpu\n" @@ -5018,6 +5113,7 @@ info_options(Allctr_t *allctr, "option acul: %bpu\n", topt, allctr->ramv ? "true" : "false", + allctr->atags ? "true" : "false", allctr->sbc_threshold, #if HAVE_ERTS_MSEG allctr->mseg_opt.abs_shrink_th, @@ -5087,6 +5183,7 @@ info_options(Allctr_t *allctr, am_sbct, bld_uint(hpp, szp, allctr->sbc_threshold)); add_2tup(hpp, szp, &res, am.ramv, allctr->ramv ? am_true : am_false); + add_2tup(hpp, szp, &res, am.atags, allctr->atags ? am_true : am_false); add_2tup(hpp, szp, &res, am.t, (allctr->t ? am_true : am_false)); add_2tup(hpp, szp, &res, am.e, am_true); } @@ -5408,9 +5505,8 @@ erts_alcu_current_size(Allctr_t *allctr, AllctrSize_t *size, ErtsAlcUFixInfo_t * /* ----------------------------------------------------------------------- */ static ERTS_INLINE void * -do_erts_alcu_alloc(ErtsAlcType_t type, void *extra, Uint size) +do_erts_alcu_alloc(ErtsAlcType_t type, Allctr_t *allctr, Uint size) { - Allctr_t *allctr = (Allctr_t *) extra; void *res; ASSERT(initialized); @@ -5449,10 +5545,19 @@ do_erts_alcu_alloc(ErtsAlcType_t type, void *extra, Uint size) void *erts_alcu_alloc(ErtsAlcType_t type, void *extra, Uint size) { + Allctr_t *allctr = (Allctr_t *) extra; void *res; + ASSERT(!"This is not thread safe"); - res = do_erts_alcu_alloc(type, extra, size); + + res = do_erts_alcu_alloc(type, allctr, size); + + if (allctr->atags && res) { + set_alloc_tag(allctr, res, determine_alloc_tag(allctr, type)); + } + DEBUG_CHECK_ALIGNMENT(res); + return res; } @@ -5462,13 +5567,25 @@ void * erts_alcu_alloc_ts(ErtsAlcType_t type, void *extra, Uint size) { Allctr_t *allctr = (Allctr_t *) extra; + alcu_atag_t tag = 0; void *res; + + if (allctr->atags) { + tag = determine_alloc_tag(allctr, type); + } + erts_mtx_lock(&allctr->mutex); - res = do_erts_alcu_alloc(type, extra, size); - DEBUG_CHECK_ALIGNMENT(res); + res = do_erts_alcu_alloc(type, allctr, size); + + if (allctr->atags && res) { + set_alloc_tag(allctr, res, tag); + } erts_mtx_unlock(&allctr->mutex); + + DEBUG_CHECK_ALIGNMENT(res); + return res; } @@ -5478,6 +5595,7 @@ erts_alcu_alloc_thr_spec(ErtsAlcType_t type, void *extra, Uint size) { ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra; int ix; + alcu_atag_t tag = 0; Allctr_t *allctr; void *res; @@ -5487,11 +5605,19 @@ erts_alcu_alloc_thr_spec(ErtsAlcType_t type, void *extra, Uint size) allctr = tspec->allctr[ix]; + if (allctr->atags) { + tag = determine_alloc_tag(allctr, type); + } + if (allctr->thread_safe) erts_mtx_lock(&allctr->mutex); res = do_erts_alcu_alloc(type, allctr, size); + if (allctr->atags && res) { + set_alloc_tag(allctr, res, tag); + } + if (allctr->thread_safe) erts_mtx_unlock(&allctr->mutex); @@ -5504,10 +5630,15 @@ void * erts_alcu_alloc_thr_pref(ErtsAlcType_t type, void *extra, Uint size) { Allctr_t *pref_allctr; + alcu_atag_t tag = 0; void *res; pref_allctr = get_pref_allctr(extra); + if (pref_allctr->atags) { + tag = determine_alloc_tag(pref_allctr, type); + } + if (pref_allctr->thread_safe) erts_mtx_lock(&pref_allctr->mutex); @@ -5523,12 +5654,15 @@ erts_alcu_alloc_thr_pref(ErtsAlcType_t type, void *extra, Uint size) res = do_erts_alcu_alloc(type, pref_allctr, size); } + if (pref_allctr->atags && res) { + set_alloc_tag(pref_allctr, res, tag); + } + if (pref_allctr->thread_safe) erts_mtx_unlock(&pref_allctr->mutex); DEBUG_CHECK_ALIGNMENT(res); - return res; } @@ -5537,10 +5671,9 @@ erts_alcu_alloc_thr_pref(ErtsAlcType_t type, void *extra, Uint size) /* ------------------------------------------------------------------------- */ static ERTS_INLINE void -do_erts_alcu_free(ErtsAlcType_t type, void *extra, void *p, +do_erts_alcu_free(ErtsAlcType_t type, Allctr_t *allctr, void *p, Carrier_t **busy_pcrr_pp) { - Allctr_t *allctr = (Allctr_t *) extra; ASSERT(initialized); ASSERT(allctr); @@ -5572,7 +5705,8 @@ do_erts_alcu_free(ErtsAlcType_t type, void *extra, void *p, void erts_alcu_free(ErtsAlcType_t type, void *extra, void *p) { - do_erts_alcu_free(type, extra, p, NULL); + Allctr_t *allctr = (Allctr_t *) extra; + do_erts_alcu_free(type, allctr, p, NULL); } @@ -5581,7 +5715,7 @@ erts_alcu_free_ts(ErtsAlcType_t type, void *extra, void *p) { Allctr_t *allctr = (Allctr_t *) extra; erts_mtx_lock(&allctr->mutex); - do_erts_alcu_free(type, extra, p, NULL); + do_erts_alcu_free(type, allctr, p, NULL); erts_mtx_unlock(&allctr->mutex); } @@ -5641,13 +5775,12 @@ erts_alcu_free_thr_pref(ErtsAlcType_t type, void *extra, void *p) static ERTS_INLINE void * do_erts_alcu_realloc(ErtsAlcType_t type, - void *extra, + Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs, Carrier_t **busy_pcrr_pp) { - Allctr_t *allctr = (Allctr_t *) extra; Block_t *blk; void *res; @@ -5661,7 +5794,7 @@ do_erts_alcu_realloc(ErtsAlcType_t type, ERTS_ALCU_DBG_CHK_THR_ACCESS(allctr); if (!p) { - res = do_erts_alcu_alloc(type, extra, size); + res = do_erts_alcu_alloc(type, allctr, size); INC_CC(allctr->calls.this_realloc); DEC_CC(allctr->calls.this_alloc); return res; @@ -5670,7 +5803,7 @@ do_erts_alcu_realloc(ErtsAlcType_t type, #if ALLOC_ZERO_EQ_NULL if (!size) { ASSERT(p); - do_erts_alcu_free(type, extra, p, busy_pcrr_pp); + do_erts_alcu_free(type, allctr, p, busy_pcrr_pp); INC_CC(allctr->calls.this_realloc); DEC_CC(allctr->calls.this_free); return NULL; @@ -5755,19 +5888,29 @@ do_erts_alcu_realloc(ErtsAlcType_t type, void * erts_alcu_realloc(ErtsAlcType_t type, void *extra, void *p, Uint size) { + Allctr_t *allctr = (Allctr_t *)extra; void *res; - res = do_erts_alcu_realloc(type, extra, p, size, 0, NULL); + + res = do_erts_alcu_realloc(type, allctr, p, size, 0, NULL); + DEBUG_CHECK_ALIGNMENT(res); + + if (allctr->atags && res) { + set_alloc_tag(allctr, res, determine_alloc_tag(allctr, type)); + } + return res; } void * erts_alcu_realloc_mv(ErtsAlcType_t type, void *extra, void *p, Uint size) { + Allctr_t *allctr = (Allctr_t *)extra; void *res; - res = do_erts_alcu_alloc(type, extra, size); + + res = do_erts_alcu_alloc(type, allctr, size); if (!res) - res = erts_alcu_realloc(type, extra, p, size); + res = do_erts_alcu_realloc(type, allctr, p, size, 0, NULL); else { Block_t *blk; size_t cpy_size; @@ -5777,23 +5920,42 @@ erts_alcu_realloc_mv(ErtsAlcType_t type, void *extra, void *p, Uint size) if (cpy_size > size) cpy_size = size; sys_memcpy(res, p, cpy_size); - do_erts_alcu_free(type, extra, p, NULL); + do_erts_alcu_free(type, allctr, p, NULL); } + DEBUG_CHECK_ALIGNMENT(res); + + if (allctr->atags && res) { + set_alloc_tag(allctr, res, determine_alloc_tag(allctr, type)); + } + return res; } - void * erts_alcu_realloc_ts(ErtsAlcType_t type, void *extra, void *ptr, Uint size) { Allctr_t *allctr = (Allctr_t *) extra; + alcu_atag_t tag = 0; void *res; + + if (allctr->atags) { + tag = determine_alloc_tag(allctr, type); + } + erts_mtx_lock(&allctr->mutex); - res = do_erts_alcu_realloc(type, extra, ptr, size, 0, NULL); + + res = do_erts_alcu_realloc(type, allctr, ptr, size, 0, NULL); + + if (allctr->atags && res) { + set_alloc_tag(allctr, res, tag); + } + erts_mtx_unlock(&allctr->mutex); + DEBUG_CHECK_ALIGNMENT(res); + return res; } @@ -5801,11 +5963,17 @@ void * erts_alcu_realloc_mv_ts(ErtsAlcType_t type, void *extra, void *p, Uint size) { Allctr_t *allctr = (Allctr_t *) extra; + alcu_atag_t tag = 0; void *res; + + if (allctr->atags) { + tag = determine_alloc_tag(allctr, type); + } + erts_mtx_lock(&allctr->mutex); - res = do_erts_alcu_alloc(type, extra, size); + res = do_erts_alcu_alloc(type, allctr, size); if (!res) - res = do_erts_alcu_realloc(type, extra, p, size, 0, NULL); + res = do_erts_alcu_realloc(type, allctr, p, size, 0, NULL); else { Block_t *blk; size_t cpy_size; @@ -5815,10 +5983,17 @@ erts_alcu_realloc_mv_ts(ErtsAlcType_t type, void *extra, void *p, Uint size) if (cpy_size > size) cpy_size = size; sys_memcpy(res, p, cpy_size); - do_erts_alcu_free(type, extra, p, NULL); + do_erts_alcu_free(type, allctr, p, NULL); } + + if (allctr->atags && res) { + set_alloc_tag(allctr, res, tag); + } + erts_mtx_unlock(&allctr->mutex); + DEBUG_CHECK_ALIGNMENT(res); + return res; } @@ -5829,6 +6004,7 @@ erts_alcu_realloc_thr_spec(ErtsAlcType_t type, void *extra, { ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra; int ix; + alcu_atag_t tag = 0; Allctr_t *allctr; void *res; @@ -5838,11 +6014,19 @@ erts_alcu_realloc_thr_spec(ErtsAlcType_t type, void *extra, allctr = tspec->allctr[ix]; + if (allctr->atags) { + tag = determine_alloc_tag(allctr, type); + } + if (allctr->thread_safe) erts_mtx_lock(&allctr->mutex); res = do_erts_alcu_realloc(type, allctr, ptr, size, 0, NULL); + if (allctr->atags && res) { + set_alloc_tag(allctr, res, tag); + } + if (allctr->thread_safe) erts_mtx_unlock(&allctr->mutex); @@ -5857,6 +6041,7 @@ erts_alcu_realloc_mv_thr_spec(ErtsAlcType_t type, void *extra, { ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra; int ix; + alcu_atag_t tag = 0; Allctr_t *allctr; void *res; @@ -5866,14 +6051,16 @@ erts_alcu_realloc_mv_thr_spec(ErtsAlcType_t type, void *extra, allctr = tspec->allctr[ix]; + if (allctr->atags) { + tag = determine_alloc_tag(allctr, type); + } + if (allctr->thread_safe) erts_mtx_lock(&allctr->mutex); res = do_erts_alcu_alloc(type, allctr, size); if (!res) { - if (allctr->thread_safe) - erts_mtx_unlock(&allctr->mutex); - res = erts_alcu_realloc_thr_spec(type, allctr, ptr, size); + res = do_erts_alcu_realloc(type, allctr, ptr, size, 0, NULL); } else { Block_t *blk; @@ -5885,29 +6072,34 @@ erts_alcu_realloc_mv_thr_spec(ErtsAlcType_t type, void *extra, cpy_size = size; sys_memcpy(res, ptr, cpy_size); do_erts_alcu_free(type, allctr, ptr, NULL); - if (allctr->thread_safe) - erts_mtx_unlock(&allctr->mutex); } + if (allctr->atags && res) { + set_alloc_tag(allctr, res, tag); + } + + if (allctr->thread_safe) + erts_mtx_unlock(&allctr->mutex); + DEBUG_CHECK_ALIGNMENT(res); return res; } static ERTS_INLINE void * -realloc_thr_pref(ErtsAlcType_t type, void *extra, void *p, Uint size, +realloc_thr_pref(ErtsAlcType_t type, Allctr_t *pref_allctr, void *p, Uint size, int force_move) { void *res; - Allctr_t *pref_allctr, *used_allctr; + Allctr_t *used_allctr; UWord old_user_size; Carrier_t *busy_pcrr_p; + alcu_atag_t tag = 0; int retried; - if (!p) - return erts_alcu_alloc_thr_pref(type, extra, size); - - pref_allctr = get_pref_allctr(extra); + if (pref_allctr->atags) { + tag = determine_alloc_tag(pref_allctr, type); + } if (pref_allctr->thread_safe) erts_mtx_lock(&pref_allctr->mutex); @@ -5936,6 +6128,11 @@ restart: retried = 1; goto restart; } + + if (pref_allctr->atags && res) { + set_alloc_tag(pref_allctr, res, tag); + } + if (pref_allctr->thread_safe) erts_mtx_unlock(&pref_allctr->mutex); } @@ -5944,6 +6141,9 @@ restart: if (!res) goto unlock_ts_return; else { + if (pref_allctr->atags) { + set_alloc_tag(pref_allctr, res, tag); + } DEBUG_CHECK_ALIGNMENT(res); @@ -5974,20 +6174,34 @@ restart: } } + DEBUG_CHECK_ALIGNMENT(res); + return res; } void * erts_alcu_realloc_thr_pref(ErtsAlcType_t type, void *extra, void *p, Uint size) { - return realloc_thr_pref(type, extra, p, size, 0); + if (p) { + Allctr_t *pref_allctr = get_pref_allctr(extra); + + return realloc_thr_pref(type, pref_allctr, p, size, 0); + } + + return erts_alcu_alloc_thr_pref(type, extra, size); } void * erts_alcu_realloc_mv_thr_pref(ErtsAlcType_t type, void *extra, void *p, Uint size) { - return realloc_thr_pref(type, extra, p, size, 1); + if (p) { + Allctr_t *pref_allctr = get_pref_allctr(extra); + + return realloc_thr_pref(type, pref_allctr, p, size, 1); + } + + return erts_alcu_alloc_thr_pref(type, extra, size); } @@ -6071,6 +6285,7 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) allctr->t = 0; allctr->ramv = init->ramv; + allctr->atags = init->atags; allctr->main_carrier_size = init->mmbcs; #if HAVE_ERTS_MSEG @@ -6320,6 +6535,1072 @@ erts_alcu_init(AlcUInit_t *init) initialized = 1; } +/* ------------------------------------------------------------------------- */ + +/* Allocation histograms and carrier information is gathered by walking through + * all carriers associated with each allocator instance. This is done as + * aux_yield_work on the scheduler that owns each instance. + * + * Yielding is implemented by temporarily inserting a "dummy carrier" at the + * last position. It's permanently "busy" so it won't get picked up by someone + * else when in the carrier pool, and we never make the employer aware of it + * through callbacks so we can't accidentally allocate on it. + * + * Plain malloc/free is used to guarantee we won't allocate with the allocator + * we're scanning. */ + +/* Yield between carriers once this many blocks have been processed. Note that + * a single carrier scan may exceed this figure. */ +#ifndef DEBUG + #define BLOCKSCAN_REDUCTIONS (8000) +#else + #define BLOCKSCAN_REDUCTIONS (400) +#endif + +/* Abort a single carrier scan after this many blocks to prevent really large + * MBCs from blocking forever. */ +#define BLOCKSCAN_BAILOUT_THRESHOLD (16000) + +typedef struct alcu_blockscan { + /* A per-scheduler list used when multiple scans have been queued. The + * current scanner will always run until completion/abort before moving on + * to the next. */ + struct alcu_blockscan *scanner_queue; + + Allctr_t *allocator; + Process *process; + + int (*current_op)(struct alcu_blockscan *scanner); + int (*next_op)(struct alcu_blockscan *scanner); + int reductions; + + ErtsAlcCPoolData_t *cpool_cursor; + CarrierList_t *current_clist; + Carrier_t *clist_cursor; + Carrier_t dummy_carrier; + + /* Called if the process that started this job dies before we're done. */ + void (*abort)(void *user_data); + + /* Called on each carrier. The callback must return the number of blocks + * scanned to yield properly between carriers. + * + * Note that it's not possible to "yield back" into a carrier. */ + int (*scan)(Allctr_t *, void *user_data, Carrier_t *); + + /* Called when all carriers have been scanned. The callback may return + * non-zero to yield. */ + int (*finish)(void *user_data); + + void *user_data; +} blockscan_t; + +static Carrier_t *blockscan_restore_clist_cursor(blockscan_t *state) +{ + Carrier_t *cursor = state->clist_cursor; + + ASSERT(state->clist_cursor == (state->current_clist)->first || + state->clist_cursor == &state->dummy_carrier); + + if (cursor == &state->dummy_carrier) { + cursor = cursor->next; + + unlink_carrier(state->current_clist, state->clist_cursor); + } + + return cursor; +} + +static void blockscan_save_clist_cursor(blockscan_t *state, Carrier_t *after) +{ + ASSERT(state->clist_cursor == (state->current_clist)->first || + state->clist_cursor == &state->dummy_carrier); + + state->clist_cursor = &state->dummy_carrier; + + (state->clist_cursor)->next = after->next; + (state->clist_cursor)->prev = after; + + relink_carrier(state->current_clist, state->clist_cursor); +} + +static int blockscan_clist_yielding(blockscan_t *state) +{ + Carrier_t *cursor = blockscan_restore_clist_cursor(state); + + if (ERTS_PROC_IS_EXITING(state->process)) { + return 0; + } + + while (cursor) { + /* Skip dummy carriers inserted by another (concurrent) block scan. + * This can happen when scanning thread-safe allocators from multiple + * schedulers. */ + if (CARRIER_SZ(cursor) > 0) { + int blocks_scanned = state->scan(state->allocator, + state->user_data, + cursor); + + state->reductions -= blocks_scanned; + + if (state->reductions <= 0) { + blockscan_save_clist_cursor(state, cursor); + return 1; + } + } + + cursor = cursor->next; + } + + return 0; +} + +static ErtsAlcCPoolData_t *blockscan_restore_cpool_cursor(blockscan_t *state) +{ + ErtsAlcCPoolData_t *cursor; + + cursor = cpool_aint2cpd(cpool_read(&(state->cpool_cursor)->next)); + + if (state->cpool_cursor == &state->dummy_carrier.cpool) { + cpool_delete(state->allocator, state->allocator, &state->dummy_carrier); + } + + return cursor; +} + +static void blockscan_save_cpool_cursor(blockscan_t *state, + ErtsAlcCPoolData_t *after) +{ + ErtsAlcCPoolData_t *dummy_carrier, *prev_carrier, *next_carrier; + + dummy_carrier = &state->dummy_carrier.cpool; + + next_carrier = cpool_aint2cpd(cpool_mod_mark(&after->next)); + prev_carrier = cpool_aint2cpd(cpool_mod_mark(&next_carrier->prev)); + + cpool_init(&dummy_carrier->next, (erts_aint_t)next_carrier); + cpool_init(&dummy_carrier->prev, (erts_aint_t)prev_carrier); + + cpool_set_mod_marked(&prev_carrier->next, + (erts_aint_t)dummy_carrier, + (erts_aint_t)next_carrier); + cpool_set_mod_marked(&next_carrier->prev, + (erts_aint_t)dummy_carrier, + (erts_aint_t)prev_carrier); + + state->cpool_cursor = dummy_carrier; +} + +static int blockscan_cpool_yielding(blockscan_t *state) +{ + ErtsAlcCPoolData_t *sentinel, *cursor; + + sentinel = &carrier_pool[(state->allocator)->alloc_no].sentinel; + cursor = blockscan_restore_cpool_cursor(state); + + if (ERTS_PROC_IS_EXITING(state->process)) { + return 0; + } + + while (cursor != sentinel) { + Carrier_t *carrier; + erts_aint_t exp; + + /* When a deallocation happens on a pooled carrier it will be routed to + * its owner, so the only way to be sure that it isn't modified while + * scanning is to skip all carriers that aren't ours. The deallocations + * deferred to us will get handled when we're done. */ + while (cursor->orig_allctr != state->allocator) { + cursor = cpool_aint2cpd(cpool_read(&cursor->next)); + + if (cursor == sentinel) { + return 0; + } + } + + carrier = ErtsContainerStruct(cursor, Carrier_t, cpool); + exp = erts_atomic_read_rb(&carrier->allctr); + + if (exp & ERTS_CRR_ALCTR_FLG_IN_POOL) { + ASSERT(state->allocator == (Allctr_t*)(exp & ~ERTS_CRR_ALCTR_FLG_MASK)); + ASSERT(!(exp & ERTS_CRR_ALCTR_FLG_BUSY)); + + if (erts_atomic_cmpxchg_acqb(&carrier->allctr, + exp | ERTS_CRR_ALCTR_FLG_BUSY, + exp) == exp) { + /* Skip dummy carriers inserted by another (concurrent) block + * scan. This can happen when scanning thread-safe allocators + * from multiple schedulers. */ + if (CARRIER_SZ(carrier) > 0) { + int blocks_scanned = state->scan(state->allocator, + state->user_data, + carrier); + + state->reductions -= blocks_scanned; + + if (state->reductions <= 0) { + blockscan_save_cpool_cursor(state, cursor); + erts_atomic_set_relb(&carrier->allctr, exp); + + return 1; + } + } + + erts_atomic_set_relb(&carrier->allctr, exp); + } + } + + cursor = cpool_aint2cpd(cpool_read(&cursor->next)); + } + + return 0; +} + +static int blockscan_yield_helper(blockscan_t *state, + int (*yielding_op)(blockscan_t*)) +{ + /* Note that we don't check whether to abort here; only yielding_op knows + * whether the carrier is still in the list/pool. */ + + if ((state->allocator)->thread_safe) { + /* Locked scans have to be as short as possible. */ + state->reductions = 1; + + erts_mtx_lock(&(state->allocator)->mutex); + } else { + state->reductions = BLOCKSCAN_REDUCTIONS; + } + + if (yielding_op(state)) { + state->next_op = state->current_op; + } + + if ((state->allocator)->thread_safe) { + erts_mtx_unlock(&(state->allocator)->mutex); + } + + return 1; +} + +/* */ + +static int blockscan_finish(blockscan_t *state) +{ + if (ERTS_PROC_IS_EXITING(state->process)) { + state->abort(state->user_data); + return 0; + } + + state->current_op = blockscan_finish; + + return state->finish(state->user_data); +} + +static int blockscan_sweep_sbcs(blockscan_t *state) +{ + if (state->current_op != blockscan_sweep_sbcs) { + SET_CARRIER_HDR(&state->dummy_carrier, 0, SCH_SBC, state->allocator); + state->current_clist = &(state->allocator)->sbc_list; + state->clist_cursor = (state->current_clist)->first; + } + + state->current_op = blockscan_sweep_sbcs; + state->next_op = blockscan_finish; + + return blockscan_yield_helper(state, blockscan_clist_yielding); +} + +static int blockscan_sweep_mbcs(blockscan_t *state) +{ + if (state->current_op != blockscan_sweep_mbcs) { + SET_CARRIER_HDR(&state->dummy_carrier, 0, SCH_MBC, state->allocator); + state->current_clist = &(state->allocator)->mbc_list; + state->clist_cursor = (state->current_clist)->first; + } + + state->current_op = blockscan_sweep_mbcs; + state->next_op = blockscan_sweep_sbcs; + + return blockscan_yield_helper(state, blockscan_clist_yielding); +} + +static int blockscan_sweep_cpool(blockscan_t *state) +{ + if (state->current_op != blockscan_sweep_cpool) { + ErtsAlcCPoolData_t *sentinel; + + SET_CARRIER_HDR(&state->dummy_carrier, 0, SCH_MBC, state->allocator); + sentinel = &carrier_pool[(state->allocator)->alloc_no].sentinel; + state->cpool_cursor = sentinel; + } + + state->current_op = blockscan_sweep_cpool; + state->next_op = blockscan_sweep_mbcs; + + return blockscan_yield_helper(state, blockscan_cpool_yielding); +} + +static int blockscan_get_specific_allocator(int allocator_num, + int sched_id, + Allctr_t **out) +{ + ErtsAllocatorInfo_t *ai; + Allctr_t *allocator; + + ASSERT(allocator_num >= ERTS_ALC_A_MIN && + allocator_num <= ERTS_ALC_A_MAX); + ASSERT(sched_id >= 0 && sched_id <= erts_no_schedulers); + + ai = &erts_allctrs_info[allocator_num]; + + if (!ai->enabled || !ai->alloc_util) { + return 0; + } + + if (!ai->thr_spec) { + if (sched_id != 0) { + /* Only thread-specific allocators can be scanned on a specific + * scheduler. */ + return 0; + } + + allocator = (Allctr_t*)ai->extra; + ASSERT(allocator->thread_safe); + } else { + ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t*)ai->extra; + + ASSERT(sched_id < tspec->size); + + allocator = tspec->allctr[sched_id]; + } + + *out = allocator; + + return 1; +} + +static void blockscan_sched_trampoline(void *arg) +{ + ErtsAlcuBlockscanYieldData *yield; + ErtsSchedulerData *esdp; + blockscan_t *scanner; + + esdp = erts_get_scheduler_data(); + scanner = (blockscan_t*)arg; + + yield = ERTS_SCHED_AUX_YIELD_DATA(esdp, alcu_blockscan); + + ASSERT((yield->last == NULL) == (yield->current == NULL)); + + if (yield->last != NULL) { + blockscan_t *prev_scanner = yield->last; + + ASSERT(prev_scanner->scanner_queue == NULL); + + prev_scanner->scanner_queue = scanner; + } else { + yield->current = scanner; + } + + scanner->scanner_queue = NULL; + yield->last = scanner; + + erts_notify_new_aux_yield_work(esdp); +} + +static void blockscan_dispatch(blockscan_t *scanner, Process *owner, + Allctr_t *allocator, int sched_id) +{ + ASSERT(erts_get_scheduler_id() != 0); + + if (sched_id == 0) { + /* Global instances are always handled on the current scheduler. */ + sched_id = ERTS_ALC_GET_THR_IX(); + ASSERT(allocator->thread_safe); + } + + scanner->allocator = allocator; + scanner->process = owner; + + erts_proc_inc_refc(scanner->process); + + cpool_init_carrier_data(scanner->allocator, &scanner->dummy_carrier); + erts_atomic_init_nob(&(scanner->dummy_carrier).allctr, + (erts_aint_t)allocator | ERTS_CRR_ALCTR_FLG_BUSY); + + if (ERTS_ALC_IS_CPOOL_ENABLED(scanner->allocator)) { + scanner->next_op = blockscan_sweep_cpool; + } else { + scanner->next_op = blockscan_sweep_mbcs; + } + + /* Aux yield jobs can only be set up while running on the scheduler that + * services them, so we move there before continuing. + * + * We can't drive the scan itself through this since the scheduler will + * always finish *all* misc aux work in one go which makes it impossible to + * yield. */ + erts_schedule_misc_aux_work(sched_id, blockscan_sched_trampoline, scanner); +} + +int erts_handle_yielded_alcu_blockscan(ErtsSchedulerData *esdp, + ErtsAlcuBlockscanYieldData *yield) +{ + blockscan_t *scanner = yield->current; + + (void)esdp; + + ASSERT((yield->last == NULL) == (yield->current == NULL)); + + if (scanner) { + if (scanner->next_op(scanner)) { + return 1; + } + + ASSERT(ERTS_PROC_IS_EXITING(scanner->process) || + scanner->current_op == blockscan_finish); + + yield->current = scanner->scanner_queue; + + if (yield->current == NULL) { + ASSERT(scanner == yield->last); + yield->last = NULL; + } + + erts_proc_dec_refc(scanner->process); + + /* Plain free is intentional. */ + free(scanner); + + return yield->current != NULL; + } + + return 0; +} + +void erts_alcu_sched_spec_data_init(ErtsSchedulerData *esdp) +{ + ErtsAlcuBlockscanYieldData *yield; + + yield = ERTS_SCHED_AUX_YIELD_DATA(esdp, alcu_blockscan); + + yield->current = NULL; + yield->last = NULL; +} + +/* ------------------------------------------------------------------------- */ + +static ERTS_INLINE int u64_log2(Uint64 v) +{ + static const int log2_tab64[64] = { + 63, 0, 58, 1, 59, 47, 53, 2, + 60, 39, 48, 27, 54, 33, 42, 3, + 61, 51, 37, 40, 49, 18, 28, 20, + 55, 30, 34, 11, 43, 14, 22, 4, + 62, 57, 46, 52, 38, 26, 32, 41, + 50, 36, 17, 19, 29, 10, 13, 21, + 56, 45, 25, 31, 35, 16, 9, 12, + 44, 24, 15, 8, 23, 7, 6, 5}; + + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + + return log2_tab64[((Uint64)((v - (v >> 1))*0x07EDD5E59A4E28C2)) >> 58]; +} + +/* ------------------------------------------------------------------------- */ + +typedef struct hist_tree__ { + struct hist_tree__ *parent; + struct hist_tree__ *left; + struct hist_tree__ *right; + + int is_red; + + alcu_atag_t tag; + UWord histogram[1]; +} hist_tree_t; + +#define ERTS_RBT_PREFIX hist_tree +#define ERTS_RBT_T hist_tree_t +#define ERTS_RBT_KEY_T UWord +#define ERTS_RBT_FLAGS_T int +#define ERTS_RBT_INIT_EMPTY_TNODE(T) ((void)0) +#define ERTS_RBT_IS_RED(T) ((T)->is_red) +#define ERTS_RBT_SET_RED(T) ((T)->is_red = 1) +#define ERTS_RBT_IS_BLACK(T) (!ERTS_RBT_IS_RED(T)) +#define ERTS_RBT_SET_BLACK(T) ((T)->is_red = 0) +#define ERTS_RBT_GET_FLAGS(T) ((T)->is_red) +#define ERTS_RBT_SET_FLAGS(T, F) ((T)->is_red = F) +#define ERTS_RBT_GET_PARENT(T) ((T)->parent) +#define ERTS_RBT_SET_PARENT(T, P) ((T)->parent = P) +#define ERTS_RBT_GET_RIGHT(T) ((T)->right) +#define ERTS_RBT_SET_RIGHT(T, R) ((T)->right = (R)) +#define ERTS_RBT_GET_LEFT(T) ((T)->left) +#define ERTS_RBT_SET_LEFT(T, L) ((T)->left = (L)) +#define ERTS_RBT_GET_KEY(T) ((T)->tag) +#define ERTS_RBT_IS_LT(KX, KY) (KX < KY) +#define ERTS_RBT_IS_EQ(KX, KY) (KX == KY) +#define ERTS_RBT_WANT_FOREACH_DESTROY_YIELDING +#define ERTS_RBT_WANT_FOREACH_DESTROY +#define ERTS_RBT_WANT_INSERT +#define ERTS_RBT_WANT_LOOKUP +#define ERTS_RBT_UNDEF + +#include "erl_rbtree.h" + +typedef struct { + blockscan_t common; + + ErtsIRefStorage iref; + Process *process; + + hist_tree_rbt_yield_state_t hist_tree_yield; + hist_tree_t *hist_tree; + UWord hist_count; + + UWord hist_slot_start; + int hist_slot_count; + + UWord unscanned_size; + + ErtsHeapFactory msg_factory; + int building_result; + Eterm result_list; +} gather_ahist_t; + +static void gather_ahist_update(gather_ahist_t *state, UWord tag, UWord size) +{ + hist_tree_t *hist_node; + UWord size_interval; + int hist_slot; + + hist_node = hist_tree_rbt_lookup(state->hist_tree, tag); + + if (hist_node == NULL) { + /* Plain calloc is intentional. */ + hist_node = (hist_tree_t*)calloc(1, sizeof(hist_tree_t) + + (state->hist_slot_count - 1) * + sizeof(hist_node->histogram[0])); + hist_node->tag = tag; + + hist_tree_rbt_insert(&state->hist_tree, hist_node); + state->hist_count++; + } + + size_interval = (size / state->hist_slot_start); + size_interval = u64_log2(size_interval + 1); + + hist_slot = MIN(size_interval, state->hist_slot_count - 1); + + hist_node->histogram[hist_slot]++; +} + +static int gather_ahist_scan(Allctr_t *allocator, + void *user_data, + Carrier_t *carrier) +{ + gather_ahist_t *state; + int blocks_scanned; + Block_t *block; + + state = (gather_ahist_t*)user_data; + blocks_scanned = 1; + + if (IS_SB_CARRIER(carrier)) { + alcu_atag_t tag; + + block = SBC2BLK(allocator, carrier); + tag = GET_BLK_ATAG(block); + + ASSERT(DBG_IS_VALID_ATAG(allocator, tag)); + + gather_ahist_update(state, tag, SBC_BLK_SZ(block)); + } else { + UWord scanned_bytes = MBC_HEADER_SIZE(allocator); + + ASSERT(IS_MB_CARRIER(carrier)); + + block = MBC_TO_FIRST_BLK(allocator, carrier); + + while (1) { + UWord block_size = MBC_BLK_SZ(block); + + if (IS_ALLOCED_BLK(block)) { + alcu_atag_t tag = GET_BLK_ATAG(block); + + ASSERT(DBG_IS_VALID_ATAG(allocator, tag)); + + gather_ahist_update(state, tag, block_size); + } + + scanned_bytes += block_size; + + if (blocks_scanned >= BLOCKSCAN_BAILOUT_THRESHOLD) { + state->unscanned_size += CARRIER_SZ(carrier) - scanned_bytes; + break; + } else if (IS_LAST_BLK(block)) { + break; + } + + block = NXT_BLK(block); + blocks_scanned++; + } + } + + return blocks_scanned; +} + +static void gather_ahist_append_result(hist_tree_t *node, void *arg) +{ + gather_ahist_t *state = (gather_ahist_t*)arg; + + Eterm histogram_tuple, tag_tuple; + + Eterm *hp; + int ix; + + ASSERT(state->building_result); + + hp = erts_produce_heap(&state->msg_factory, 7 + state->hist_slot_count, 0); + + hp[0] = make_arityval(state->hist_slot_count); + + for (ix = 0; ix < state->hist_slot_count; ix++) { + hp[1 + ix] = make_small(node->histogram[ix]); + } + + histogram_tuple = make_tuple(hp); + hp += 1 + state->hist_slot_count; + + hp[0] = make_arityval(3); + hp[1] = ATAG_ID(node->tag); + hp[2] = alloc_type_atoms[ATAG_TYPE(node->tag)]; + hp[3] = histogram_tuple; + + tag_tuple = make_tuple(hp); + hp += 4; + + state->result_list = CONS(hp, tag_tuple, state->result_list); + + /* Plain free is intentional. */ + free(node); +} + +static void gather_ahist_send(gather_ahist_t *state) +{ + Eterm result_tuple, unscanned_size, task_ref; + + Uint term_size; + Eterm *hp; + + ASSERT((state->result_list == NIL) ^ (state->hist_count > 0)); + ASSERT(state->building_result); + + term_size = 4 + erts_iref_storage_heap_size(&state->iref); + term_size += IS_USMALL(0, state->unscanned_size) ? 0 : BIG_UINT_HEAP_SIZE; + + hp = erts_produce_heap(&state->msg_factory, term_size, 0); + + task_ref = erts_iref_storage_make_ref(&state->iref, &hp, + &(state->msg_factory.message)->hfrag.off_heap, 0); + + unscanned_size = bld_unstable_uint(&hp, NULL, state->unscanned_size); + + hp[0] = make_arityval(3); + hp[1] = task_ref; + hp[2] = unscanned_size; + hp[3] = state->result_list; + + result_tuple = make_tuple(hp); + + erts_factory_trim_and_close(&state->msg_factory, &result_tuple, 1); + + erts_queue_message(state->process, 0, state->msg_factory.message, + result_tuple, am_system); +} + +static int gather_ahist_finish(void *arg) +{ + gather_ahist_t *state = (gather_ahist_t*)arg; + + if (!state->building_result) { + ErtsMessage *message; + Uint minimum_size; + Eterm *hp; + + /* {Ref, unscanned size, [{Tag, {Histogram}} | Rest]} */ + minimum_size = 4 + erts_iref_storage_heap_size(&state->iref) + + state->hist_count * (7 + state->hist_slot_count); + + message = erts_alloc_message(minimum_size, &hp); + erts_factory_selfcontained_message_init(&state->msg_factory, + message, hp); + + ERTS_RBT_YIELD_STAT_INIT(&state->hist_tree_yield); + + state->result_list = NIL; + state->building_result = 1; + } + + if (hist_tree_rbt_foreach_destroy_yielding(&state->hist_tree, + &gather_ahist_append_result, + state, + &state->hist_tree_yield, + BLOCKSCAN_REDUCTIONS)) { + return 1; + } + + gather_ahist_send(state); + + return 0; +} + +static void gather_ahist_destroy_result(hist_tree_t *node, void *arg) +{ + (void)arg; + free(node); +} + +static void gather_ahist_abort(void *arg) +{ + gather_ahist_t *state = (gather_ahist_t*)arg; + + if (state->building_result) { + erts_factory_undo(&state->msg_factory); + } + + hist_tree_rbt_foreach_destroy(&state->hist_tree, + &gather_ahist_destroy_result, + NULL); +} + +int erts_alcu_gather_alloc_histograms(Process *p, int allocator_num, + int sched_id, int hist_width, + UWord hist_start, Eterm ref) +{ + gather_ahist_t *gather_state; + blockscan_t *scanner; + Allctr_t *allocator; + + ASSERT(is_internal_ref(ref)); + + if (!blockscan_get_specific_allocator(allocator_num, + sched_id, + &allocator)) { + return 0; + } else if (!allocator->atags) { + return 0; + } + + ensure_atoms_initialized(allocator); + + /* Plain calloc is intentional. */ + gather_state = (gather_ahist_t*)calloc(1, sizeof(gather_ahist_t)); + scanner = &gather_state->common; + + scanner->abort = gather_ahist_abort; + scanner->scan = gather_ahist_scan; + scanner->finish = gather_ahist_finish; + scanner->user_data = gather_state; + + erts_iref_storage_save(&gather_state->iref, ref); + gather_state->hist_slot_start = hist_start; + gather_state->hist_slot_count = hist_width; + gather_state->process = p; + + blockscan_dispatch(scanner, p, allocator, sched_id); + + return 1; +} + +/* ------------------------------------------------------------------------- */ + +typedef struct chist_node__ { + struct chist_node__ *next; + + UWord carrier_size; + UWord unscanned_size; + UWord allocated_size; + + /* BLOCKSCAN_BAILOUT_THRESHOLD guarantees we won't overflow this or the + * counters in the free block histogram. */ + int allocated_count; + int flags; + + int histogram[1]; +} chist_node_t; + +typedef struct { + blockscan_t common; + + ErtsIRefStorage iref; + Process *process; + + Eterm allocator_desc; + + chist_node_t *info_list; + UWord info_count; + + UWord hist_slot_start; + int hist_slot_count; + + ErtsHeapFactory msg_factory; + int building_result; + Eterm result_list; +} gather_cinfo_t; + +static int gather_cinfo_scan(Allctr_t *allocator, + void *user_data, + Carrier_t *carrier) +{ + gather_cinfo_t *state; + chist_node_t *node; + int blocks_scanned; + Block_t *block; + + state = (gather_cinfo_t*)user_data; + node = calloc(1, sizeof(chist_node_t) + + (state->hist_slot_count - 1) * + sizeof(node->histogram[0])); + blocks_scanned = 1; + + /* ERTS_CRR_ALCTR_FLG_BUSY is ignored since we've set it ourselves and it + * would be misleading to include it. */ + node->flags = erts_atomic_read_rb(&carrier->allctr) & + (ERTS_CRR_ALCTR_FLG_MASK & ~ERTS_CRR_ALCTR_FLG_BUSY); + node->carrier_size = CARRIER_SZ(carrier); + + if (IS_SB_CARRIER(carrier)) { + UWord block_size; + + block = SBC2BLK(allocator, carrier); + block_size = SBC_BLK_SZ(block); + + node->allocated_size = block_size; + node->allocated_count = 1; + } else { + UWord scanned_bytes = MBC_HEADER_SIZE(allocator); + + block = MBC_TO_FIRST_BLK(allocator, carrier); + + while (1) { + UWord block_size = MBC_BLK_SZ(block); + + scanned_bytes += block_size; + + if (IS_ALLOCED_BLK(block)) { + node->allocated_size += block_size; + node->allocated_count++; + } else { + UWord size_interval; + int hist_slot; + + size_interval = (block_size / state->hist_slot_start); + size_interval = u64_log2(size_interval + 1); + + hist_slot = MIN(size_interval, state->hist_slot_count - 1); + + node->histogram[hist_slot]++; + } + + if (blocks_scanned >= BLOCKSCAN_BAILOUT_THRESHOLD) { + node->unscanned_size += CARRIER_SZ(carrier) - scanned_bytes; + break; + } else if (IS_LAST_BLK(block)) { + break; + } + + block = NXT_BLK(block); + blocks_scanned++; + } + } + + node->next = state->info_list; + state->info_list = node; + state->info_count++; + + return blocks_scanned; +} + +static void gather_cinfo_append_result(gather_cinfo_t *state, + chist_node_t *info) +{ + Eterm carrier_size, unscanned_size, allocated_size; + Eterm histogram_tuple, carrier_tuple; + + Uint term_size; + Eterm *hp; + int ix; + + ASSERT(state->building_result); + + term_size = 11 + state->hist_slot_count; + term_size += IS_USMALL(0, info->carrier_size) ? 0 : BIG_UINT_HEAP_SIZE; + term_size += IS_USMALL(0, info->unscanned_size) ? 0 : BIG_UINT_HEAP_SIZE; + term_size += IS_USMALL(0, info->allocated_size) ? 0 : BIG_UINT_HEAP_SIZE; + + hp = erts_produce_heap(&state->msg_factory, term_size, 0); + + hp[0] = make_arityval(state->hist_slot_count); + + for (ix = 0; ix < state->hist_slot_count; ix++) { + hp[1 + ix] = make_small(info->histogram[ix]); + } + + histogram_tuple = make_tuple(hp); + hp += 1 + state->hist_slot_count; + + carrier_size = bld_unstable_uint(&hp, NULL, info->carrier_size); + unscanned_size = bld_unstable_uint(&hp, NULL, info->unscanned_size); + allocated_size = bld_unstable_uint(&hp, NULL, info->allocated_size); + + hp[0] = make_arityval(7); + hp[1] = state->allocator_desc; + hp[2] = carrier_size; + hp[3] = unscanned_size; + hp[4] = allocated_size; + hp[5] = make_small(info->allocated_count); + hp[6] = (info->flags & ERTS_CRR_ALCTR_FLG_IN_POOL) ? am_true : am_false; + hp[7] = histogram_tuple; + + carrier_tuple = make_tuple(hp); + hp += 8; + + state->result_list = CONS(hp, carrier_tuple, state->result_list); + + free(info); +} + +static void gather_cinfo_send(gather_cinfo_t *state) +{ + Eterm result_tuple, task_ref; + + int term_size; + Eterm *hp; + + ASSERT((state->result_list == NIL) ^ (state->info_count > 0)); + ASSERT(state->building_result); + + term_size = 3 + erts_iref_storage_heap_size(&state->iref); + hp = erts_produce_heap(&state->msg_factory, term_size, 0); + + task_ref = erts_iref_storage_make_ref(&state->iref, &hp, + &(state->msg_factory.message)->hfrag.off_heap, 0); + + hp[0] = make_arityval(2); + hp[1] = task_ref; + hp[2] = state->result_list; + + result_tuple = make_tuple(hp); + + erts_factory_trim_and_close(&state->msg_factory, &result_tuple, 1); + + erts_queue_message(state->process, 0, state->msg_factory.message, + result_tuple, am_system); +} + +static int gather_cinfo_finish(void *arg) +{ + gather_cinfo_t *state = (gather_cinfo_t*)arg; + int reductions = BLOCKSCAN_REDUCTIONS; + + if (!state->building_result) { + ErtsMessage *message; + Uint minimum_size; + Eterm *hp; + + /* {Ref, [{Carrier size, unscanned size, allocated size, + * allocated block count, {Free block histogram}} | Rest]} */ + minimum_size = 3 + erts_iref_storage_heap_size(&state->iref) + + state->info_count * (11 + state->hist_slot_count); + + message = erts_alloc_message(minimum_size, &hp); + erts_factory_selfcontained_message_init(&state->msg_factory, + message, hp); + + state->result_list = NIL; + state->building_result = 1; + } + + while (state->info_list) { + chist_node_t *current = state->info_list; + state->info_list = current->next; + + gather_cinfo_append_result(state, current); + + if (reductions-- <= 0) { + return 1; + } + } + + gather_cinfo_send(state); + + return 0; +} + +static void gather_cinfo_abort(void *arg) +{ + gather_cinfo_t *state = (gather_cinfo_t*)arg; + + if (state->building_result) { + erts_factory_undo(&state->msg_factory); + } + + while (state->info_list) { + chist_node_t *current = state->info_list; + state->info_list = current->next; + + free(current); + } +} + +int erts_alcu_gather_carrier_info(struct process *p, int allocator_num, + int sched_id, int hist_width, + UWord hist_start, Eterm ref) +{ + gather_cinfo_t *gather_state; + blockscan_t *scanner; + + const char *allocator_desc; + Allctr_t *allocator; + + ASSERT(is_internal_ref(ref)); + + if (!blockscan_get_specific_allocator(allocator_num, + sched_id, + &allocator)) { + return 0; + } + + allocator_desc = ERTS_ALC_A2AD(allocator_num); + + /* Plain calloc is intentional. */ + gather_state = (gather_cinfo_t*)calloc(1, sizeof(gather_cinfo_t)); + scanner = &gather_state->common; + + scanner->abort = gather_cinfo_abort; + scanner->scan = gather_cinfo_scan; + scanner->finish = gather_cinfo_finish; + scanner->user_data = gather_state; + + gather_state->allocator_desc = erts_atom_put((byte *)allocator_desc, + sys_strlen(allocator_desc), + ERTS_ATOM_ENC_LATIN1, 1); + erts_iref_storage_save(&gather_state->iref, ref); + gather_state->hist_slot_start = hist_start * 2; + gather_state->hist_slot_count = hist_width; + gather_state->process = p; + + blockscan_dispatch(scanner, p, allocator, sched_id); + + return 1; +} + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\ * NOTE: erts_alcu_test() is only supposed to be used for testing. * @@ -6441,6 +7722,7 @@ erts_alcu_verify_unused_ts(Allctr_t *allctr) erts_mtx_unlock(&allctr->mutex); } + #ifdef DEBUG int is_sbc_blk(Block_t* blk) { diff --git a/erts/emulator/beam/erl_alloc_util.h b/erts/emulator/beam/erl_alloc_util.h index 05c8a0db3b..ff4d10b206 100644 --- a/erts/emulator/beam/erl_alloc_util.h +++ b/erts/emulator/beam/erl_alloc_util.h @@ -50,6 +50,7 @@ typedef struct { int tspec; int tpref; int ramv; + int atags; UWord sbct; UWord asbcst; UWord rsbcst; @@ -106,6 +107,7 @@ typedef struct { 0, /* (bool) tspec: thread specific */\ 0, /* (bool) tpref: thread preferred */\ 0, /* (bool) ramv: realloc always moves */\ + 0, /* (bool) atags: tagged allocations */\ 512*1024, /* (bytes) sbct: sbc threshold */\ 2*1024*2024, /* (amount) asbcst: abs sbc shrink threshold */\ 20, /* (%) rsbcst: rel sbc shrink threshold */\ @@ -142,6 +144,7 @@ typedef struct { 0, /* (bool) tspec: thread specific */\ 0, /* (bool) tpref: thread preferred */\ 0, /* (bool) ramv: realloc always moves */\ + 0, /* (bool) atags: tagged allocations */\ 64*1024, /* (bytes) sbct: sbc threshold */\ 2*1024*2024, /* (amount) asbcst: abs sbc shrink threshold */\ 20, /* (%) rsbcst: rel sbc shrink threshold */\ @@ -224,6 +227,36 @@ void erts_lcnt_update_allocator_locks(int enable); int erts_alcu_try_set_dyn_param(Allctr_t*, Eterm param, Uint value); +/* Gathers per-tag allocation histograms from the given allocator number + * (ERTS_ALC_A_*) and scheduler id. An id of 0 means the global instance will + * be used. + * + * The results are sent to `p`, and it returns the number of messages to wait + * for. */ +int erts_alcu_gather_alloc_histograms(struct process *p, int allocator_num, + int sched_id, int hist_width, + UWord hist_start, Eterm ref); + +/* Gathers per-carrier info from the given allocator number (ERTS_ALC_A_*) and + * scheduler id. An id of 0 means the global instance will be used. + * + * The results are sent to `p`, and it returns the number of messages to wait + * for. */ +int erts_alcu_gather_carrier_info(struct process *p, int allocator_num, + int sched_id, int hist_width, + UWord hist_start, Eterm ref); + +struct alcu_blockscan; + +typedef struct { + struct alcu_blockscan *current; + struct alcu_blockscan *last; +} ErtsAlcuBlockscanYieldData; + +int erts_handle_yielded_alcu_blockscan(struct ErtsSchedulerData_ *esdp, + ErtsAlcuBlockscanYieldData *yield); +void erts_alcu_sched_spec_data_init(struct ErtsSchedulerData_ *esdp); + #endif /* !ERL_ALLOC_UTIL__ */ #if defined(GET_ERL_ALLOC_UTIL_IMPL) && !defined(ERL_ALLOC_UTIL_IMPL__) @@ -548,6 +581,7 @@ struct Allctr_t_ { /* Options */ int t; int ramv; + int atags; Uint sbc_threshold; Uint sbc_move_threshold; Uint mbc_move_threshold; @@ -684,6 +718,7 @@ struct Allctr_t_ { #endif }; + int erts_alcu_start(Allctr_t *, AllctrInit_t *); void erts_alcu_stop(Allctr_t *); diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index d443e12409..0b186ab071 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -38,7 +38,7 @@ #include "erl_message.h" #include "erl_binary.h" #include "erl_db.h" -#include "erl_instrument.h" +#include "erl_mtrace.h" #include "dist.h" #include "erl_gc.h" #include "erl_cpu_topology.h" @@ -51,6 +51,7 @@ #include "erl_ptab.h" #include "erl_time.h" #include "erl_proc_sig_queue.h" +#include "erl_alloc_util.h" #ifdef HIPE #include "hipe_arch.h" #endif @@ -2151,45 +2152,6 @@ info_1_tuple(Process* BIF_P, /* Pointer to current process. */ return make_small(sizeof(UWord)); } goto badarg; - } else if (sel == am_allocated) { - if (arity == 2) { - Eterm res = THE_NON_VALUE; - char *buf; - Sint len = is_string(*tp); - if (len <= 0) - return res; - buf = (char *) erts_alloc(ERTS_ALC_T_TMP, len+1); - if (intlist_to_buf(*tp, buf, len) != len) - erts_exit(ERTS_ERROR_EXIT, "%s:%d: Internal error\n", __FILE__, __LINE__); - buf[len] = '\0'; - res = erts_instr_dump_memory_map(buf) ? am_true : am_false; - erts_free(ERTS_ALC_T_TMP, (void *) buf); - if (is_non_value(res)) - goto badarg; - return res; - } - else if (arity == 3 && tp[0] == am_status) { - if (is_atom(tp[1])) - return erts_instr_get_stat(BIF_P, tp[1], 1); - else { - Eterm res = THE_NON_VALUE; - char *buf; - Sint len = is_string(tp[1]); - if (len <= 0) - return res; - buf = (char *) erts_alloc(ERTS_ALC_T_TMP, len+1); - if (intlist_to_buf(tp[1], buf, len) != len) - erts_exit(ERTS_ERROR_EXIT, "%s:%d: Internal error\n", __FILE__, __LINE__); - buf[len] = '\0'; - res = erts_instr_dump_stat(buf, 1) ? am_true : am_false; - erts_free(ERTS_ALC_T_TMP, (void *) buf); - if (is_non_value(res)) - goto badarg; - return res; - } - } - else - goto badarg; } else if (sel == am_allocator) { switch (arity) { case 2: @@ -2557,8 +2519,6 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) } else if (BIF_ARG_1 == am_allocated_areas) { res = erts_allocated_areas(NULL, NULL, BIF_P); BIF_RET(res); - } else if (BIF_ARG_1 == am_allocated) { - BIF_RET(erts_instr_get_memory_map(BIF_P)); } else if (BIF_ARG_1 == am_hipe_architecture) { #if defined(HIPE) BIF_RET(hipe_arch_name); @@ -2699,9 +2659,6 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) sizeof(ERLANG_ARCHITECTURE)-1, NIL)); } - else if (BIF_ARG_1 == am_memory_types) { - return erts_instr_get_type_info(BIF_P); - } else if (BIF_ARG_1 == am_os_type) { BIF_RET(os_type_tuple); } @@ -4728,6 +4685,55 @@ BIF_RETTYPE erts_debug_set_internal_state_2(BIF_ALIST_2) BIF_ERROR(BIF_P, BADARG); } +static BIF_RETTYPE +gather_histograms_helper(Process * c_p, Eterm arg_tuple, + int gather(Process *, int, int, int, UWord, Eterm)) +{ + SWord hist_start, hist_width, sched_id; + int msg_count, alloc_num; + Eterm *args; + + /* This is an internal BIF, so the error checking is mostly left to erlang + * code. */ + + ASSERT(is_tuple_arity(arg_tuple, 5)); + args = tuple_val(arg_tuple); + + for (alloc_num = ERTS_ALC_A_MIN; alloc_num <= ERTS_ALC_A_MAX; alloc_num++) { + if(erts_is_atom_str(ERTS_ALC_A2AD(alloc_num), args[1], 0)) { + break; + } + } + + if (alloc_num > ERTS_ALC_A_MAX) { + BIF_ERROR(c_p, BADARG); + } + + sched_id = signed_val(args[2]); + hist_width = signed_val(args[3]); + hist_start = signed_val(args[4]); + + if (sched_id < 0 || sched_id > erts_no_schedulers) { + BIF_ERROR(c_p, BADARG); + } + + msg_count = gather(c_p, alloc_num, sched_id, hist_width, hist_start, args[5]); + + BIF_RET(make_small(msg_count)); +} + +BIF_RETTYPE erts_internal_gather_alloc_histograms_1(BIF_ALIST_1) +{ + return gather_histograms_helper(BIF_P, BIF_ARG_1, + erts_alcu_gather_alloc_histograms); +} + +BIF_RETTYPE erts_internal_gather_carrier_info_1(BIF_ALIST_1) +{ + return gather_histograms_helper(BIF_P, BIF_ARG_1, + erts_alcu_gather_carrier_info); +} + #ifdef ERTS_ENABLE_LOCK_COUNT typedef struct { diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 179822cc0b..57c6c10c7f 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -37,7 +37,7 @@ #include "erl_mseg.h" #include "erl_threads.h" #include "erl_hl_timer.h" -#include "erl_instrument.h" +#include "erl_mtrace.h" #include "erl_printf_term.h" #include "erl_misc_utils.h" #include "packet_parser.h" diff --git a/erts/emulator/beam/erl_instrument.c b/erts/emulator/beam/erl_instrument.c deleted file mode 100644 index 2f70e7996e..0000000000 --- a/erts/emulator/beam/erl_instrument.c +++ /dev/null @@ -1,1257 +0,0 @@ -/* - * %CopyrightBegin% - * - * Copyright Ericsson AB 2003-2016. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * %CopyrightEnd% - */ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#include "global.h" -#include "big.h" -#include "erl_instrument.h" -#include "erl_threads.h" - -typedef union { long l; double d; } Align_t; - -typedef struct { - Uint size; -#ifdef VALGRIND - void* valgrind_leak_suppressor; -#endif - Align_t mem[1]; -} StatBlock_t; - -#define STAT_BLOCK_HEADER_SIZE (sizeof(StatBlock_t) - sizeof(Align_t)) - -typedef struct MapStatBlock_t_ MapStatBlock_t; -struct MapStatBlock_t_ { - Uint size; - ErtsAlcType_t type_no; - Eterm pid; - MapStatBlock_t *prev; - MapStatBlock_t *next; - Align_t mem[1]; -}; - -#define MAP_STAT_BLOCK_HEADER_SIZE (sizeof(MapStatBlock_t) - sizeof(Align_t)) - -typedef struct { - Uint size; - Uint max_size; - Uint max_size_ever; - - Uint blocks; - Uint max_blocks; - Uint max_blocks_ever; -} Stat_t; - -static erts_mtx_t instr_mutex; -static erts_mtx_t instr_x_mutex; - -int erts_instr_memory_map; -int erts_instr_stat; - -static ErtsAllocatorFunctions_t real_allctrs[ERTS_ALC_A_MAX+1]; - -struct stats_ { - Stat_t tot; - Stat_t a[ERTS_ALC_A_MAX+1]; - Stat_t *ap[ERTS_ALC_A_MAX+1]; - Stat_t c[ERTS_ALC_C_MAX+1]; - Stat_t n[ERTS_ALC_N_MAX+1]; -}; - -static struct stats_ *stats; - -static MapStatBlock_t *mem_anchor; - -static Eterm *am_tot; -static Eterm *am_n; -static Eterm *am_a; -static Eterm *am_c; - -static int atoms_initialized; - -static struct { - Eterm total; - Eterm allocators; - Eterm classes; - Eterm types; - Eterm sizes; - Eterm blocks; - Eterm instr_hdr; -#ifdef DEBUG - Eterm end_of_atoms; -#endif -} am; - -static void ERTS_INLINE atom_init(Eterm *atom, const char *name) -{ - *atom = am_atom_put((char *) name, sys_strlen(name)); -} -#define AM_INIT(AM) atom_init(&am.AM, #AM) - -static void -init_atoms(void) -{ -#ifdef DEBUG - Eterm *atom; - for (atom = (Eterm *) &am; atom <= &am.end_of_atoms; atom++) { - *atom = THE_NON_VALUE; - } -#endif - - AM_INIT(total); - AM_INIT(allocators); - AM_INIT(classes); - AM_INIT(types); - AM_INIT(sizes); - AM_INIT(blocks); - AM_INIT(instr_hdr); - -#ifdef DEBUG - for (atom = (Eterm *) &am; atom < &am.end_of_atoms; atom++) { - ASSERT(*atom != THE_NON_VALUE); - } -#endif - - atoms_initialized = 1; -} - -#undef AM_INIT - -static void -init_am_tot(void) -{ - am_tot = (Eterm *) erts_alloc(ERTS_ALC_T_INSTR_INFO, - sizeof(Eterm)); - atom_init(am_tot, "total"); -} - - -static void -init_am_n(void) -{ - int i; - am_n = (Eterm *) erts_alloc(ERTS_ALC_T_INSTR_INFO, - (ERTS_ALC_N_MAX+1)*sizeof(Eterm)); - - for (i = ERTS_ALC_N_MIN; i <= ERTS_ALC_N_MAX; i++) { - atom_init(&am_n[i], ERTS_ALC_N2TD(i)); - } - -} - -static void -init_am_c(void) -{ - int i; - am_c = (Eterm *) erts_alloc(ERTS_ALC_T_INSTR_INFO, - (ERTS_ALC_C_MAX+1)*sizeof(Eterm)); - - for (i = ERTS_ALC_C_MIN; i <= ERTS_ALC_C_MAX; i++) { - atom_init(&am_c[i], ERTS_ALC_C2CD(i)); - } - -} - -static void -init_am_a(void) -{ - int i; - am_a = (Eterm *) erts_alloc(ERTS_ALC_T_INSTR_INFO, - (ERTS_ALC_A_MAX+1)*sizeof(Eterm)); - - for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) { - atom_init(&am_a[i], ERTS_ALC_A2AD(i)); - } - -} - -static ERTS_INLINE void -stat_upd_alloc(ErtsAlcType_t n, Uint size) -{ - ErtsAlcType_t t = ERTS_ALC_N2T(n); - ErtsAlcType_t a = ERTS_ALC_T2A(t); - ErtsAlcType_t c = ERTS_ALC_T2C(t); - - stats->ap[a]->size += size; - if (stats->ap[a]->max_size < stats->ap[a]->size) - stats->ap[a]->max_size = stats->ap[a]->size; - - stats->c[c].size += size; - if (stats->c[c].max_size < stats->c[c].size) - stats->c[c].max_size = stats->c[c].size; - - stats->n[n].size += size; - if (stats->n[n].max_size < stats->n[n].size) - stats->n[n].max_size = stats->n[n].size; - - stats->tot.size += size; - if (stats->tot.max_size < stats->tot.size) - stats->tot.max_size = stats->tot.size; - - stats->ap[a]->blocks++; - if (stats->ap[a]->max_blocks < stats->ap[a]->blocks) - stats->ap[a]->max_blocks = stats->ap[a]->blocks; - - stats->c[c].blocks++; - if (stats->c[c].max_blocks < stats->c[c].blocks) - stats->c[c].max_blocks = stats->c[c].blocks; - - stats->n[n].blocks++; - if (stats->n[n].max_blocks < stats->n[n].blocks) - stats->n[n].max_blocks = stats->n[n].blocks; - - stats->tot.blocks++; - if (stats->tot.max_blocks < stats->tot.blocks) - stats->tot.max_blocks = stats->tot.blocks; - -} - - -static ERTS_INLINE void -stat_upd_free(ErtsAlcType_t n, Uint size) -{ - ErtsAlcType_t t = ERTS_ALC_N2T(n); - ErtsAlcType_t a = ERTS_ALC_T2A(t); - ErtsAlcType_t c = ERTS_ALC_T2C(t); - - ASSERT(stats->ap[a]->size >= size); - stats->ap[a]->size -= size; - - ASSERT(stats->c[c].size >= size); - stats->c[c].size -= size; - - ASSERT(stats->n[n].size >= size); - stats->n[n].size -= size; - - ASSERT(stats->tot.size >= size); - stats->tot.size -= size; - - ASSERT(stats->ap[a]->blocks > 0); - stats->ap[a]->blocks--; - - ASSERT(stats->c[c].blocks > 0); - stats->c[c].blocks--; - - ASSERT(stats->n[n].blocks > 0); - stats->n[n].blocks--; - - ASSERT(stats->tot.blocks > 0); - stats->tot.blocks--; - -} - - -static ERTS_INLINE void -stat_upd_realloc(ErtsAlcType_t n, Uint size, Uint old_size) -{ - if (old_size) - stat_upd_free(n, old_size); - stat_upd_alloc(n, size); -} - -/* - * stat instrumentation callback functions - */ - -static void stat_pre_lock(void) -{ - erts_mtx_lock(&instr_mutex); -} - -static void stat_pre_unlock(void) -{ - erts_mtx_unlock(&instr_mutex); -} - -static ErtsAllocatorWrapper_t instr_wrapper; - -static void * -stat_alloc(ErtsAlcType_t n, void *extra, Uint size) -{ - ErtsAllocatorFunctions_t *real_af = (ErtsAllocatorFunctions_t *) extra; - Uint ssize; - void *res; - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_lock(&instr_mutex); - } - - ssize = size + STAT_BLOCK_HEADER_SIZE; - res = (*real_af->alloc)(n, real_af->extra, ssize); - if (res) { - stat_upd_alloc(n, size); - ((StatBlock_t *) res)->size = size; -#ifdef VALGRIND - /* Suppress "possibly leaks" by storing an actual dummy pointer - to the _start_ of the allocated block.*/ - ((StatBlock_t *) res)->valgrind_leak_suppressor = res; -#endif - res = (void *) ((StatBlock_t *) res)->mem; - } - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_unlock(&instr_mutex); - } - - return res; -} - -static void * -stat_realloc(ErtsAlcType_t n, void *extra, void *ptr, Uint size) -{ - ErtsAllocatorFunctions_t *real_af = (ErtsAllocatorFunctions_t *) extra; - Uint old_size; - Uint ssize; - void *sptr; - void *res; - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_lock(&instr_mutex); - } - - if (ptr) { - sptr = (void *) (((char *) ptr) - STAT_BLOCK_HEADER_SIZE); - old_size = ((StatBlock_t *) sptr)->size; - } - else { - sptr = NULL; - old_size = 0; - } - - ssize = size + STAT_BLOCK_HEADER_SIZE; - res = (*real_af->realloc)(n, real_af->extra, sptr, ssize); - if (res) { - stat_upd_realloc(n, size, old_size); - ((StatBlock_t *) res)->size = size; -#ifdef VALGRIND - ((StatBlock_t *) res)->valgrind_leak_suppressor = res; -#endif - res = (void *) ((StatBlock_t *) res)->mem; - } - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_unlock(&instr_mutex); - } - - return res; -} - -static void -stat_free(ErtsAlcType_t n, void *extra, void *ptr) -{ - ErtsAllocatorFunctions_t *real_af = (ErtsAllocatorFunctions_t *) extra; - void *sptr; - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_lock(&instr_mutex); - } - - if (ptr) { - sptr = (void *) (((char *) ptr) - STAT_BLOCK_HEADER_SIZE); - stat_upd_free(n, ((StatBlock_t *) sptr)->size); - } - else { - sptr = NULL; - } - - (*real_af->free)(n, real_af->extra, sptr); - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_unlock(&instr_mutex); - } - -} - -/* - * map stat instrumentation callback functions - */ - -static void map_stat_pre_lock(void) -{ - erts_mtx_lock(&instr_x_mutex); - erts_mtx_lock(&instr_mutex); -} - -static void map_stat_pre_unlock(void) -{ - erts_mtx_unlock(&instr_mutex); - erts_mtx_unlock(&instr_x_mutex); -} - -static void * -map_stat_alloc(ErtsAlcType_t n, void *extra, Uint size) -{ - ErtsAllocatorFunctions_t *real_af = (ErtsAllocatorFunctions_t *) extra; - Uint msize; - void *res; - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_lock(&instr_mutex); - } - - msize = size + MAP_STAT_BLOCK_HEADER_SIZE; - res = (*real_af->alloc)(n, real_af->extra, msize); - if (res) { - MapStatBlock_t *mb = (MapStatBlock_t *) res; - stat_upd_alloc(n, size); - - mb->size = size; - mb->type_no = n; - mb->pid = erts_get_current_pid(); - - mb->prev = NULL; - mb->next = mem_anchor; - if (mem_anchor) - mem_anchor->prev = mb; - mem_anchor = mb; - - res = (void *) mb->mem; - } - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_unlock(&instr_mutex); - } - - return res; -} - -static void * -map_stat_realloc(ErtsAlcType_t n, void *extra, void *ptr, Uint size) -{ - ErtsAllocatorFunctions_t *real_af = (ErtsAllocatorFunctions_t *) extra; - Uint old_size; - Uint msize; - void *mptr; - void *res; - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_lock(&instr_x_mutex); - erts_mtx_lock(&instr_mutex); - } - - if (ptr) { - mptr = (void *) (((char *) ptr) - MAP_STAT_BLOCK_HEADER_SIZE); - old_size = ((MapStatBlock_t *) mptr)->size; - } - else { - mptr = NULL; - old_size = 0; - } - - msize = size + MAP_STAT_BLOCK_HEADER_SIZE; - res = (*real_af->realloc)(n, real_af->extra, mptr, msize); - if (res) { - MapStatBlock_t *mb = (MapStatBlock_t *) res; - - mb->size = size; - mb->type_no = n; - mb->pid = erts_get_current_pid(); - - stat_upd_realloc(n, size, old_size); - - if (mptr != res) { - - if (mptr) { - if (mb->prev) - mb->prev->next = mb; - else { - ASSERT(mem_anchor == (MapStatBlock_t *) mptr); - mem_anchor = mb; - } - if (mb->next) - mb->next->prev = mb; - } - else { - mb->prev = NULL; - mb->next = mem_anchor; - if (mem_anchor) - mem_anchor->prev = mb; - mem_anchor = mb; - } - - } - - res = (void *) mb->mem; - } - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_unlock(&instr_mutex); - erts_mtx_unlock(&instr_x_mutex); - } - - return res; -} - -static void -map_stat_free(ErtsAlcType_t n, void *extra, void *ptr) -{ - ErtsAllocatorFunctions_t *real_af = (ErtsAllocatorFunctions_t *) extra; - void *mptr; - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_lock(&instr_x_mutex); - erts_mtx_lock(&instr_mutex); - } - - if (ptr) { - MapStatBlock_t *mb; - - mptr = (void *) (((char *) ptr) - MAP_STAT_BLOCK_HEADER_SIZE); - mb = (MapStatBlock_t *) mptr; - - stat_upd_free(n, mb->size); - - if (mb->prev) - mb->prev->next = mb->next; - else - mem_anchor = mb->next; - if (mb->next) - mb->next->prev = mb->prev; - } - else { - mptr = NULL; - } - - (*real_af->free)(n, real_af->extra, mptr); - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_unlock(&instr_mutex); - erts_mtx_unlock(&instr_x_mutex); - } - -} - -static void dump_memory_map_to_stream(fmtfn_t to, void* to_arg) -{ - ErtsAlcType_t n; - MapStatBlock_t *bp; - int lock = !ERTS_IS_CRASH_DUMPING; - if (lock) { - ASSERT(!erts_is_allctr_wrapper_prelocked()); - erts_mtx_lock(&instr_mutex); - } - - /* Write header */ - - erts_cbprintf(to, to_arg, - "{instr_hdr,\n" - " %lu,\n" - " %lu,\n" - " {", - (unsigned long) ERTS_INSTR_VSN, - (unsigned long) MAP_STAT_BLOCK_HEADER_SIZE); - -#if ERTS_ALC_N_MIN != 1 -#error ERTS_ALC_N_MIN is not 1 -#endif - - for (n = ERTS_ALC_N_MIN; n <= ERTS_ALC_N_MAX; n++) { - ErtsAlcType_t t = ERTS_ALC_N2T(n); - ErtsAlcType_t a = ERTS_ALC_T2A(t); - ErtsAlcType_t c = ERTS_ALC_T2C(t); - const char *astr; - - if (erts_allctrs_info[a].enabled) - astr = ERTS_ALC_A2AD(a); - else - astr = ERTS_ALC_A2AD(ERTS_ALC_A_SYSTEM); - - erts_cbprintf(to, to_arg, - "%s{%s,%s,%s}%s", - (n == ERTS_ALC_N_MIN) ? "" : " ", - ERTS_ALC_N2TD(n), - astr, - ERTS_ALC_C2CD(c), - (n == ERTS_ALC_N_MAX) ? "" : ",\n"); - } - - erts_cbprintf(to, to_arg, "}}.\n"); - - /* Write memory data */ - for (bp = mem_anchor; bp; bp = bp->next) { - if (is_internal_pid(bp->pid)) - erts_cbprintf(to, to_arg, - "{%lu, %lu, %lu, {%lu,%lu,%lu}}.\n", - (UWord) bp->type_no, - (UWord) bp->mem, - (UWord) bp->size, - (UWord) pid_channel_no(bp->pid), - (UWord) pid_number(bp->pid), - (UWord) pid_serial(bp->pid)); - else - erts_cbprintf(to, to_arg, - "{%lu, %lu, %lu, undefined}.\n", - (UWord) bp->type_no, - (UWord) bp->mem, - (UWord) bp->size); - } - - if (lock) - erts_mtx_unlock(&instr_mutex); -} - -int erts_instr_dump_memory_map_to(fmtfn_t to, void* to_arg) -{ - if (!erts_instr_memory_map) - return 0; - - dump_memory_map_to_stream(to, to_arg); - return 1; -} - -int erts_instr_dump_memory_map(const char *name) -{ - int fd; - - if (!erts_instr_memory_map) - return 0; - - fd = open(name, O_WRONLY | O_CREAT | O_TRUNC, 0640); - if (fd < 0) - return 0; - - dump_memory_map_to_stream(erts_write_fd, (void*)&fd); - - close(fd); - return 1; -} - -Eterm erts_instr_get_memory_map(Process *proc) -{ - MapStatBlock_t *org_mem_anchor; - Eterm hdr_tuple, md_list, res; - Eterm *hp; - Uint hsz; - MapStatBlock_t *bp; -#ifdef DEBUG - Eterm *end_hp; -#endif - - if (!erts_instr_memory_map) - return am_false; - - if (!atoms_initialized) - init_atoms(); - if (!am_n) - init_am_n(); - if (!am_c) - init_am_c(); - if (!am_a) - init_am_a(); - - erts_mtx_lock(&instr_x_mutex); - erts_mtx_lock(&instr_mutex); - - /* Header size */ - hsz = 5 + 1 + (ERTS_ALC_N_MAX+1-ERTS_ALC_N_MIN)*(1 + 4); - - /* Memory data list */ - for (bp = mem_anchor; bp; bp = bp->next) { - if (is_internal_pid(bp->pid)) { -#if (_PID_NUM_SIZE - 1 > MAX_SMALL) - if (internal_pid_number(bp->pid) > MAX_SMALL) - hsz += BIG_UINT_HEAP_SIZE; -#endif -#if (_PID_SER_SIZE - 1 > MAX_SMALL) - if (internal_pid_serial(bp->pid) > MAX_SMALL) - hsz += BIG_UINT_HEAP_SIZE; -#endif - hsz += 4; - } - - if ((UWord) bp->mem > MAX_SMALL) - hsz += BIG_UINT_HEAP_SIZE; - if (bp->size > MAX_SMALL) - hsz += BIG_UINT_HEAP_SIZE; - - hsz += 5 + 2; - } - - hsz += 3; /* Root tuple */ - - org_mem_anchor = mem_anchor; - mem_anchor = NULL; - - erts_mtx_unlock(&instr_mutex); - - hp = HAlloc(proc, hsz); /* May end up calling map_stat_alloc() */ - - erts_mtx_lock(&instr_mutex); - -#ifdef DEBUG - end_hp = hp + hsz; -#endif - - { /* Build header */ - ErtsAlcType_t n; - Eterm type_map; - Uint *hp2 = hp; -#ifdef DEBUG - Uint *hp2_end; -#endif - - hp += (ERTS_ALC_N_MAX + 1 - ERTS_ALC_N_MIN)*4; - -#ifdef DEBUG - hp2_end = hp; -#endif - - type_map = make_tuple(hp); - *(hp++) = make_arityval(ERTS_ALC_N_MAX + 1 - ERTS_ALC_N_MIN); - - for (n = ERTS_ALC_N_MIN; n <= ERTS_ALC_N_MAX; n++) { - ErtsAlcType_t t = ERTS_ALC_N2T(n); - ErtsAlcType_t a = ERTS_ALC_T2A(t); - ErtsAlcType_t c = ERTS_ALC_T2C(t); - - if (!erts_allctrs_info[a].enabled) - a = ERTS_ALC_A_SYSTEM; - - *(hp++) = TUPLE3(hp2, am_n[n], am_a[a], am_c[c]); - hp2 += 4; - } - - ASSERT(hp2 == hp2_end); - - hdr_tuple = TUPLE4(hp, - am.instr_hdr, - make_small(ERTS_INSTR_VSN), - make_small(MAP_STAT_BLOCK_HEADER_SIZE), - type_map); - - hp += 5; - } - - /* Build memory data list */ - - for (md_list = NIL, bp = org_mem_anchor; bp; bp = bp->next) { - Eterm tuple; - Eterm type; - Eterm ptr; - Eterm size; - Eterm pid; - - if (is_not_internal_pid(bp->pid)) - pid = am_undefined; - else { - Eterm c; - Eterm n; - Eterm s; - -#if (ERST_INTERNAL_CHANNEL_NO > MAX_SMALL) -#error Oversized internal channel number -#endif - c = make_small(ERST_INTERNAL_CHANNEL_NO); - -#if (_PID_NUM_SIZE - 1 > MAX_SMALL) - if (internal_pid_number(bp->pid) > MAX_SMALL) { - n = uint_to_big(internal_pid_number(bp->pid), hp); - hp += BIG_UINT_HEAP_SIZE; - } - else -#endif - n = make_small(internal_pid_number(bp->pid)); - -#if (_PID_SER_SIZE - 1 > MAX_SMALL) - if (internal_pid_serial(bp->pid) > MAX_SMALL) { - s = uint_to_big(internal_pid_serial(bp->pid), hp); - hp += BIG_UINT_HEAP_SIZE; - } - else -#endif - s = make_small(internal_pid_serial(bp->pid)); - pid = TUPLE3(hp, c, n, s); - hp += 4; - } - - -#if ERTS_ALC_N_MAX > MAX_SMALL -#error Oversized memory type number -#endif - type = make_small(bp->type_no); - - if ((UWord) bp->mem > MAX_SMALL) { - ptr = uint_to_big((UWord) bp->mem, hp); - hp += BIG_UINT_HEAP_SIZE; - } - else - ptr = make_small((UWord) bp->mem); - - if (bp->size > MAX_SMALL) { - size = uint_to_big(bp->size, hp); - hp += BIG_UINT_HEAP_SIZE; - } - else - size = make_small(bp->size); - - tuple = TUPLE4(hp, type, ptr, size, pid); - hp += 5; - - md_list = CONS(hp, tuple, md_list); - hp += 2; - } - - res = TUPLE2(hp, hdr_tuple, md_list); - - ASSERT(hp + 3 == end_hp); - - if (mem_anchor) { - for (bp = mem_anchor; bp->next; bp = bp->next) - ; - ASSERT(org_mem_anchor); - org_mem_anchor->prev = bp; - bp->next = org_mem_anchor; - } - else { - mem_anchor = org_mem_anchor; - } - - erts_mtx_unlock(&instr_mutex); - erts_mtx_unlock(&instr_x_mutex); - - return res; -} - -static ERTS_INLINE void -begin_new_max_period(Stat_t *stat, int min, int max) -{ - int i; - for (i = min; i <= max; i++) { - stat[i].max_size = stat[i].size; - stat[i].max_blocks = stat[i].blocks; - } -} - -static ERTS_INLINE void -update_max_ever_values(Stat_t *stat, int min, int max) -{ - int i; - for (i = min; i <= max; i++) { - if (stat[i].max_size_ever < stat[i].max_size) - stat[i].max_size_ever = stat[i].max_size; - if (stat[i].max_blocks_ever < stat[i].max_blocks) - stat[i].max_blocks_ever = stat[i].max_blocks; - } -} - -#define bld_string erts_bld_string -#define bld_tuple erts_bld_tuple -#define bld_tuplev erts_bld_tuplev -#define bld_list erts_bld_list -#define bld_2tup_list erts_bld_2tup_list -#define bld_uint erts_bld_uint - -Eterm -erts_instr_get_stat(Process *proc, Eterm what, int begin_max_period) -{ - int i, len, max, min, allctr; - Eterm *names, *values, res; - Uint arr_size, stat_size, hsz, *hszp, *hp, **hpp; - Stat_t *stat_src, *stat; - - if (!erts_instr_stat) - return am_false; - - if (!atoms_initialized) - init_atoms(); - - if (what == am.total) { - min = 0; - max = 0; - allctr = 0; - stat_size = sizeof(Stat_t); - stat_src = &stats->tot; - if (!am_tot) - init_am_tot(); - names = am_tot; - } - else if (what == am.allocators) { - min = ERTS_ALC_A_MIN; - max = ERTS_ALC_A_MAX; - allctr = 1; - stat_size = sizeof(Stat_t)*(ERTS_ALC_A_MAX+1); - stat_src = stats->a; - if (!am_a) - init_am_a(); - names = am_a; - } - else if (what == am.classes) { - min = ERTS_ALC_C_MIN; - max = ERTS_ALC_C_MAX; - allctr = 0; - stat_size = sizeof(Stat_t)*(ERTS_ALC_C_MAX+1); - stat_src = stats->c; - if (!am_c) - init_am_c(); - names = &am_c[ERTS_ALC_C_MIN]; - } - else if (what == am.types) { - min = ERTS_ALC_N_MIN; - max = ERTS_ALC_N_MAX; - allctr = 0; - stat_size = sizeof(Stat_t)*(ERTS_ALC_N_MAX+1); - stat_src = stats->n; - if (!am_n) - init_am_n(); - names = &am_n[ERTS_ALC_N_MIN]; - } - else { - return THE_NON_VALUE; - } - - stat = (Stat_t *) erts_alloc(ERTS_ALC_T_TMP, stat_size); - - arr_size = (max - min + 1)*sizeof(Eterm); - - if (allctr) - names = (Eterm *) erts_alloc(ERTS_ALC_T_TMP, arr_size); - - values = (Eterm *) erts_alloc(ERTS_ALC_T_TMP, arr_size); - - erts_mtx_lock(&instr_mutex); - - update_max_ever_values(stat_src, min, max); - - sys_memcpy((void *) stat, (void *) stat_src, stat_size); - - if (begin_max_period) - begin_new_max_period(stat_src, min, max); - - erts_mtx_unlock(&instr_mutex); - - hsz = 0; - hszp = &hsz; - hpp = NULL; - - restart_bld: - - len = 0; - for (i = min; i <= max; i++) { - if (!allctr || erts_allctrs_info[i].enabled) { - Eterm s[2]; - - if (allctr) - names[len] = am_a[i]; - - s[0] = bld_tuple(hpp, hszp, 4, - am.sizes, - bld_uint(hpp, hszp, stat[i].size), - bld_uint(hpp, hszp, stat[i].max_size), - bld_uint(hpp, hszp, stat[i].max_size_ever)); - - s[1] = bld_tuple(hpp, hszp, 4, - am.blocks, - bld_uint(hpp, hszp, stat[i].blocks), - bld_uint(hpp, hszp, stat[i].max_blocks), - bld_uint(hpp, hszp, stat[i].max_blocks_ever)); - - values[len] = bld_list(hpp, hszp, 2, s); - - len++; - } - } - - res = bld_2tup_list(hpp, hszp, len, names, values); - - if (!hpp) { - hp = HAlloc(proc, hsz); - hszp = NULL; - hpp = &hp; - goto restart_bld; - } - - erts_free(ERTS_ALC_T_TMP, (void *) stat); - erts_free(ERTS_ALC_T_TMP, (void *) values); - if (allctr) - erts_free(ERTS_ALC_T_TMP, (void *) names); - - return res; -} - -static void -dump_stat_to_stream(fmtfn_t to, void* to_arg, int begin_max_period) -{ - ErtsAlcType_t i, a_max, a_min; - - erts_mtx_lock(&instr_mutex); - - erts_cbprintf(to, to_arg, - "{instr_vsn,%lu}.\n", - (unsigned long) ERTS_INSTR_VSN); - - update_max_ever_values(&stats->tot, 0, 0); - - erts_cbprintf(to, to_arg, - "{total,[{total,[{sizes,%lu,%lu,%lu},{blocks,%lu,%lu,%lu}]}]}.\n", - (UWord) stats->tot.size, - (UWord) stats->tot.max_size, - (UWord) stats->tot.max_size_ever, - (UWord) stats->tot.blocks, - (UWord) stats->tot.max_blocks, - (UWord) stats->tot.max_blocks_ever); - - a_max = 0; - a_min = ~0; - for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) { - if (erts_allctrs_info[i].enabled) { - if (a_min > i) - a_min = i; - if (a_max < i) - a_max = i; - } - } - - ASSERT(ERTS_ALC_A_MIN <= a_min && a_min <= ERTS_ALC_A_MAX); - ASSERT(ERTS_ALC_A_MIN <= a_max && a_max <= ERTS_ALC_A_MAX); - ASSERT(a_min <= a_max); - - update_max_ever_values(stats->a, a_min, a_max); - - for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) { - if (erts_allctrs_info[i].enabled) { - erts_cbprintf(to, to_arg, - "%s{%s,[{sizes,%lu,%lu,%lu},{blocks,%lu,%lu,%lu}]}%s", - i == a_min ? "{allocators,\n [" : " ", - ERTS_ALC_A2AD(i), - (UWord) stats->a[i].size, - (UWord) stats->a[i].max_size, - (UWord) stats->a[i].max_size_ever, - (UWord) stats->a[i].blocks, - (UWord) stats->a[i].max_blocks, - (UWord) stats->a[i].max_blocks_ever, - i == a_max ? "]}.\n" : ",\n"); - } - } - - update_max_ever_values(stats->c, ERTS_ALC_C_MIN, ERTS_ALC_C_MAX); - - for (i = ERTS_ALC_C_MIN; i <= ERTS_ALC_C_MAX; i++) { - erts_cbprintf(to, to_arg, - "%s{%s,[{sizes,%lu,%lu,%lu},{blocks,%lu,%lu,%lu}]}%s", - i == ERTS_ALC_C_MIN ? "{classes,\n [" : " ", - ERTS_ALC_C2CD(i), - (UWord) stats->c[i].size, - (UWord) stats->c[i].max_size, - (UWord) stats->c[i].max_size_ever, - (UWord) stats->c[i].blocks, - (UWord) stats->c[i].max_blocks, - (UWord) stats->c[i].max_blocks_ever, - i == ERTS_ALC_C_MAX ? "]}.\n" : ",\n" ); - } - - update_max_ever_values(stats->n, ERTS_ALC_N_MIN, ERTS_ALC_N_MAX); - - for (i = ERTS_ALC_N_MIN; i <= ERTS_ALC_N_MAX; i++) { - erts_cbprintf(to, to_arg, - "%s{%s,[{sizes,%lu,%lu,%lu},{blocks,%lu,%lu,%lu}]}%s", - i == ERTS_ALC_N_MIN ? "{types,\n [" : " ", - ERTS_ALC_N2TD(i), - (UWord) stats->n[i].size, - (UWord) stats->n[i].max_size, - (UWord) stats->n[i].max_size_ever, - (UWord) stats->n[i].blocks, - (UWord) stats->n[i].max_blocks, - (UWord) stats->n[i].max_blocks_ever, - i == ERTS_ALC_N_MAX ? "]}.\n" : ",\n" ); - } - - if (begin_max_period) { - begin_new_max_period(&stats->tot, 0, 0); - begin_new_max_period(stats->a, a_min, a_max); - begin_new_max_period(stats->c, ERTS_ALC_C_MIN, ERTS_ALC_C_MAX); - begin_new_max_period(stats->n, ERTS_ALC_N_MIN, ERTS_ALC_N_MAX); - } - - erts_mtx_unlock(&instr_mutex); - -} - -int erts_instr_dump_stat_to(fmtfn_t to, void* to_arg, int begin_max_period) -{ - if (!erts_instr_stat) - return 0; - - dump_stat_to_stream(to, to_arg, begin_max_period); - return 1; -} - -int erts_instr_dump_stat(const char *name, int begin_max_period) -{ - int fd; - - if (!erts_instr_stat) - return 0; - - fd = open(name, O_WRONLY | O_CREAT | O_TRUNC,0640); - if (fd < 0) - return 0; - - dump_stat_to_stream(erts_write_fd, (void*)&fd, begin_max_period); - - close(fd); - return 1; -} - - -Uint -erts_instr_get_total(void) -{ - return erts_instr_stat ? stats->tot.size : 0; -} - -Uint -erts_instr_get_max_total(void) -{ - if (erts_instr_stat) { - update_max_ever_values(&stats->tot, 0, 0); - return stats->tot.max_size_ever; - } - return 0; -} - -Eterm -erts_instr_get_type_info(Process *proc) -{ - Eterm res, *tpls; - Uint hsz, *hszp, *hp, **hpp; - ErtsAlcType_t n; - - if (!am_n) - init_am_n(); - if (!am_a) - init_am_a(); - if (!am_c) - init_am_c(); - - tpls = (Eterm *) erts_alloc(ERTS_ALC_T_TMP, - (ERTS_ALC_N_MAX-ERTS_ALC_N_MIN+1) - * sizeof(Eterm)); - hsz = 0; - hszp = &hsz; - hpp = NULL; - - restart_bld: - -#if ERTS_ALC_N_MIN != 1 -#error ERTS_ALC_N_MIN is not 1 -#endif - - for (n = ERTS_ALC_N_MIN; n <= ERTS_ALC_N_MAX; n++) { - ErtsAlcType_t t = ERTS_ALC_N2T(n); - ErtsAlcType_t a = ERTS_ALC_T2A(t); - ErtsAlcType_t c = ERTS_ALC_T2C(t); - - if (!erts_allctrs_info[a].enabled) - a = ERTS_ALC_A_SYSTEM; - - tpls[n - ERTS_ALC_N_MIN] - = bld_tuple(hpp, hszp, 3, am_n[n], am_a[a], am_c[c]); - } - - res = bld_tuplev(hpp, hszp, ERTS_ALC_N_MAX-ERTS_ALC_N_MIN+1, tpls); - - if (!hpp) { - hp = HAlloc(proc, hsz); - hszp = NULL; - hpp = &hp; - goto restart_bld; - } - - erts_free(ERTS_ALC_T_TMP, tpls); - - return res; -} - -Uint -erts_instr_init(int stat, int map_stat) -{ - Uint extra_sz; - int i; - - am_tot = NULL; - am_n = NULL; - am_c = NULL; - am_a = NULL; - - erts_instr_memory_map = 0; - erts_instr_stat = 0; - atoms_initialized = 0; - - if (!stat && !map_stat) - return 0; - - stats = erts_alloc(ERTS_ALC_T_INSTR_INFO, sizeof(struct stats_)); - - erts_mtx_init(&instr_mutex, "instr", NIL, - ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_DEBUG); - - mem_anchor = NULL; - - /* Install instrumentation functions */ - ERTS_CT_ASSERT(sizeof(erts_allctrs) == sizeof(real_allctrs)); - - sys_memcpy((void *)real_allctrs,(void *)erts_allctrs,sizeof(erts_allctrs)); - - sys_memzero((void *) &stats->tot, sizeof(Stat_t)); - sys_memzero((void *) stats->a, sizeof(Stat_t)*(ERTS_ALC_A_MAX+1)); - sys_memzero((void *) stats->c, sizeof(Stat_t)*(ERTS_ALC_C_MAX+1)); - sys_memzero((void *) stats->n, sizeof(Stat_t)*(ERTS_ALC_N_MAX+1)); - - for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) { - if (erts_allctrs_info[i].enabled) - stats->ap[i] = &stats->a[i]; - else - stats->ap[i] = &stats->a[ERTS_ALC_A_SYSTEM]; - } - - if (map_stat) { - - erts_mtx_init(&instr_x_mutex, "instr_x", NIL, - ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_DEBUG); - - erts_instr_memory_map = 1; - erts_instr_stat = 1; - for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) { - erts_allctrs[i].alloc = map_stat_alloc; - erts_allctrs[i].realloc = map_stat_realloc; - erts_allctrs[i].free = map_stat_free; - erts_allctrs[i].extra = (void *) &real_allctrs[i]; - } - instr_wrapper.lock = map_stat_pre_lock; - instr_wrapper.unlock = map_stat_pre_unlock; - extra_sz = MAP_STAT_BLOCK_HEADER_SIZE; - } - else { - erts_instr_stat = 1; - for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) { - erts_allctrs[i].alloc = stat_alloc; - erts_allctrs[i].realloc = stat_realloc; - erts_allctrs[i].free = stat_free; - erts_allctrs[i].extra = (void *) &real_allctrs[i]; - } - instr_wrapper.lock = stat_pre_lock; - instr_wrapper.unlock = stat_pre_unlock; - extra_sz = STAT_BLOCK_HEADER_SIZE; - } - erts_allctr_wrapper_prelock_init(&instr_wrapper); - return extra_sz; -} - diff --git a/erts/emulator/beam/erl_instrument.h b/erts/emulator/beam/erl_instrument.h deleted file mode 100644 index 351172b2fa..0000000000 --- a/erts/emulator/beam/erl_instrument.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * %CopyrightBegin% - * - * Copyright Ericsson AB 2003-2016. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * %CopyrightEnd% - */ - -#ifndef ERL_INSTRUMENT_H__ -#define ERL_INSTRUMENT_H__ - -#include "erl_mtrace.h" - -#define ERTS_INSTR_VSN 2 - -extern int erts_instr_memory_map; -extern int erts_instr_stat; - -Uint erts_instr_init(int stat, int map_stat); -int erts_instr_dump_memory_map_to(fmtfn_t to, void* to_arg); -int erts_instr_dump_memory_map(const char *name); -Eterm erts_instr_get_memory_map(Process *process); -int erts_instr_dump_stat_to(fmtfn_t to, void* to_arg, int begin_max_period); -int erts_instr_dump_stat(const char *name, int begin_max_period); -Eterm erts_instr_get_stat(Process *proc, Eterm what, int begin_max_period); -Eterm erts_instr_get_type_info(Process *proc); -Uint erts_instr_get_total(void); -Uint erts_instr_get_max_total(void); - -#endif diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 47b46fdd75..88b30644b7 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -36,7 +36,6 @@ #include "erl_db.h" #include "dist.h" #include "beam_catches.h" -#include "erl_instrument.h" #include "erl_threads.h" #include "erl_binary.h" #include "beam_bp.h" @@ -2508,6 +2507,8 @@ handle_yield(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) yield |= erts_handle_yielded_ets_all_request(awdp->esdp, &awdp->yield.ets_all); + yield |= erts_handle_yielded_alcu_blockscan(awdp->esdp, + &awdp->yield.alcu_blockscan); /* * Other yielding operations... @@ -8334,6 +8335,7 @@ sched_thread_func(void *vesdp) ERTS_VERIFY_UNUSED_TEMP_ALLOC(NULL); #endif + erts_alcu_sched_spec_data_init(esdp); erts_ets_sched_spec_data_init(esdp); process_main(esdp->x_reg_array, esdp->f_reg_array); diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index bb97b6690c..e2aa1d9f84 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -593,6 +593,7 @@ typedef struct { ErtsDelayedAuxWorkWakeupJob *job; } delayed_wakeup; struct { + ErtsAlcuBlockscanYieldData alcu_blockscan; ErtsEtsAllYieldData ets_all; /* Other yielding operations... */ } yield; |