diff options
author | John Högberg <[email protected]> | 2018-03-27 13:14:40 +0200 |
---|---|---|
committer | John Högberg <[email protected]> | 2018-04-23 13:13:53 +0200 |
commit | 573a5abd9d6b1668b49376b489b187780c7125c7 (patch) | |
tree | fe720d8a93b7a5199c2ef2ffe306396a6cb13f92 | |
parent | 26d72d02167aed57e43f1ad669039b96aa154fb8 (diff) | |
download | otp-573a5abd9d6b1668b49376b489b187780c7125c7.tar.gz otp-573a5abd9d6b1668b49376b489b187780c7125c7.tar.bz2 otp-573a5abd9d6b1668b49376b489b187780c7125c7.zip |
erts: Rewrite memory instrumentation
This commit replaces the old memory instrumentation with a new
implementation that scans carriers instead of wrapping
erts_alloc/erts_free. The old implementation could not extract
information without halting the emulator, had considerable runtime
overhead, and the memory maps it produced were noisy and lacked
critical information.
Since the new implementation walks through existing data structures
there's no longer a need to start the emulator with special flags to
get information about carrier utilization/fragmentation. Memory
fragmentation is also easier to diagnose as it's presented on a
per-carrier basis which eliminates the need to account for "holes"
between mmap segments.
To help track allocations, each allocation can now be tagged with
what it is and who allocated it at the cost of one extra word per
allocation. This is controlled on a per-allocator basis with the
+M<S>atags option, and is enabled by default for binary_alloc and
driver_alloc (which is also used by NIFs).
22 files changed, 2141 insertions, 2426 deletions
diff --git a/erts/doc/src/erts_alloc.xml b/erts/doc/src/erts_alloc.xml index 53e136d76c..0893eb291c 100644 --- a/erts/doc/src/erts_alloc.xml +++ b/erts/doc/src/erts_alloc.xml @@ -559,6 +559,20 @@ than this threshold, otherwise the carrier is shrunk. See also <seealso marker="#M_rsbcst"><c>rsbcst</c></seealso>.</p> </item> + <tag><marker id="M_atags"/><c><![CDATA[+M<S>atags true|false]]></c></tag> + <item> + <p>Adds a small tag to each allocated block that contains basic + information about what it is and who allocated it. Use the + <seealso marker="tools:instrument"><c>instrument</c></seealso> + module to inspect this information.</p> + + <p>The runtime overhead is one word per allocation when enabled. This + may change at any time in the future.</p> + + <p>The default is <c>true</c> for <c>binary_alloc</c> and + <c>driver_alloc</c>, and <c>false</c> for the other allocator + types.</p> + </item> <tag><marker id="M_e"/><c><![CDATA[+M<S>e true|false]]></c></tag> <item> <p>Enables allocator <c><![CDATA[<S>]]></c>.</p> @@ -724,22 +738,12 @@ <section> <title>Instrumentation Flags</title> <taglist> - <tag><marker id="Mim"/><c>+Mim true|false</c></tag> - <item> - <p>A map over current allocations is kept by the emulator. - The allocation map can be retrieved through module - <seealso marker="tools:instrument"> - <c>instrument(3)</c></seealso>. <c>+Mim true</c> - implies <c>+Mis true</c>. <c>+Mim true</c> is the same as flag - <seealso marker="erl#instr"><c>-instr</c></seealso> in - <c>erl(1)</c>.</p> - </item> - <tag><marker id="Mis"/><c>+Mis true|false</c></tag> - <item> - <p>Status over allocated memory is kept by the emulator. - The allocation status can be retrieved through module - <seealso marker="tools:instrument"> - <c>instrument(3)</c></seealso>.</p> + <tag><c>+M<S>atags</c></tag> + <item> + <p>Adds a small tag to each allocated block that contains basic + information about what it is and who allocated it. See + <seealso marker="#M_atags"><c>+M<S>atags</c></seealso> for a + more complete description.</p> </item> <tag><marker id="Mit"/><c>+Mit X</c></tag> <item> diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in index 92cf40c1ae..5dfa60ee74 100644 --- a/erts/emulator/Makefile.in +++ b/erts/emulator/Makefile.in @@ -832,7 +832,7 @@ RUN_OBJS += \ $(OBJDIR)/erl_alloc.o $(OBJDIR)/erl_mtrace.o \ $(OBJDIR)/erl_alloc_util.o $(OBJDIR)/erl_goodfit_alloc.o \ $(OBJDIR)/erl_bestfit_alloc.o $(OBJDIR)/erl_afit_alloc.o \ - $(OBJDIR)/erl_instrument.o $(OBJDIR)/erl_init.o \ + $(OBJDIR)/erl_init.o \ $(OBJDIR)/erl_atom_table.o $(OBJDIR)/erl_bif_table.o \ $(OBJDIR)/erl_bif_ddll.o $(OBJDIR)/erl_bif_guard.o \ $(OBJDIR)/erl_bif_info.o $(OBJDIR)/erl_bif_op.o \ diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 687fd39d58..bcaf9277bf 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -693,3 +693,5 @@ bif erts_internal:new_connection/1 bif erts_internal:abort_connection/2 bif erts_internal:map_next/3 bif ets:whereis/1 +bif erts_internal:gather_alloc_histograms/1 +bif erts_internal:gather_carrier_info/1 diff --git a/erts/emulator/beam/break.c b/erts/emulator/beam/break.c index ba8cc5e2ba..9ff52c92b8 100644 --- a/erts/emulator/beam/break.c +++ b/erts/emulator/beam/break.c @@ -36,7 +36,6 @@ #include "hash.h" #include "atom.h" #include "beam_load.h" -#include "erl_instrument.h" #include "erl_hl_timer.h" #include "erl_thr_progress.h" #include "erl_proc_sig_queue.h" @@ -955,20 +954,6 @@ erl_crash_dump_v(char *file, int line, char* fmt, va_list args) erts_cbprintf(to, to_arg, "=atoms\n"); dump_atoms(to, to_arg); - /* Keep the instrumentation data at the end of the dump */ - if (erts_instr_memory_map || erts_instr_stat) { - erts_cbprintf(to, to_arg, "=instr_data\n"); - - if (erts_instr_stat) { - erts_cbprintf(to, to_arg, "=memory_status\n"); - erts_instr_dump_stat_to(to, to_arg, 0); - } - if (erts_instr_memory_map) { - erts_cbprintf(to, to_arg, "=memory_map\n"); - erts_instr_dump_memory_map_to(to, to_arg); - } - } - erts_cbprintf(to, to_arg, "=end\n"); if (fp) { fclose(fp); diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index 061b9df627..d99d2ea57b 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -38,7 +38,7 @@ #include "erl_db.h" #include "erl_binary.h" #include "erl_bits.h" -#include "erl_instrument.h" +#include "erl_mtrace.h" #include "erl_mseg.h" #include "erl_monitor_link.h" #include "erl_hl_timer.h" @@ -202,8 +202,6 @@ typedef struct { int top_pad; AlcUInit_t alloc_util; struct { - int stat; - int map; char *mtrace; char *nodename; } instr; @@ -428,6 +426,7 @@ set_default_binary_alloc_opts(struct au_init *ip) #endif ip->init.util.ts = ERTS_ALC_MTA_BINARY; ip->init.util.acul = ERTS_ALC_DEFAULT_ACUL; + ip->init.util.atags = 1; } static void @@ -464,6 +463,7 @@ set_default_driver_alloc_opts(struct au_init *ip) #endif ip->init.util.ts = ERTS_ALC_MTA_DRIVER; ip->init.util.acul = ERTS_ALC_DEFAULT_ACUL; + ip->init.util.atags = 1; } static void @@ -501,6 +501,7 @@ set_default_test_alloc_opts(struct au_init *ip) ip->init.util.mmbcs = 0; /* Main carrier size */ ip->init.util.ts = ERTS_ALC_MTA_TEST; ip->init.util.acul = ERTS_ALC_DEFAULT_ACUL; + ip->init.util.atags = 1; /* Use a constant minimal MBC size */ #if ERTS_SA_MB_CARRIERS @@ -906,7 +907,6 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) &test_alloc_state); erts_mtrace_install_wrapper_functions(); - extra_block_size += erts_instr_init(init.instr.stat, init.instr.map); init_aireq_alloc(); @@ -1411,7 +1411,9 @@ handle_au_arg(struct au_init *auip, } if (!strategy_support_carrier_migration(auip)) auip->init.util.acul = 0; - } + } else if (has_prefix("atags", sub_param)) { + auip->init.util.atags = get_bool_value(sub_param + 5, argv, ip); + } else goto bad_switch; break; @@ -1741,24 +1743,6 @@ handle_args(int *argc, char **argv, erts_alc_hndl_args_init_t *init) break; case 'i': switch (argv[i][3]) { - case 's': - arg = get_value(argv[i]+4, argv, &i); - if (sys_strcmp("true", arg) == 0) - init->instr.stat = 1; - else if (sys_strcmp("false", arg) == 0) - init->instr.stat = 0; - else - bad_value(param, param+3, arg); - break; - case 'm': - arg = get_value(argv[i]+4, argv, &i); - if (sys_strcmp("true", arg) == 0) - init->instr.map = 1; - else if (sys_strcmp("false", arg) == 0) - init->instr.map = 0; - else - bad_value(param, param+3, arg); - break; case 't': init->instr.mtrace = get_value(argv[i]+4, argv, &i); break; @@ -1817,9 +1801,7 @@ handle_args(int *argc, char **argv, erts_alc_hndl_args_init_t *init) case '-': if (argv[i][2] == '\0') { /* End of system flags reached */ - if (init->instr.mtrace - /* || init->instr.stat - || init->instr.map */) { + if (init->instr.mtrace) { while (i < *argc) { if(sys_strcmp(argv[i], "-sname") == 0 || sys_strcmp(argv[i], "-name") == 0) { @@ -2097,7 +2079,7 @@ erts_memory(fmtfn_t *print_to_p, void *print_to_arg, void *proc, Eterm earg) * NOTE! When updating this function, make sure to also update * erlang:memory/[0,1] in $ERL_TOP/erts/preloaded/src/erlang.erl */ -#define ERTS_MEM_NEED_ALL_ALCU (!erts_instr_stat && want_tot_or_sys) +#define ERTS_MEM_NEED_ALL_ALCU (want_tot_or_sys) struct { int total; int processes; @@ -2108,7 +2090,6 @@ erts_memory(fmtfn_t *print_to_p, void *print_to_arg, void *proc, Eterm earg) int binary; int code; int ets; - int maximum; } want = {0}; struct { UWord total; @@ -2120,7 +2101,6 @@ erts_memory(fmtfn_t *print_to_p, void *print_to_arg, void *proc, Eterm earg) UWord binary; UWord code; UWord ets; - UWord maximum; } size = {0}; Eterm atoms[sizeof(size)/sizeof(UWord)]; UWord *uintps[sizeof(size)/sizeof(UWord)]; @@ -2173,12 +2153,6 @@ erts_memory(fmtfn_t *print_to_p, void *print_to_arg, void *proc, Eterm earg) want.ets = 1; atoms[length] = am_ets; uintps[length++] = &size.ets; - - want.maximum = erts_instr_stat; - if (want.maximum) { - atoms[length] = am_maximum; - uintps[length++] = &size.maximum; - } } else { DeclareTmpHeapNoproc(tmp_heap,2); @@ -2260,18 +2234,6 @@ erts_memory(fmtfn_t *print_to_p, void *print_to_arg, void *proc, Eterm earg) uintps[length++] = &size.ets; } break; - case am_maximum: - if (erts_instr_stat) { - if (!want.maximum) { - want.maximum = 1; - atoms[length] = am_maximum; - uintps[length++] = &size.maximum; - } - } else { - UnUseTmpHeapNoproc(2); - return am_badarg; - } - break; default: UnUseTmpHeapNoproc(2); return am_badarg; @@ -2437,14 +2399,7 @@ erts_memory(fmtfn_t *print_to_p, void *print_to_arg, void *proc, Eterm earg) size.ets += erts_get_ets_misc_mem_size(); } - if (erts_instr_stat && (want_tot_or_sys || want.maximum)) { - if (want_tot_or_sys) { - size.total = erts_instr_get_total(); - size.system = size.total - size.processes; - } - size.maximum = erts_instr_get_max_total(); - } - else if (want_tot_or_sys) { + if (want_tot_or_sys) { size.system = size.total - size.processes; } @@ -2522,18 +2477,6 @@ erts_allocated_areas(fmtfn_t *print_to_p, void *print_to_arg, void *proc) i = 0; - if (erts_instr_stat) { - values[i].arity = 2; - values[i].name = "total"; - values[i].ui[0] = erts_instr_get_total(); - i++; - - values[i].arity = 2; - values[i].name = "maximum"; - values[i].ui[0] = erts_instr_get_max_total(); - i++; - } - values[i].arity = 2; values[i].name = "sys_misc"; values[i].ui[0] = erts_sys_misc_mem_sz(); @@ -2824,10 +2767,7 @@ erts_allocator_info(fmtfn_t to, void *arg) erts_alcu_au_info_options(&to, arg, NULL, NULL); erts_print(to, arg, "=allocator:instr\n"); - erts_print(to, arg, "option m: %s\n", - erts_instr_memory_map ? "true" : "false"); - erts_print(to, arg, "option s: %s\n", - erts_instr_stat ? "true" : "false"); + erts_print(to, arg, "option t: %s\n", erts_mtrace_enabled ? "true" : "false"); @@ -2933,16 +2873,12 @@ erts_allocator_options(void *proc) NULL, hpp, szp); #endif { - Eterm o[3], v[3]; - o[0] = am_atom_put("m", 1); - v[0] = erts_instr_memory_map ? am_true : am_false; - o[1] = am_atom_put("s", 1); - v[1] = erts_instr_stat ? am_true : am_false; - o[2] = am_atom_put("t", 1); - v[2] = erts_mtrace_enabled ? am_true : am_false; + Eterm o[1], v[1]; + o[0] = am_atom_put("t", 1); + v[0] = erts_mtrace_enabled ? am_true : am_false; atoms[length] = am_atom_put("instr", 5); - terms[length++] = erts_bld_2tup_list(hpp, szp, 3, o, v); + terms[length++] = erts_bld_2tup_list(hpp, szp, 1, o, v); } atoms[length] = am_atom_put("lock_physical_memory", 20); @@ -3458,8 +3394,8 @@ badarg: /* * The allocator wrapper prelocking stuff below is about the locking order. - * It only affects wrappers (erl_mtrace.c and erl_instrument.c) that keep locks - * during alloc/realloc/free. + * It only affects wrappers (erl_mtrace.c) that keep locks during + * alloc/realloc/free. * * Some query functions in erl_alloc_util.c lock the allocator mutex and then * use erts_printf that in turn may call the sys allocator through the wrappers. diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c index fed51eb200..fdf355d503 100644 --- a/erts/emulator/beam/erl_alloc_util.c +++ b/erts/emulator/beam/erl_alloc_util.c @@ -48,6 +48,8 @@ #include "erl_mseg.h" #include "erl_threads.h" #include "erl_thr_progress.h" +#include "erl_bif_unique.h" +#include "erl_nif.h" #ifdef ERTS_ENABLE_LOCK_COUNT #include "erl_lock_count.h" @@ -139,6 +141,33 @@ MBC after deallocating first block: [Carrier_t|pad|Block_t 111| udata... ] */ +/* Allocation tags ... + * + * These are added to the footer of every block when enabled. Currently they + * consist of the allocation type and an atom identifying the allocating + * driver/nif (or 'system' if that can't be determined), but the format is not + * supposed to be set in stone. + * + * The packing scheme requires that the atom values are small enough to fit + * into a word with ERTS_ALC_N_BITS to spare. Users must check for overflow + * before MAKE_ATAG(). */ + +typedef UWord alcu_atag_t; + +#define MAKE_ATAG(IdAtom, Type) \ + (ASSERT((Type) >= ERTS_ALC_N_MIN && (Type) <= ERTS_ALC_N_MAX), \ + ASSERT(atom_val(IdAtom) <= MAX_ATAG_ATOM_ID), \ + (atom_val(IdAtom) << ERTS_ALC_N_BITS) | (Type)) + +#define ATAG_ID(AT) (make_atom((AT) >> ERTS_ALC_N_BITS)) +#define ATAG_TYPE(AT) ((AT) & ERTS_ALC_N_MASK) + +#define MAX_ATAG_ATOM_ID (ERTS_UWORD_MAX >> ERTS_ALC_N_BITS) + +#define DBG_IS_VALID_ATAG(Allocator, AT) \ + (ATAG_TYPE(AT) >= ERTS_ALC_N_MIN && \ + ATAG_TYPE(AT) <= ERTS_ALC_N_MAX && \ + (Allocator)->alloc_no == ERTS_ALC_T2A(ERTS_ALC_N2T(ATAG_TYPE(AT)))) /* Blocks ... */ @@ -153,10 +182,17 @@ MBC after deallocating first block: #endif #define FBLK_FTR_SZ (sizeof(FreeBlkFtr_t)) +#define GET_BLK_ATAG(B) \ + (((alcu_atag_t *) (((char *) (B)) + (BLK_SZ(B))))[-1]) +#define SET_BLK_ATAG(B, T) \ + (((alcu_atag_t *) (((char *) (B)) + (BLK_SZ(B))))[-1] = (T)) + +#define BLK_ATAG_SZ(AP) ((AP)->atags ? sizeof(alcu_atag_t) : 0) + #define UMEMSZ2BLKSZ(AP, SZ) \ - (ABLK_HDR_SZ + (SZ) <= (AP)->min_block_size \ + (ABLK_HDR_SZ + BLK_ATAG_SZ(AP) + (SZ) <= (AP)->min_block_size \ ? (AP)->min_block_size \ - : UNIT_CEILING(ABLK_HDR_SZ + (SZ))) + : UNIT_CEILING(ABLK_HDR_SZ + BLK_ATAG_SZ(AP) + (SZ))) #define UMEM2BLK(P) ((Block_t *) (((char *) (P)) - ABLK_HDR_SZ)) #define BLK2UMEM(P) ((void *) (((char *) (P)) + ABLK_HDR_SZ)) @@ -688,6 +724,62 @@ static void destroy_carrier(Allctr_t *, Block_t *, Carrier_t **); static void mbc_free(Allctr_t *allctr, void *p, Carrier_t **busy_pcrr_pp); static void dealloc_block(Allctr_t *, void *, ErtsAlcFixList_t *, int); +static alcu_atag_t determine_alloc_tag(Allctr_t *allocator, ErtsAlcType_t type) +{ + ErtsSchedulerData *esdp; + Eterm id; + + ERTS_CT_ASSERT(_unchecked_atom_val(am_system) <= MAX_ATAG_ATOM_ID); + ASSERT(allocator->atags); + + esdp = erts_get_scheduler_data(); + id = am_system; + + if (esdp) { + if (esdp->current_nif) { + Module *mod = erts_nif_get_module((esdp->current_nif)->mod_nif); + + /* Mod can be NULL if a resource destructor allocates memory after + * the module has been unloaded. */ + if (mod) { + id = make_atom(mod->module); + } + } else if (esdp->current_port) { + Port *p = esdp->current_port; + id = (p->drv_ptr)->name_atom; + } + + /* We fall back to 'system' if we can't pack the driver/NIF name into + * the tag. This may be a bit misleading but we've made no promises + * that the information is complete. + * + * This can only happen on 32-bit emulators when a new driver/NIF has + * been loaded *after* 16 million atoms have been used, and supporting + * that fringe case is not worth an extra word. 64-bit emulators are + * unaffected since the atom cache limits atom indexes to 32 bits. */ + if(MAX_ATOM_TABLE_SIZE > MAX_ATAG_ATOM_ID) { + if (atom_val(id) > MAX_ATAG_ATOM_ID) { + id = am_system; + } + } + } + + return MAKE_ATAG(id, type); +} + +static void set_alloc_tag(Allctr_t *allocator, void *p, alcu_atag_t tag) +{ + Block_t *block; + + ASSERT(DBG_IS_VALID_ATAG(allocator, tag)); + ASSERT(allocator->atags && p); + (void)allocator; + + block = UMEM2BLK(p); + + SET_BLK_ATAG(block, tag); +} + /* internal data... */ #if 0 @@ -4242,6 +4334,7 @@ static struct { Eterm e; Eterm t; Eterm ramv; + Eterm atags; #if HAVE_ERTS_MSEG Eterm asbcst; Eterm rsbcst; @@ -4311,7 +4404,7 @@ static struct { #endif } am; -static Eterm fix_type_atoms[ERTS_ALC_NO_FIXED_SIZES]; +static Eterm alloc_type_atoms[ERTS_ALC_N_MAX + 1]; static ERTS_INLINE void atom_init(Eterm *atom, char *name) { @@ -4342,6 +4435,7 @@ init_atoms(Allctr_t *allctr) AM_INIT(e); AM_INIT(t); AM_INIT(ramv); + AM_INIT(atags); #if HAVE_ERTS_MSEG AM_INIT(asbcst); AM_INIT(rsbcst); @@ -4413,12 +4507,12 @@ init_atoms(Allctr_t *allctr) } #endif - for (ix = 0; ix < ERTS_ALC_NO_FIXED_SIZES; ix++) { - ErtsAlcType_t n = ERTS_ALC_N_MIN_A_FIXED_SIZE + ix; - char *name = (char *) ERTS_ALC_N2TD(n); - size_t len = sys_strlen(name); - fix_type_atoms[ix] = am_atom_put(name, len); - } + for (ix = ERTS_ALC_N_MIN; ix <= ERTS_ALC_N_MAX; ix++) { + const char *name = ERTS_ALC_N2TD(ix); + size_t len = sys_strlen(name); + + alloc_type_atoms[ix] = am_atom_put(name, len); + } } if (allctr && !allctr->atoms_initialized) { @@ -4531,6 +4625,7 @@ sz_info_fix(Allctr_t *allctr, ErtsAlcFixList_t *fix = &allctr->fix[ix]; UWord alloced = fix->type_size * fix->u.cpool.allocated; UWord used = fix->type_size * fix->u.cpool.used; + ErtsAlcType_t n = ERTS_ALC_N_MIN_A_FIXED_SIZE + ix; if (print_to_p) { fmtfn_t to = *print_to_p; @@ -4538,15 +4633,14 @@ sz_info_fix(Allctr_t *allctr, erts_print(to, arg, "fix type internal: %s %bpu %bpu\n", - (char *) ERTS_ALC_N2TD(ERTS_ALC_N_MIN_A_FIXED_SIZE - + ix), + (char *) ERTS_ALC_N2TD(n), alloced, used); } if (hpp || szp) { add_3tup(hpp, szp, &res, - fix_type_atoms[ix], + alloc_type_atoms[n], bld_unstable_uint(hpp, szp, alloced), bld_unstable_uint(hpp, szp, used)); } @@ -4559,6 +4653,7 @@ sz_info_fix(Allctr_t *allctr, ErtsAlcFixList_t *fix = &allctr->fix[ix]; UWord alloced = fix->type_size * fix->u.nocpool.allocated; UWord used = fix->type_size*fix->u.nocpool.used; + ErtsAlcType_t n = ERTS_ALC_N_MIN_A_FIXED_SIZE + ix; if (print_to_p) { fmtfn_t to = *print_to_p; @@ -4566,15 +4661,14 @@ sz_info_fix(Allctr_t *allctr, erts_print(to, arg, "fix type: %s %bpu %bpu\n", - (char *) ERTS_ALC_N2TD(ERTS_ALC_N_MIN_A_FIXED_SIZE - + ix), + (char *) ERTS_ALC_N2TD(n), alloced, used); } if (hpp || szp) { add_3tup(hpp, szp, &res, - fix_type_atoms[ix], + alloc_type_atoms[n], bld_unstable_uint(hpp, szp, alloced), bld_unstable_uint(hpp, szp, used)); } @@ -5000,6 +5094,7 @@ info_options(Allctr_t *allctr, "option e: true\n" "option t: %s\n" "option ramv: %s\n" + "option atags: %s\n" "option sbct: %beu\n" #if HAVE_ERTS_MSEG "option asbcst: %bpu\n" @@ -5018,6 +5113,7 @@ info_options(Allctr_t *allctr, "option acul: %bpu\n", topt, allctr->ramv ? "true" : "false", + allctr->atags ? "true" : "false", allctr->sbc_threshold, #if HAVE_ERTS_MSEG allctr->mseg_opt.abs_shrink_th, @@ -5087,6 +5183,7 @@ info_options(Allctr_t *allctr, am_sbct, bld_uint(hpp, szp, allctr->sbc_threshold)); add_2tup(hpp, szp, &res, am.ramv, allctr->ramv ? am_true : am_false); + add_2tup(hpp, szp, &res, am.atags, allctr->atags ? am_true : am_false); add_2tup(hpp, szp, &res, am.t, (allctr->t ? am_true : am_false)); add_2tup(hpp, szp, &res, am.e, am_true); } @@ -5408,9 +5505,8 @@ erts_alcu_current_size(Allctr_t *allctr, AllctrSize_t *size, ErtsAlcUFixInfo_t * /* ----------------------------------------------------------------------- */ static ERTS_INLINE void * -do_erts_alcu_alloc(ErtsAlcType_t type, void *extra, Uint size) +do_erts_alcu_alloc(ErtsAlcType_t type, Allctr_t *allctr, Uint size) { - Allctr_t *allctr = (Allctr_t *) extra; void *res; ASSERT(initialized); @@ -5449,10 +5545,19 @@ do_erts_alcu_alloc(ErtsAlcType_t type, void *extra, Uint size) void *erts_alcu_alloc(ErtsAlcType_t type, void *extra, Uint size) { + Allctr_t *allctr = (Allctr_t *) extra; void *res; + ASSERT(!"This is not thread safe"); - res = do_erts_alcu_alloc(type, extra, size); + + res = do_erts_alcu_alloc(type, allctr, size); + + if (allctr->atags && res) { + set_alloc_tag(allctr, res, determine_alloc_tag(allctr, type)); + } + DEBUG_CHECK_ALIGNMENT(res); + return res; } @@ -5462,13 +5567,25 @@ void * erts_alcu_alloc_ts(ErtsAlcType_t type, void *extra, Uint size) { Allctr_t *allctr = (Allctr_t *) extra; + alcu_atag_t tag = 0; void *res; + + if (allctr->atags) { + tag = determine_alloc_tag(allctr, type); + } + erts_mtx_lock(&allctr->mutex); - res = do_erts_alcu_alloc(type, extra, size); - DEBUG_CHECK_ALIGNMENT(res); + res = do_erts_alcu_alloc(type, allctr, size); + + if (allctr->atags && res) { + set_alloc_tag(allctr, res, tag); + } erts_mtx_unlock(&allctr->mutex); + + DEBUG_CHECK_ALIGNMENT(res); + return res; } @@ -5478,6 +5595,7 @@ erts_alcu_alloc_thr_spec(ErtsAlcType_t type, void *extra, Uint size) { ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra; int ix; + alcu_atag_t tag = 0; Allctr_t *allctr; void *res; @@ -5487,11 +5605,19 @@ erts_alcu_alloc_thr_spec(ErtsAlcType_t type, void *extra, Uint size) allctr = tspec->allctr[ix]; + if (allctr->atags) { + tag = determine_alloc_tag(allctr, type); + } + if (allctr->thread_safe) erts_mtx_lock(&allctr->mutex); res = do_erts_alcu_alloc(type, allctr, size); + if (allctr->atags && res) { + set_alloc_tag(allctr, res, tag); + } + if (allctr->thread_safe) erts_mtx_unlock(&allctr->mutex); @@ -5504,10 +5630,15 @@ void * erts_alcu_alloc_thr_pref(ErtsAlcType_t type, void *extra, Uint size) { Allctr_t *pref_allctr; + alcu_atag_t tag = 0; void *res; pref_allctr = get_pref_allctr(extra); + if (pref_allctr->atags) { + tag = determine_alloc_tag(pref_allctr, type); + } + if (pref_allctr->thread_safe) erts_mtx_lock(&pref_allctr->mutex); @@ -5523,12 +5654,15 @@ erts_alcu_alloc_thr_pref(ErtsAlcType_t type, void *extra, Uint size) res = do_erts_alcu_alloc(type, pref_allctr, size); } + if (pref_allctr->atags && res) { + set_alloc_tag(pref_allctr, res, tag); + } + if (pref_allctr->thread_safe) erts_mtx_unlock(&pref_allctr->mutex); DEBUG_CHECK_ALIGNMENT(res); - return res; } @@ -5537,10 +5671,9 @@ erts_alcu_alloc_thr_pref(ErtsAlcType_t type, void *extra, Uint size) /* ------------------------------------------------------------------------- */ static ERTS_INLINE void -do_erts_alcu_free(ErtsAlcType_t type, void *extra, void *p, +do_erts_alcu_free(ErtsAlcType_t type, Allctr_t *allctr, void *p, Carrier_t **busy_pcrr_pp) { - Allctr_t *allctr = (Allctr_t *) extra; ASSERT(initialized); ASSERT(allctr); @@ -5572,7 +5705,8 @@ do_erts_alcu_free(ErtsAlcType_t type, void *extra, void *p, void erts_alcu_free(ErtsAlcType_t type, void *extra, void *p) { - do_erts_alcu_free(type, extra, p, NULL); + Allctr_t *allctr = (Allctr_t *) extra; + do_erts_alcu_free(type, allctr, p, NULL); } @@ -5581,7 +5715,7 @@ erts_alcu_free_ts(ErtsAlcType_t type, void *extra, void *p) { Allctr_t *allctr = (Allctr_t *) extra; erts_mtx_lock(&allctr->mutex); - do_erts_alcu_free(type, extra, p, NULL); + do_erts_alcu_free(type, allctr, p, NULL); erts_mtx_unlock(&allctr->mutex); } @@ -5641,13 +5775,12 @@ erts_alcu_free_thr_pref(ErtsAlcType_t type, void *extra, void *p) static ERTS_INLINE void * do_erts_alcu_realloc(ErtsAlcType_t type, - void *extra, + Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs, Carrier_t **busy_pcrr_pp) { - Allctr_t *allctr = (Allctr_t *) extra; Block_t *blk; void *res; @@ -5661,7 +5794,7 @@ do_erts_alcu_realloc(ErtsAlcType_t type, ERTS_ALCU_DBG_CHK_THR_ACCESS(allctr); if (!p) { - res = do_erts_alcu_alloc(type, extra, size); + res = do_erts_alcu_alloc(type, allctr, size); INC_CC(allctr->calls.this_realloc); DEC_CC(allctr->calls.this_alloc); return res; @@ -5670,7 +5803,7 @@ do_erts_alcu_realloc(ErtsAlcType_t type, #if ALLOC_ZERO_EQ_NULL if (!size) { ASSERT(p); - do_erts_alcu_free(type, extra, p, busy_pcrr_pp); + do_erts_alcu_free(type, allctr, p, busy_pcrr_pp); INC_CC(allctr->calls.this_realloc); DEC_CC(allctr->calls.this_free); return NULL; @@ -5755,19 +5888,29 @@ do_erts_alcu_realloc(ErtsAlcType_t type, void * erts_alcu_realloc(ErtsAlcType_t type, void *extra, void *p, Uint size) { + Allctr_t *allctr = (Allctr_t *)extra; void *res; - res = do_erts_alcu_realloc(type, extra, p, size, 0, NULL); + + res = do_erts_alcu_realloc(type, allctr, p, size, 0, NULL); + DEBUG_CHECK_ALIGNMENT(res); + + if (allctr->atags && res) { + set_alloc_tag(allctr, res, determine_alloc_tag(allctr, type)); + } + return res; } void * erts_alcu_realloc_mv(ErtsAlcType_t type, void *extra, void *p, Uint size) { + Allctr_t *allctr = (Allctr_t *)extra; void *res; - res = do_erts_alcu_alloc(type, extra, size); + + res = do_erts_alcu_alloc(type, allctr, size); if (!res) - res = erts_alcu_realloc(type, extra, p, size); + res = do_erts_alcu_realloc(type, allctr, p, size, 0, NULL); else { Block_t *blk; size_t cpy_size; @@ -5777,23 +5920,42 @@ erts_alcu_realloc_mv(ErtsAlcType_t type, void *extra, void *p, Uint size) if (cpy_size > size) cpy_size = size; sys_memcpy(res, p, cpy_size); - do_erts_alcu_free(type, extra, p, NULL); + do_erts_alcu_free(type, allctr, p, NULL); } + DEBUG_CHECK_ALIGNMENT(res); + + if (allctr->atags && res) { + set_alloc_tag(allctr, res, determine_alloc_tag(allctr, type)); + } + return res; } - void * erts_alcu_realloc_ts(ErtsAlcType_t type, void *extra, void *ptr, Uint size) { Allctr_t *allctr = (Allctr_t *) extra; + alcu_atag_t tag = 0; void *res; + + if (allctr->atags) { + tag = determine_alloc_tag(allctr, type); + } + erts_mtx_lock(&allctr->mutex); - res = do_erts_alcu_realloc(type, extra, ptr, size, 0, NULL); + + res = do_erts_alcu_realloc(type, allctr, ptr, size, 0, NULL); + + if (allctr->atags && res) { + set_alloc_tag(allctr, res, tag); + } + erts_mtx_unlock(&allctr->mutex); + DEBUG_CHECK_ALIGNMENT(res); + return res; } @@ -5801,11 +5963,17 @@ void * erts_alcu_realloc_mv_ts(ErtsAlcType_t type, void *extra, void *p, Uint size) { Allctr_t *allctr = (Allctr_t *) extra; + alcu_atag_t tag = 0; void *res; + + if (allctr->atags) { + tag = determine_alloc_tag(allctr, type); + } + erts_mtx_lock(&allctr->mutex); - res = do_erts_alcu_alloc(type, extra, size); + res = do_erts_alcu_alloc(type, allctr, size); if (!res) - res = do_erts_alcu_realloc(type, extra, p, size, 0, NULL); + res = do_erts_alcu_realloc(type, allctr, p, size, 0, NULL); else { Block_t *blk; size_t cpy_size; @@ -5815,10 +5983,17 @@ erts_alcu_realloc_mv_ts(ErtsAlcType_t type, void *extra, void *p, Uint size) if (cpy_size > size) cpy_size = size; sys_memcpy(res, p, cpy_size); - do_erts_alcu_free(type, extra, p, NULL); + do_erts_alcu_free(type, allctr, p, NULL); } + + if (allctr->atags && res) { + set_alloc_tag(allctr, res, tag); + } + erts_mtx_unlock(&allctr->mutex); + DEBUG_CHECK_ALIGNMENT(res); + return res; } @@ -5829,6 +6004,7 @@ erts_alcu_realloc_thr_spec(ErtsAlcType_t type, void *extra, { ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra; int ix; + alcu_atag_t tag = 0; Allctr_t *allctr; void *res; @@ -5838,11 +6014,19 @@ erts_alcu_realloc_thr_spec(ErtsAlcType_t type, void *extra, allctr = tspec->allctr[ix]; + if (allctr->atags) { + tag = determine_alloc_tag(allctr, type); + } + if (allctr->thread_safe) erts_mtx_lock(&allctr->mutex); res = do_erts_alcu_realloc(type, allctr, ptr, size, 0, NULL); + if (allctr->atags && res) { + set_alloc_tag(allctr, res, tag); + } + if (allctr->thread_safe) erts_mtx_unlock(&allctr->mutex); @@ -5857,6 +6041,7 @@ erts_alcu_realloc_mv_thr_spec(ErtsAlcType_t type, void *extra, { ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra; int ix; + alcu_atag_t tag = 0; Allctr_t *allctr; void *res; @@ -5866,14 +6051,16 @@ erts_alcu_realloc_mv_thr_spec(ErtsAlcType_t type, void *extra, allctr = tspec->allctr[ix]; + if (allctr->atags) { + tag = determine_alloc_tag(allctr, type); + } + if (allctr->thread_safe) erts_mtx_lock(&allctr->mutex); res = do_erts_alcu_alloc(type, allctr, size); if (!res) { - if (allctr->thread_safe) - erts_mtx_unlock(&allctr->mutex); - res = erts_alcu_realloc_thr_spec(type, allctr, ptr, size); + res = do_erts_alcu_realloc(type, allctr, ptr, size, 0, NULL); } else { Block_t *blk; @@ -5885,29 +6072,34 @@ erts_alcu_realloc_mv_thr_spec(ErtsAlcType_t type, void *extra, cpy_size = size; sys_memcpy(res, ptr, cpy_size); do_erts_alcu_free(type, allctr, ptr, NULL); - if (allctr->thread_safe) - erts_mtx_unlock(&allctr->mutex); } + if (allctr->atags && res) { + set_alloc_tag(allctr, res, tag); + } + + if (allctr->thread_safe) + erts_mtx_unlock(&allctr->mutex); + DEBUG_CHECK_ALIGNMENT(res); return res; } static ERTS_INLINE void * -realloc_thr_pref(ErtsAlcType_t type, void *extra, void *p, Uint size, +realloc_thr_pref(ErtsAlcType_t type, Allctr_t *pref_allctr, void *p, Uint size, int force_move) { void *res; - Allctr_t *pref_allctr, *used_allctr; + Allctr_t *used_allctr; UWord old_user_size; Carrier_t *busy_pcrr_p; + alcu_atag_t tag = 0; int retried; - if (!p) - return erts_alcu_alloc_thr_pref(type, extra, size); - - pref_allctr = get_pref_allctr(extra); + if (pref_allctr->atags) { + tag = determine_alloc_tag(pref_allctr, type); + } if (pref_allctr->thread_safe) erts_mtx_lock(&pref_allctr->mutex); @@ -5936,6 +6128,11 @@ restart: retried = 1; goto restart; } + + if (pref_allctr->atags && res) { + set_alloc_tag(pref_allctr, res, tag); + } + if (pref_allctr->thread_safe) erts_mtx_unlock(&pref_allctr->mutex); } @@ -5944,6 +6141,9 @@ restart: if (!res) goto unlock_ts_return; else { + if (pref_allctr->atags) { + set_alloc_tag(pref_allctr, res, tag); + } DEBUG_CHECK_ALIGNMENT(res); @@ -5974,20 +6174,34 @@ restart: } } + DEBUG_CHECK_ALIGNMENT(res); + return res; } void * erts_alcu_realloc_thr_pref(ErtsAlcType_t type, void *extra, void *p, Uint size) { - return realloc_thr_pref(type, extra, p, size, 0); + if (p) { + Allctr_t *pref_allctr = get_pref_allctr(extra); + + return realloc_thr_pref(type, pref_allctr, p, size, 0); + } + + return erts_alcu_alloc_thr_pref(type, extra, size); } void * erts_alcu_realloc_mv_thr_pref(ErtsAlcType_t type, void *extra, void *p, Uint size) { - return realloc_thr_pref(type, extra, p, size, 1); + if (p) { + Allctr_t *pref_allctr = get_pref_allctr(extra); + + return realloc_thr_pref(type, pref_allctr, p, size, 1); + } + + return erts_alcu_alloc_thr_pref(type, extra, size); } @@ -6071,6 +6285,7 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) allctr->t = 0; allctr->ramv = init->ramv; + allctr->atags = init->atags; allctr->main_carrier_size = init->mmbcs; #if HAVE_ERTS_MSEG @@ -6320,6 +6535,1072 @@ erts_alcu_init(AlcUInit_t *init) initialized = 1; } +/* ------------------------------------------------------------------------- */ + +/* Allocation histograms and carrier information is gathered by walking through + * all carriers associated with each allocator instance. This is done as + * aux_yield_work on the scheduler that owns each instance. + * + * Yielding is implemented by temporarily inserting a "dummy carrier" at the + * last position. It's permanently "busy" so it won't get picked up by someone + * else when in the carrier pool, and we never make the employer aware of it + * through callbacks so we can't accidentally allocate on it. + * + * Plain malloc/free is used to guarantee we won't allocate with the allocator + * we're scanning. */ + +/* Yield between carriers once this many blocks have been processed. Note that + * a single carrier scan may exceed this figure. */ +#ifndef DEBUG + #define BLOCKSCAN_REDUCTIONS (8000) +#else + #define BLOCKSCAN_REDUCTIONS (400) +#endif + +/* Abort a single carrier scan after this many blocks to prevent really large + * MBCs from blocking forever. */ +#define BLOCKSCAN_BAILOUT_THRESHOLD (16000) + +typedef struct alcu_blockscan { + /* A per-scheduler list used when multiple scans have been queued. The + * current scanner will always run until completion/abort before moving on + * to the next. */ + struct alcu_blockscan *scanner_queue; + + Allctr_t *allocator; + Process *process; + + int (*current_op)(struct alcu_blockscan *scanner); + int (*next_op)(struct alcu_blockscan *scanner); + int reductions; + + ErtsAlcCPoolData_t *cpool_cursor; + CarrierList_t *current_clist; + Carrier_t *clist_cursor; + Carrier_t dummy_carrier; + + /* Called if the process that started this job dies before we're done. */ + void (*abort)(void *user_data); + + /* Called on each carrier. The callback must return the number of blocks + * scanned to yield properly between carriers. + * + * Note that it's not possible to "yield back" into a carrier. */ + int (*scan)(Allctr_t *, void *user_data, Carrier_t *); + + /* Called when all carriers have been scanned. The callback may return + * non-zero to yield. */ + int (*finish)(void *user_data); + + void *user_data; +} blockscan_t; + +static Carrier_t *blockscan_restore_clist_cursor(blockscan_t *state) +{ + Carrier_t *cursor = state->clist_cursor; + + ASSERT(state->clist_cursor == (state->current_clist)->first || + state->clist_cursor == &state->dummy_carrier); + + if (cursor == &state->dummy_carrier) { + cursor = cursor->next; + + unlink_carrier(state->current_clist, state->clist_cursor); + } + + return cursor; +} + +static void blockscan_save_clist_cursor(blockscan_t *state, Carrier_t *after) +{ + ASSERT(state->clist_cursor == (state->current_clist)->first || + state->clist_cursor == &state->dummy_carrier); + + state->clist_cursor = &state->dummy_carrier; + + (state->clist_cursor)->next = after->next; + (state->clist_cursor)->prev = after; + + relink_carrier(state->current_clist, state->clist_cursor); +} + +static int blockscan_clist_yielding(blockscan_t *state) +{ + Carrier_t *cursor = blockscan_restore_clist_cursor(state); + + if (ERTS_PROC_IS_EXITING(state->process)) { + return 0; + } + + while (cursor) { + /* Skip dummy carriers inserted by another (concurrent) block scan. + * This can happen when scanning thread-safe allocators from multiple + * schedulers. */ + if (CARRIER_SZ(cursor) > 0) { + int blocks_scanned = state->scan(state->allocator, + state->user_data, + cursor); + + state->reductions -= blocks_scanned; + + if (state->reductions <= 0) { + blockscan_save_clist_cursor(state, cursor); + return 1; + } + } + + cursor = cursor->next; + } + + return 0; +} + +static ErtsAlcCPoolData_t *blockscan_restore_cpool_cursor(blockscan_t *state) +{ + ErtsAlcCPoolData_t *cursor; + + cursor = cpool_aint2cpd(cpool_read(&(state->cpool_cursor)->next)); + + if (state->cpool_cursor == &state->dummy_carrier.cpool) { + cpool_delete(state->allocator, state->allocator, &state->dummy_carrier); + } + + return cursor; +} + +static void blockscan_save_cpool_cursor(blockscan_t *state, + ErtsAlcCPoolData_t *after) +{ + ErtsAlcCPoolData_t *dummy_carrier, *prev_carrier, *next_carrier; + + dummy_carrier = &state->dummy_carrier.cpool; + + next_carrier = cpool_aint2cpd(cpool_mod_mark(&after->next)); + prev_carrier = cpool_aint2cpd(cpool_mod_mark(&next_carrier->prev)); + + cpool_init(&dummy_carrier->next, (erts_aint_t)next_carrier); + cpool_init(&dummy_carrier->prev, (erts_aint_t)prev_carrier); + + cpool_set_mod_marked(&prev_carrier->next, + (erts_aint_t)dummy_carrier, + (erts_aint_t)next_carrier); + cpool_set_mod_marked(&next_carrier->prev, + (erts_aint_t)dummy_carrier, + (erts_aint_t)prev_carrier); + + state->cpool_cursor = dummy_carrier; +} + +static int blockscan_cpool_yielding(blockscan_t *state) +{ + ErtsAlcCPoolData_t *sentinel, *cursor; + + sentinel = &carrier_pool[(state->allocator)->alloc_no].sentinel; + cursor = blockscan_restore_cpool_cursor(state); + + if (ERTS_PROC_IS_EXITING(state->process)) { + return 0; + } + + while (cursor != sentinel) { + Carrier_t *carrier; + erts_aint_t exp; + + /* When a deallocation happens on a pooled carrier it will be routed to + * its owner, so the only way to be sure that it isn't modified while + * scanning is to skip all carriers that aren't ours. The deallocations + * deferred to us will get handled when we're done. */ + while (cursor->orig_allctr != state->allocator) { + cursor = cpool_aint2cpd(cpool_read(&cursor->next)); + + if (cursor == sentinel) { + return 0; + } + } + + carrier = ErtsContainerStruct(cursor, Carrier_t, cpool); + exp = erts_atomic_read_rb(&carrier->allctr); + + if (exp & ERTS_CRR_ALCTR_FLG_IN_POOL) { + ASSERT(state->allocator == (Allctr_t*)(exp & ~ERTS_CRR_ALCTR_FLG_MASK)); + ASSERT(!(exp & ERTS_CRR_ALCTR_FLG_BUSY)); + + if (erts_atomic_cmpxchg_acqb(&carrier->allctr, + exp | ERTS_CRR_ALCTR_FLG_BUSY, + exp) == exp) { + /* Skip dummy carriers inserted by another (concurrent) block + * scan. This can happen when scanning thread-safe allocators + * from multiple schedulers. */ + if (CARRIER_SZ(carrier) > 0) { + int blocks_scanned = state->scan(state->allocator, + state->user_data, + carrier); + + state->reductions -= blocks_scanned; + + if (state->reductions <= 0) { + blockscan_save_cpool_cursor(state, cursor); + erts_atomic_set_relb(&carrier->allctr, exp); + + return 1; + } + } + + erts_atomic_set_relb(&carrier->allctr, exp); + } + } + + cursor = cpool_aint2cpd(cpool_read(&cursor->next)); + } + + return 0; +} + +static int blockscan_yield_helper(blockscan_t *state, + int (*yielding_op)(blockscan_t*)) +{ + /* Note that we don't check whether to abort here; only yielding_op knows + * whether the carrier is still in the list/pool. */ + + if ((state->allocator)->thread_safe) { + /* Locked scans have to be as short as possible. */ + state->reductions = 1; + + erts_mtx_lock(&(state->allocator)->mutex); + } else { + state->reductions = BLOCKSCAN_REDUCTIONS; + } + + if (yielding_op(state)) { + state->next_op = state->current_op; + } + + if ((state->allocator)->thread_safe) { + erts_mtx_unlock(&(state->allocator)->mutex); + } + + return 1; +} + +/* */ + +static int blockscan_finish(blockscan_t *state) +{ + if (ERTS_PROC_IS_EXITING(state->process)) { + state->abort(state->user_data); + return 0; + } + + state->current_op = blockscan_finish; + + return state->finish(state->user_data); +} + +static int blockscan_sweep_sbcs(blockscan_t *state) +{ + if (state->current_op != blockscan_sweep_sbcs) { + SET_CARRIER_HDR(&state->dummy_carrier, 0, SCH_SBC, state->allocator); + state->current_clist = &(state->allocator)->sbc_list; + state->clist_cursor = (state->current_clist)->first; + } + + state->current_op = blockscan_sweep_sbcs; + state->next_op = blockscan_finish; + + return blockscan_yield_helper(state, blockscan_clist_yielding); +} + +static int blockscan_sweep_mbcs(blockscan_t *state) +{ + if (state->current_op != blockscan_sweep_mbcs) { + SET_CARRIER_HDR(&state->dummy_carrier, 0, SCH_MBC, state->allocator); + state->current_clist = &(state->allocator)->mbc_list; + state->clist_cursor = (state->current_clist)->first; + } + + state->current_op = blockscan_sweep_mbcs; + state->next_op = blockscan_sweep_sbcs; + + return blockscan_yield_helper(state, blockscan_clist_yielding); +} + +static int blockscan_sweep_cpool(blockscan_t *state) +{ + if (state->current_op != blockscan_sweep_cpool) { + ErtsAlcCPoolData_t *sentinel; + + SET_CARRIER_HDR(&state->dummy_carrier, 0, SCH_MBC, state->allocator); + sentinel = &carrier_pool[(state->allocator)->alloc_no].sentinel; + state->cpool_cursor = sentinel; + } + + state->current_op = blockscan_sweep_cpool; + state->next_op = blockscan_sweep_mbcs; + + return blockscan_yield_helper(state, blockscan_cpool_yielding); +} + +static int blockscan_get_specific_allocator(int allocator_num, + int sched_id, + Allctr_t **out) +{ + ErtsAllocatorInfo_t *ai; + Allctr_t *allocator; + + ASSERT(allocator_num >= ERTS_ALC_A_MIN && + allocator_num <= ERTS_ALC_A_MAX); + ASSERT(sched_id >= 0 && sched_id <= erts_no_schedulers); + + ai = &erts_allctrs_info[allocator_num]; + + if (!ai->enabled || !ai->alloc_util) { + return 0; + } + + if (!ai->thr_spec) { + if (sched_id != 0) { + /* Only thread-specific allocators can be scanned on a specific + * scheduler. */ + return 0; + } + + allocator = (Allctr_t*)ai->extra; + ASSERT(allocator->thread_safe); + } else { + ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t*)ai->extra; + + ASSERT(sched_id < tspec->size); + + allocator = tspec->allctr[sched_id]; + } + + *out = allocator; + + return 1; +} + +static void blockscan_sched_trampoline(void *arg) +{ + ErtsAlcuBlockscanYieldData *yield; + ErtsSchedulerData *esdp; + blockscan_t *scanner; + + esdp = erts_get_scheduler_data(); + scanner = (blockscan_t*)arg; + + yield = ERTS_SCHED_AUX_YIELD_DATA(esdp, alcu_blockscan); + + ASSERT((yield->last == NULL) == (yield->current == NULL)); + + if (yield->last != NULL) { + blockscan_t *prev_scanner = yield->last; + + ASSERT(prev_scanner->scanner_queue == NULL); + + prev_scanner->scanner_queue = scanner; + } else { + yield->current = scanner; + } + + scanner->scanner_queue = NULL; + yield->last = scanner; + + erts_notify_new_aux_yield_work(esdp); +} + +static void blockscan_dispatch(blockscan_t *scanner, Process *owner, + Allctr_t *allocator, int sched_id) +{ + ASSERT(erts_get_scheduler_id() != 0); + + if (sched_id == 0) { + /* Global instances are always handled on the current scheduler. */ + sched_id = ERTS_ALC_GET_THR_IX(); + ASSERT(allocator->thread_safe); + } + + scanner->allocator = allocator; + scanner->process = owner; + + erts_proc_inc_refc(scanner->process); + + cpool_init_carrier_data(scanner->allocator, &scanner->dummy_carrier); + erts_atomic_init_nob(&(scanner->dummy_carrier).allctr, + (erts_aint_t)allocator | ERTS_CRR_ALCTR_FLG_BUSY); + + if (ERTS_ALC_IS_CPOOL_ENABLED(scanner->allocator)) { + scanner->next_op = blockscan_sweep_cpool; + } else { + scanner->next_op = blockscan_sweep_mbcs; + } + + /* Aux yield jobs can only be set up while running on the scheduler that + * services them, so we move there before continuing. + * + * We can't drive the scan itself through this since the scheduler will + * always finish *all* misc aux work in one go which makes it impossible to + * yield. */ + erts_schedule_misc_aux_work(sched_id, blockscan_sched_trampoline, scanner); +} + +int erts_handle_yielded_alcu_blockscan(ErtsSchedulerData *esdp, + ErtsAlcuBlockscanYieldData *yield) +{ + blockscan_t *scanner = yield->current; + + (void)esdp; + + ASSERT((yield->last == NULL) == (yield->current == NULL)); + + if (scanner) { + if (scanner->next_op(scanner)) { + return 1; + } + + ASSERT(ERTS_PROC_IS_EXITING(scanner->process) || + scanner->current_op == blockscan_finish); + + yield->current = scanner->scanner_queue; + + if (yield->current == NULL) { + ASSERT(scanner == yield->last); + yield->last = NULL; + } + + erts_proc_dec_refc(scanner->process); + + /* Plain free is intentional. */ + free(scanner); + + return yield->current != NULL; + } + + return 0; +} + +void erts_alcu_sched_spec_data_init(ErtsSchedulerData *esdp) +{ + ErtsAlcuBlockscanYieldData *yield; + + yield = ERTS_SCHED_AUX_YIELD_DATA(esdp, alcu_blockscan); + + yield->current = NULL; + yield->last = NULL; +} + +/* ------------------------------------------------------------------------- */ + +static ERTS_INLINE int u64_log2(Uint64 v) +{ + static const int log2_tab64[64] = { + 63, 0, 58, 1, 59, 47, 53, 2, + 60, 39, 48, 27, 54, 33, 42, 3, + 61, 51, 37, 40, 49, 18, 28, 20, + 55, 30, 34, 11, 43, 14, 22, 4, + 62, 57, 46, 52, 38, 26, 32, 41, + 50, 36, 17, 19, 29, 10, 13, 21, + 56, 45, 25, 31, 35, 16, 9, 12, + 44, 24, 15, 8, 23, 7, 6, 5}; + + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + + return log2_tab64[((Uint64)((v - (v >> 1))*0x07EDD5E59A4E28C2)) >> 58]; +} + +/* ------------------------------------------------------------------------- */ + +typedef struct hist_tree__ { + struct hist_tree__ *parent; + struct hist_tree__ *left; + struct hist_tree__ *right; + + int is_red; + + alcu_atag_t tag; + UWord histogram[1]; +} hist_tree_t; + +#define ERTS_RBT_PREFIX hist_tree +#define ERTS_RBT_T hist_tree_t +#define ERTS_RBT_KEY_T UWord +#define ERTS_RBT_FLAGS_T int +#define ERTS_RBT_INIT_EMPTY_TNODE(T) ((void)0) +#define ERTS_RBT_IS_RED(T) ((T)->is_red) +#define ERTS_RBT_SET_RED(T) ((T)->is_red = 1) +#define ERTS_RBT_IS_BLACK(T) (!ERTS_RBT_IS_RED(T)) +#define ERTS_RBT_SET_BLACK(T) ((T)->is_red = 0) +#define ERTS_RBT_GET_FLAGS(T) ((T)->is_red) +#define ERTS_RBT_SET_FLAGS(T, F) ((T)->is_red = F) +#define ERTS_RBT_GET_PARENT(T) ((T)->parent) +#define ERTS_RBT_SET_PARENT(T, P) ((T)->parent = P) +#define ERTS_RBT_GET_RIGHT(T) ((T)->right) +#define ERTS_RBT_SET_RIGHT(T, R) ((T)->right = (R)) +#define ERTS_RBT_GET_LEFT(T) ((T)->left) +#define ERTS_RBT_SET_LEFT(T, L) ((T)->left = (L)) +#define ERTS_RBT_GET_KEY(T) ((T)->tag) +#define ERTS_RBT_IS_LT(KX, KY) (KX < KY) +#define ERTS_RBT_IS_EQ(KX, KY) (KX == KY) +#define ERTS_RBT_WANT_FOREACH_DESTROY_YIELDING +#define ERTS_RBT_WANT_FOREACH_DESTROY +#define ERTS_RBT_WANT_INSERT +#define ERTS_RBT_WANT_LOOKUP +#define ERTS_RBT_UNDEF + +#include "erl_rbtree.h" + +typedef struct { + blockscan_t common; + + ErtsIRefStorage iref; + Process *process; + + hist_tree_rbt_yield_state_t hist_tree_yield; + hist_tree_t *hist_tree; + UWord hist_count; + + UWord hist_slot_start; + int hist_slot_count; + + UWord unscanned_size; + + ErtsHeapFactory msg_factory; + int building_result; + Eterm result_list; +} gather_ahist_t; + +static void gather_ahist_update(gather_ahist_t *state, UWord tag, UWord size) +{ + hist_tree_t *hist_node; + UWord size_interval; + int hist_slot; + + hist_node = hist_tree_rbt_lookup(state->hist_tree, tag); + + if (hist_node == NULL) { + /* Plain calloc is intentional. */ + hist_node = (hist_tree_t*)calloc(1, sizeof(hist_tree_t) + + (state->hist_slot_count - 1) * + sizeof(hist_node->histogram[0])); + hist_node->tag = tag; + + hist_tree_rbt_insert(&state->hist_tree, hist_node); + state->hist_count++; + } + + size_interval = (size / state->hist_slot_start); + size_interval = u64_log2(size_interval + 1); + + hist_slot = MIN(size_interval, state->hist_slot_count - 1); + + hist_node->histogram[hist_slot]++; +} + +static int gather_ahist_scan(Allctr_t *allocator, + void *user_data, + Carrier_t *carrier) +{ + gather_ahist_t *state; + int blocks_scanned; + Block_t *block; + + state = (gather_ahist_t*)user_data; + blocks_scanned = 1; + + if (IS_SB_CARRIER(carrier)) { + alcu_atag_t tag; + + block = SBC2BLK(allocator, carrier); + tag = GET_BLK_ATAG(block); + + ASSERT(DBG_IS_VALID_ATAG(allocator, tag)); + + gather_ahist_update(state, tag, SBC_BLK_SZ(block)); + } else { + UWord scanned_bytes = MBC_HEADER_SIZE(allocator); + + ASSERT(IS_MB_CARRIER(carrier)); + + block = MBC_TO_FIRST_BLK(allocator, carrier); + + while (1) { + UWord block_size = MBC_BLK_SZ(block); + + if (IS_ALLOCED_BLK(block)) { + alcu_atag_t tag = GET_BLK_ATAG(block); + + ASSERT(DBG_IS_VALID_ATAG(allocator, tag)); + + gather_ahist_update(state, tag, block_size); + } + + scanned_bytes += block_size; + + if (blocks_scanned >= BLOCKSCAN_BAILOUT_THRESHOLD) { + state->unscanned_size += CARRIER_SZ(carrier) - scanned_bytes; + break; + } else if (IS_LAST_BLK(block)) { + break; + } + + block = NXT_BLK(block); + blocks_scanned++; + } + } + + return blocks_scanned; +} + +static void gather_ahist_append_result(hist_tree_t *node, void *arg) +{ + gather_ahist_t *state = (gather_ahist_t*)arg; + + Eterm histogram_tuple, tag_tuple; + + Eterm *hp; + int ix; + + ASSERT(state->building_result); + + hp = erts_produce_heap(&state->msg_factory, 7 + state->hist_slot_count, 0); + + hp[0] = make_arityval(state->hist_slot_count); + + for (ix = 0; ix < state->hist_slot_count; ix++) { + hp[1 + ix] = make_small(node->histogram[ix]); + } + + histogram_tuple = make_tuple(hp); + hp += 1 + state->hist_slot_count; + + hp[0] = make_arityval(3); + hp[1] = ATAG_ID(node->tag); + hp[2] = alloc_type_atoms[ATAG_TYPE(node->tag)]; + hp[3] = histogram_tuple; + + tag_tuple = make_tuple(hp); + hp += 4; + + state->result_list = CONS(hp, tag_tuple, state->result_list); + + /* Plain free is intentional. */ + free(node); +} + +static void gather_ahist_send(gather_ahist_t *state) +{ + Eterm result_tuple, unscanned_size, task_ref; + + Uint term_size; + Eterm *hp; + + ASSERT((state->result_list == NIL) ^ (state->hist_count > 0)); + ASSERT(state->building_result); + + term_size = 4 + erts_iref_storage_heap_size(&state->iref); + term_size += IS_USMALL(0, state->unscanned_size) ? 0 : BIG_UINT_HEAP_SIZE; + + hp = erts_produce_heap(&state->msg_factory, term_size, 0); + + task_ref = erts_iref_storage_make_ref(&state->iref, &hp, + &(state->msg_factory.message)->hfrag.off_heap, 0); + + unscanned_size = bld_unstable_uint(&hp, NULL, state->unscanned_size); + + hp[0] = make_arityval(3); + hp[1] = task_ref; + hp[2] = unscanned_size; + hp[3] = state->result_list; + + result_tuple = make_tuple(hp); + + erts_factory_trim_and_close(&state->msg_factory, &result_tuple, 1); + + erts_queue_message(state->process, 0, state->msg_factory.message, + result_tuple, am_system); +} + +static int gather_ahist_finish(void *arg) +{ + gather_ahist_t *state = (gather_ahist_t*)arg; + + if (!state->building_result) { + ErtsMessage *message; + Uint minimum_size; + Eterm *hp; + + /* {Ref, unscanned size, [{Tag, {Histogram}} | Rest]} */ + minimum_size = 4 + erts_iref_storage_heap_size(&state->iref) + + state->hist_count * (7 + state->hist_slot_count); + + message = erts_alloc_message(minimum_size, &hp); + erts_factory_selfcontained_message_init(&state->msg_factory, + message, hp); + + ERTS_RBT_YIELD_STAT_INIT(&state->hist_tree_yield); + + state->result_list = NIL; + state->building_result = 1; + } + + if (hist_tree_rbt_foreach_destroy_yielding(&state->hist_tree, + &gather_ahist_append_result, + state, + &state->hist_tree_yield, + BLOCKSCAN_REDUCTIONS)) { + return 1; + } + + gather_ahist_send(state); + + return 0; +} + +static void gather_ahist_destroy_result(hist_tree_t *node, void *arg) +{ + (void)arg; + free(node); +} + +static void gather_ahist_abort(void *arg) +{ + gather_ahist_t *state = (gather_ahist_t*)arg; + + if (state->building_result) { + erts_factory_undo(&state->msg_factory); + } + + hist_tree_rbt_foreach_destroy(&state->hist_tree, + &gather_ahist_destroy_result, + NULL); +} + +int erts_alcu_gather_alloc_histograms(Process *p, int allocator_num, + int sched_id, int hist_width, + UWord hist_start, Eterm ref) +{ + gather_ahist_t *gather_state; + blockscan_t *scanner; + Allctr_t *allocator; + + ASSERT(is_internal_ref(ref)); + + if (!blockscan_get_specific_allocator(allocator_num, + sched_id, + &allocator)) { + return 0; + } else if (!allocator->atags) { + return 0; + } + + ensure_atoms_initialized(allocator); + + /* Plain calloc is intentional. */ + gather_state = (gather_ahist_t*)calloc(1, sizeof(gather_ahist_t)); + scanner = &gather_state->common; + + scanner->abort = gather_ahist_abort; + scanner->scan = gather_ahist_scan; + scanner->finish = gather_ahist_finish; + scanner->user_data = gather_state; + + erts_iref_storage_save(&gather_state->iref, ref); + gather_state->hist_slot_start = hist_start; + gather_state->hist_slot_count = hist_width; + gather_state->process = p; + + blockscan_dispatch(scanner, p, allocator, sched_id); + + return 1; +} + +/* ------------------------------------------------------------------------- */ + +typedef struct chist_node__ { + struct chist_node__ *next; + + UWord carrier_size; + UWord unscanned_size; + UWord allocated_size; + + /* BLOCKSCAN_BAILOUT_THRESHOLD guarantees we won't overflow this or the + * counters in the free block histogram. */ + int allocated_count; + int flags; + + int histogram[1]; +} chist_node_t; + +typedef struct { + blockscan_t common; + + ErtsIRefStorage iref; + Process *process; + + Eterm allocator_desc; + + chist_node_t *info_list; + UWord info_count; + + UWord hist_slot_start; + int hist_slot_count; + + ErtsHeapFactory msg_factory; + int building_result; + Eterm result_list; +} gather_cinfo_t; + +static int gather_cinfo_scan(Allctr_t *allocator, + void *user_data, + Carrier_t *carrier) +{ + gather_cinfo_t *state; + chist_node_t *node; + int blocks_scanned; + Block_t *block; + + state = (gather_cinfo_t*)user_data; + node = calloc(1, sizeof(chist_node_t) + + (state->hist_slot_count - 1) * + sizeof(node->histogram[0])); + blocks_scanned = 1; + + /* ERTS_CRR_ALCTR_FLG_BUSY is ignored since we've set it ourselves and it + * would be misleading to include it. */ + node->flags = erts_atomic_read_rb(&carrier->allctr) & + (ERTS_CRR_ALCTR_FLG_MASK & ~ERTS_CRR_ALCTR_FLG_BUSY); + node->carrier_size = CARRIER_SZ(carrier); + + if (IS_SB_CARRIER(carrier)) { + UWord block_size; + + block = SBC2BLK(allocator, carrier); + block_size = SBC_BLK_SZ(block); + + node->allocated_size = block_size; + node->allocated_count = 1; + } else { + UWord scanned_bytes = MBC_HEADER_SIZE(allocator); + + block = MBC_TO_FIRST_BLK(allocator, carrier); + + while (1) { + UWord block_size = MBC_BLK_SZ(block); + + scanned_bytes += block_size; + + if (IS_ALLOCED_BLK(block)) { + node->allocated_size += block_size; + node->allocated_count++; + } else { + UWord size_interval; + int hist_slot; + + size_interval = (block_size / state->hist_slot_start); + size_interval = u64_log2(size_interval + 1); + + hist_slot = MIN(size_interval, state->hist_slot_count - 1); + + node->histogram[hist_slot]++; + } + + if (blocks_scanned >= BLOCKSCAN_BAILOUT_THRESHOLD) { + node->unscanned_size += CARRIER_SZ(carrier) - scanned_bytes; + break; + } else if (IS_LAST_BLK(block)) { + break; + } + + block = NXT_BLK(block); + blocks_scanned++; + } + } + + node->next = state->info_list; + state->info_list = node; + state->info_count++; + + return blocks_scanned; +} + +static void gather_cinfo_append_result(gather_cinfo_t *state, + chist_node_t *info) +{ + Eterm carrier_size, unscanned_size, allocated_size; + Eterm histogram_tuple, carrier_tuple; + + Uint term_size; + Eterm *hp; + int ix; + + ASSERT(state->building_result); + + term_size = 11 + state->hist_slot_count; + term_size += IS_USMALL(0, info->carrier_size) ? 0 : BIG_UINT_HEAP_SIZE; + term_size += IS_USMALL(0, info->unscanned_size) ? 0 : BIG_UINT_HEAP_SIZE; + term_size += IS_USMALL(0, info->allocated_size) ? 0 : BIG_UINT_HEAP_SIZE; + + hp = erts_produce_heap(&state->msg_factory, term_size, 0); + + hp[0] = make_arityval(state->hist_slot_count); + + for (ix = 0; ix < state->hist_slot_count; ix++) { + hp[1 + ix] = make_small(info->histogram[ix]); + } + + histogram_tuple = make_tuple(hp); + hp += 1 + state->hist_slot_count; + + carrier_size = bld_unstable_uint(&hp, NULL, info->carrier_size); + unscanned_size = bld_unstable_uint(&hp, NULL, info->unscanned_size); + allocated_size = bld_unstable_uint(&hp, NULL, info->allocated_size); + + hp[0] = make_arityval(7); + hp[1] = state->allocator_desc; + hp[2] = carrier_size; + hp[3] = unscanned_size; + hp[4] = allocated_size; + hp[5] = make_small(info->allocated_count); + hp[6] = (info->flags & ERTS_CRR_ALCTR_FLG_IN_POOL) ? am_true : am_false; + hp[7] = histogram_tuple; + + carrier_tuple = make_tuple(hp); + hp += 8; + + state->result_list = CONS(hp, carrier_tuple, state->result_list); + + free(info); +} + +static void gather_cinfo_send(gather_cinfo_t *state) +{ + Eterm result_tuple, task_ref; + + int term_size; + Eterm *hp; + + ASSERT((state->result_list == NIL) ^ (state->info_count > 0)); + ASSERT(state->building_result); + + term_size = 3 + erts_iref_storage_heap_size(&state->iref); + hp = erts_produce_heap(&state->msg_factory, term_size, 0); + + task_ref = erts_iref_storage_make_ref(&state->iref, &hp, + &(state->msg_factory.message)->hfrag.off_heap, 0); + + hp[0] = make_arityval(2); + hp[1] = task_ref; + hp[2] = state->result_list; + + result_tuple = make_tuple(hp); + + erts_factory_trim_and_close(&state->msg_factory, &result_tuple, 1); + + erts_queue_message(state->process, 0, state->msg_factory.message, + result_tuple, am_system); +} + +static int gather_cinfo_finish(void *arg) +{ + gather_cinfo_t *state = (gather_cinfo_t*)arg; + int reductions = BLOCKSCAN_REDUCTIONS; + + if (!state->building_result) { + ErtsMessage *message; + Uint minimum_size; + Eterm *hp; + + /* {Ref, [{Carrier size, unscanned size, allocated size, + * allocated block count, {Free block histogram}} | Rest]} */ + minimum_size = 3 + erts_iref_storage_heap_size(&state->iref) + + state->info_count * (11 + state->hist_slot_count); + + message = erts_alloc_message(minimum_size, &hp); + erts_factory_selfcontained_message_init(&state->msg_factory, + message, hp); + + state->result_list = NIL; + state->building_result = 1; + } + + while (state->info_list) { + chist_node_t *current = state->info_list; + state->info_list = current->next; + + gather_cinfo_append_result(state, current); + + if (reductions-- <= 0) { + return 1; + } + } + + gather_cinfo_send(state); + + return 0; +} + +static void gather_cinfo_abort(void *arg) +{ + gather_cinfo_t *state = (gather_cinfo_t*)arg; + + if (state->building_result) { + erts_factory_undo(&state->msg_factory); + } + + while (state->info_list) { + chist_node_t *current = state->info_list; + state->info_list = current->next; + + free(current); + } +} + +int erts_alcu_gather_carrier_info(struct process *p, int allocator_num, + int sched_id, int hist_width, + UWord hist_start, Eterm ref) +{ + gather_cinfo_t *gather_state; + blockscan_t *scanner; + + const char *allocator_desc; + Allctr_t *allocator; + + ASSERT(is_internal_ref(ref)); + + if (!blockscan_get_specific_allocator(allocator_num, + sched_id, + &allocator)) { + return 0; + } + + allocator_desc = ERTS_ALC_A2AD(allocator_num); + + /* Plain calloc is intentional. */ + gather_state = (gather_cinfo_t*)calloc(1, sizeof(gather_cinfo_t)); + scanner = &gather_state->common; + + scanner->abort = gather_cinfo_abort; + scanner->scan = gather_cinfo_scan; + scanner->finish = gather_cinfo_finish; + scanner->user_data = gather_state; + + gather_state->allocator_desc = erts_atom_put((byte *)allocator_desc, + sys_strlen(allocator_desc), + ERTS_ATOM_ENC_LATIN1, 1); + erts_iref_storage_save(&gather_state->iref, ref); + gather_state->hist_slot_start = hist_start * 2; + gather_state->hist_slot_count = hist_width; + gather_state->process = p; + + blockscan_dispatch(scanner, p, allocator, sched_id); + + return 1; +} + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\ * NOTE: erts_alcu_test() is only supposed to be used for testing. * @@ -6441,6 +7722,7 @@ erts_alcu_verify_unused_ts(Allctr_t *allctr) erts_mtx_unlock(&allctr->mutex); } + #ifdef DEBUG int is_sbc_blk(Block_t* blk) { diff --git a/erts/emulator/beam/erl_alloc_util.h b/erts/emulator/beam/erl_alloc_util.h index 05c8a0db3b..ff4d10b206 100644 --- a/erts/emulator/beam/erl_alloc_util.h +++ b/erts/emulator/beam/erl_alloc_util.h @@ -50,6 +50,7 @@ typedef struct { int tspec; int tpref; int ramv; + int atags; UWord sbct; UWord asbcst; UWord rsbcst; @@ -106,6 +107,7 @@ typedef struct { 0, /* (bool) tspec: thread specific */\ 0, /* (bool) tpref: thread preferred */\ 0, /* (bool) ramv: realloc always moves */\ + 0, /* (bool) atags: tagged allocations */\ 512*1024, /* (bytes) sbct: sbc threshold */\ 2*1024*2024, /* (amount) asbcst: abs sbc shrink threshold */\ 20, /* (%) rsbcst: rel sbc shrink threshold */\ @@ -142,6 +144,7 @@ typedef struct { 0, /* (bool) tspec: thread specific */\ 0, /* (bool) tpref: thread preferred */\ 0, /* (bool) ramv: realloc always moves */\ + 0, /* (bool) atags: tagged allocations */\ 64*1024, /* (bytes) sbct: sbc threshold */\ 2*1024*2024, /* (amount) asbcst: abs sbc shrink threshold */\ 20, /* (%) rsbcst: rel sbc shrink threshold */\ @@ -224,6 +227,36 @@ void erts_lcnt_update_allocator_locks(int enable); int erts_alcu_try_set_dyn_param(Allctr_t*, Eterm param, Uint value); +/* Gathers per-tag allocation histograms from the given allocator number + * (ERTS_ALC_A_*) and scheduler id. An id of 0 means the global instance will + * be used. + * + * The results are sent to `p`, and it returns the number of messages to wait + * for. */ +int erts_alcu_gather_alloc_histograms(struct process *p, int allocator_num, + int sched_id, int hist_width, + UWord hist_start, Eterm ref); + +/* Gathers per-carrier info from the given allocator number (ERTS_ALC_A_*) and + * scheduler id. An id of 0 means the global instance will be used. + * + * The results are sent to `p`, and it returns the number of messages to wait + * for. */ +int erts_alcu_gather_carrier_info(struct process *p, int allocator_num, + int sched_id, int hist_width, + UWord hist_start, Eterm ref); + +struct alcu_blockscan; + +typedef struct { + struct alcu_blockscan *current; + struct alcu_blockscan *last; +} ErtsAlcuBlockscanYieldData; + +int erts_handle_yielded_alcu_blockscan(struct ErtsSchedulerData_ *esdp, + ErtsAlcuBlockscanYieldData *yield); +void erts_alcu_sched_spec_data_init(struct ErtsSchedulerData_ *esdp); + #endif /* !ERL_ALLOC_UTIL__ */ #if defined(GET_ERL_ALLOC_UTIL_IMPL) && !defined(ERL_ALLOC_UTIL_IMPL__) @@ -548,6 +581,7 @@ struct Allctr_t_ { /* Options */ int t; int ramv; + int atags; Uint sbc_threshold; Uint sbc_move_threshold; Uint mbc_move_threshold; @@ -684,6 +718,7 @@ struct Allctr_t_ { #endif }; + int erts_alcu_start(Allctr_t *, AllctrInit_t *); void erts_alcu_stop(Allctr_t *); diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index d443e12409..0b186ab071 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -38,7 +38,7 @@ #include "erl_message.h" #include "erl_binary.h" #include "erl_db.h" -#include "erl_instrument.h" +#include "erl_mtrace.h" #include "dist.h" #include "erl_gc.h" #include "erl_cpu_topology.h" @@ -51,6 +51,7 @@ #include "erl_ptab.h" #include "erl_time.h" #include "erl_proc_sig_queue.h" +#include "erl_alloc_util.h" #ifdef HIPE #include "hipe_arch.h" #endif @@ -2151,45 +2152,6 @@ info_1_tuple(Process* BIF_P, /* Pointer to current process. */ return make_small(sizeof(UWord)); } goto badarg; - } else if (sel == am_allocated) { - if (arity == 2) { - Eterm res = THE_NON_VALUE; - char *buf; - Sint len = is_string(*tp); - if (len <= 0) - return res; - buf = (char *) erts_alloc(ERTS_ALC_T_TMP, len+1); - if (intlist_to_buf(*tp, buf, len) != len) - erts_exit(ERTS_ERROR_EXIT, "%s:%d: Internal error\n", __FILE__, __LINE__); - buf[len] = '\0'; - res = erts_instr_dump_memory_map(buf) ? am_true : am_false; - erts_free(ERTS_ALC_T_TMP, (void *) buf); - if (is_non_value(res)) - goto badarg; - return res; - } - else if (arity == 3 && tp[0] == am_status) { - if (is_atom(tp[1])) - return erts_instr_get_stat(BIF_P, tp[1], 1); - else { - Eterm res = THE_NON_VALUE; - char *buf; - Sint len = is_string(tp[1]); - if (len <= 0) - return res; - buf = (char *) erts_alloc(ERTS_ALC_T_TMP, len+1); - if (intlist_to_buf(tp[1], buf, len) != len) - erts_exit(ERTS_ERROR_EXIT, "%s:%d: Internal error\n", __FILE__, __LINE__); - buf[len] = '\0'; - res = erts_instr_dump_stat(buf, 1) ? am_true : am_false; - erts_free(ERTS_ALC_T_TMP, (void *) buf); - if (is_non_value(res)) - goto badarg; - return res; - } - } - else - goto badarg; } else if (sel == am_allocator) { switch (arity) { case 2: @@ -2557,8 +2519,6 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) } else if (BIF_ARG_1 == am_allocated_areas) { res = erts_allocated_areas(NULL, NULL, BIF_P); BIF_RET(res); - } else if (BIF_ARG_1 == am_allocated) { - BIF_RET(erts_instr_get_memory_map(BIF_P)); } else if (BIF_ARG_1 == am_hipe_architecture) { #if defined(HIPE) BIF_RET(hipe_arch_name); @@ -2699,9 +2659,6 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) sizeof(ERLANG_ARCHITECTURE)-1, NIL)); } - else if (BIF_ARG_1 == am_memory_types) { - return erts_instr_get_type_info(BIF_P); - } else if (BIF_ARG_1 == am_os_type) { BIF_RET(os_type_tuple); } @@ -4728,6 +4685,55 @@ BIF_RETTYPE erts_debug_set_internal_state_2(BIF_ALIST_2) BIF_ERROR(BIF_P, BADARG); } +static BIF_RETTYPE +gather_histograms_helper(Process * c_p, Eterm arg_tuple, + int gather(Process *, int, int, int, UWord, Eterm)) +{ + SWord hist_start, hist_width, sched_id; + int msg_count, alloc_num; + Eterm *args; + + /* This is an internal BIF, so the error checking is mostly left to erlang + * code. */ + + ASSERT(is_tuple_arity(arg_tuple, 5)); + args = tuple_val(arg_tuple); + + for (alloc_num = ERTS_ALC_A_MIN; alloc_num <= ERTS_ALC_A_MAX; alloc_num++) { + if(erts_is_atom_str(ERTS_ALC_A2AD(alloc_num), args[1], 0)) { + break; + } + } + + if (alloc_num > ERTS_ALC_A_MAX) { + BIF_ERROR(c_p, BADARG); + } + + sched_id = signed_val(args[2]); + hist_width = signed_val(args[3]); + hist_start = signed_val(args[4]); + + if (sched_id < 0 || sched_id > erts_no_schedulers) { + BIF_ERROR(c_p, BADARG); + } + + msg_count = gather(c_p, alloc_num, sched_id, hist_width, hist_start, args[5]); + + BIF_RET(make_small(msg_count)); +} + +BIF_RETTYPE erts_internal_gather_alloc_histograms_1(BIF_ALIST_1) +{ + return gather_histograms_helper(BIF_P, BIF_ARG_1, + erts_alcu_gather_alloc_histograms); +} + +BIF_RETTYPE erts_internal_gather_carrier_info_1(BIF_ALIST_1) +{ + return gather_histograms_helper(BIF_P, BIF_ARG_1, + erts_alcu_gather_carrier_info); +} + #ifdef ERTS_ENABLE_LOCK_COUNT typedef struct { diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 179822cc0b..57c6c10c7f 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -37,7 +37,7 @@ #include "erl_mseg.h" #include "erl_threads.h" #include "erl_hl_timer.h" -#include "erl_instrument.h" +#include "erl_mtrace.h" #include "erl_printf_term.h" #include "erl_misc_utils.h" #include "packet_parser.h" diff --git a/erts/emulator/beam/erl_instrument.c b/erts/emulator/beam/erl_instrument.c deleted file mode 100644 index 2f70e7996e..0000000000 --- a/erts/emulator/beam/erl_instrument.c +++ /dev/null @@ -1,1257 +0,0 @@ -/* - * %CopyrightBegin% - * - * Copyright Ericsson AB 2003-2016. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * %CopyrightEnd% - */ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#include "global.h" -#include "big.h" -#include "erl_instrument.h" -#include "erl_threads.h" - -typedef union { long l; double d; } Align_t; - -typedef struct { - Uint size; -#ifdef VALGRIND - void* valgrind_leak_suppressor; -#endif - Align_t mem[1]; -} StatBlock_t; - -#define STAT_BLOCK_HEADER_SIZE (sizeof(StatBlock_t) - sizeof(Align_t)) - -typedef struct MapStatBlock_t_ MapStatBlock_t; -struct MapStatBlock_t_ { - Uint size; - ErtsAlcType_t type_no; - Eterm pid; - MapStatBlock_t *prev; - MapStatBlock_t *next; - Align_t mem[1]; -}; - -#define MAP_STAT_BLOCK_HEADER_SIZE (sizeof(MapStatBlock_t) - sizeof(Align_t)) - -typedef struct { - Uint size; - Uint max_size; - Uint max_size_ever; - - Uint blocks; - Uint max_blocks; - Uint max_blocks_ever; -} Stat_t; - -static erts_mtx_t instr_mutex; -static erts_mtx_t instr_x_mutex; - -int erts_instr_memory_map; -int erts_instr_stat; - -static ErtsAllocatorFunctions_t real_allctrs[ERTS_ALC_A_MAX+1]; - -struct stats_ { - Stat_t tot; - Stat_t a[ERTS_ALC_A_MAX+1]; - Stat_t *ap[ERTS_ALC_A_MAX+1]; - Stat_t c[ERTS_ALC_C_MAX+1]; - Stat_t n[ERTS_ALC_N_MAX+1]; -}; - -static struct stats_ *stats; - -static MapStatBlock_t *mem_anchor; - -static Eterm *am_tot; -static Eterm *am_n; -static Eterm *am_a; -static Eterm *am_c; - -static int atoms_initialized; - -static struct { - Eterm total; - Eterm allocators; - Eterm classes; - Eterm types; - Eterm sizes; - Eterm blocks; - Eterm instr_hdr; -#ifdef DEBUG - Eterm end_of_atoms; -#endif -} am; - -static void ERTS_INLINE atom_init(Eterm *atom, const char *name) -{ - *atom = am_atom_put((char *) name, sys_strlen(name)); -} -#define AM_INIT(AM) atom_init(&am.AM, #AM) - -static void -init_atoms(void) -{ -#ifdef DEBUG - Eterm *atom; - for (atom = (Eterm *) &am; atom <= &am.end_of_atoms; atom++) { - *atom = THE_NON_VALUE; - } -#endif - - AM_INIT(total); - AM_INIT(allocators); - AM_INIT(classes); - AM_INIT(types); - AM_INIT(sizes); - AM_INIT(blocks); - AM_INIT(instr_hdr); - -#ifdef DEBUG - for (atom = (Eterm *) &am; atom < &am.end_of_atoms; atom++) { - ASSERT(*atom != THE_NON_VALUE); - } -#endif - - atoms_initialized = 1; -} - -#undef AM_INIT - -static void -init_am_tot(void) -{ - am_tot = (Eterm *) erts_alloc(ERTS_ALC_T_INSTR_INFO, - sizeof(Eterm)); - atom_init(am_tot, "total"); -} - - -static void -init_am_n(void) -{ - int i; - am_n = (Eterm *) erts_alloc(ERTS_ALC_T_INSTR_INFO, - (ERTS_ALC_N_MAX+1)*sizeof(Eterm)); - - for (i = ERTS_ALC_N_MIN; i <= ERTS_ALC_N_MAX; i++) { - atom_init(&am_n[i], ERTS_ALC_N2TD(i)); - } - -} - -static void -init_am_c(void) -{ - int i; - am_c = (Eterm *) erts_alloc(ERTS_ALC_T_INSTR_INFO, - (ERTS_ALC_C_MAX+1)*sizeof(Eterm)); - - for (i = ERTS_ALC_C_MIN; i <= ERTS_ALC_C_MAX; i++) { - atom_init(&am_c[i], ERTS_ALC_C2CD(i)); - } - -} - -static void -init_am_a(void) -{ - int i; - am_a = (Eterm *) erts_alloc(ERTS_ALC_T_INSTR_INFO, - (ERTS_ALC_A_MAX+1)*sizeof(Eterm)); - - for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) { - atom_init(&am_a[i], ERTS_ALC_A2AD(i)); - } - -} - -static ERTS_INLINE void -stat_upd_alloc(ErtsAlcType_t n, Uint size) -{ - ErtsAlcType_t t = ERTS_ALC_N2T(n); - ErtsAlcType_t a = ERTS_ALC_T2A(t); - ErtsAlcType_t c = ERTS_ALC_T2C(t); - - stats->ap[a]->size += size; - if (stats->ap[a]->max_size < stats->ap[a]->size) - stats->ap[a]->max_size = stats->ap[a]->size; - - stats->c[c].size += size; - if (stats->c[c].max_size < stats->c[c].size) - stats->c[c].max_size = stats->c[c].size; - - stats->n[n].size += size; - if (stats->n[n].max_size < stats->n[n].size) - stats->n[n].max_size = stats->n[n].size; - - stats->tot.size += size; - if (stats->tot.max_size < stats->tot.size) - stats->tot.max_size = stats->tot.size; - - stats->ap[a]->blocks++; - if (stats->ap[a]->max_blocks < stats->ap[a]->blocks) - stats->ap[a]->max_blocks = stats->ap[a]->blocks; - - stats->c[c].blocks++; - if (stats->c[c].max_blocks < stats->c[c].blocks) - stats->c[c].max_blocks = stats->c[c].blocks; - - stats->n[n].blocks++; - if (stats->n[n].max_blocks < stats->n[n].blocks) - stats->n[n].max_blocks = stats->n[n].blocks; - - stats->tot.blocks++; - if (stats->tot.max_blocks < stats->tot.blocks) - stats->tot.max_blocks = stats->tot.blocks; - -} - - -static ERTS_INLINE void -stat_upd_free(ErtsAlcType_t n, Uint size) -{ - ErtsAlcType_t t = ERTS_ALC_N2T(n); - ErtsAlcType_t a = ERTS_ALC_T2A(t); - ErtsAlcType_t c = ERTS_ALC_T2C(t); - - ASSERT(stats->ap[a]->size >= size); - stats->ap[a]->size -= size; - - ASSERT(stats->c[c].size >= size); - stats->c[c].size -= size; - - ASSERT(stats->n[n].size >= size); - stats->n[n].size -= size; - - ASSERT(stats->tot.size >= size); - stats->tot.size -= size; - - ASSERT(stats->ap[a]->blocks > 0); - stats->ap[a]->blocks--; - - ASSERT(stats->c[c].blocks > 0); - stats->c[c].blocks--; - - ASSERT(stats->n[n].blocks > 0); - stats->n[n].blocks--; - - ASSERT(stats->tot.blocks > 0); - stats->tot.blocks--; - -} - - -static ERTS_INLINE void -stat_upd_realloc(ErtsAlcType_t n, Uint size, Uint old_size) -{ - if (old_size) - stat_upd_free(n, old_size); - stat_upd_alloc(n, size); -} - -/* - * stat instrumentation callback functions - */ - -static void stat_pre_lock(void) -{ - erts_mtx_lock(&instr_mutex); -} - -static void stat_pre_unlock(void) -{ - erts_mtx_unlock(&instr_mutex); -} - -static ErtsAllocatorWrapper_t instr_wrapper; - -static void * -stat_alloc(ErtsAlcType_t n, void *extra, Uint size) -{ - ErtsAllocatorFunctions_t *real_af = (ErtsAllocatorFunctions_t *) extra; - Uint ssize; - void *res; - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_lock(&instr_mutex); - } - - ssize = size + STAT_BLOCK_HEADER_SIZE; - res = (*real_af->alloc)(n, real_af->extra, ssize); - if (res) { - stat_upd_alloc(n, size); - ((StatBlock_t *) res)->size = size; -#ifdef VALGRIND - /* Suppress "possibly leaks" by storing an actual dummy pointer - to the _start_ of the allocated block.*/ - ((StatBlock_t *) res)->valgrind_leak_suppressor = res; -#endif - res = (void *) ((StatBlock_t *) res)->mem; - } - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_unlock(&instr_mutex); - } - - return res; -} - -static void * -stat_realloc(ErtsAlcType_t n, void *extra, void *ptr, Uint size) -{ - ErtsAllocatorFunctions_t *real_af = (ErtsAllocatorFunctions_t *) extra; - Uint old_size; - Uint ssize; - void *sptr; - void *res; - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_lock(&instr_mutex); - } - - if (ptr) { - sptr = (void *) (((char *) ptr) - STAT_BLOCK_HEADER_SIZE); - old_size = ((StatBlock_t *) sptr)->size; - } - else { - sptr = NULL; - old_size = 0; - } - - ssize = size + STAT_BLOCK_HEADER_SIZE; - res = (*real_af->realloc)(n, real_af->extra, sptr, ssize); - if (res) { - stat_upd_realloc(n, size, old_size); - ((StatBlock_t *) res)->size = size; -#ifdef VALGRIND - ((StatBlock_t *) res)->valgrind_leak_suppressor = res; -#endif - res = (void *) ((StatBlock_t *) res)->mem; - } - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_unlock(&instr_mutex); - } - - return res; -} - -static void -stat_free(ErtsAlcType_t n, void *extra, void *ptr) -{ - ErtsAllocatorFunctions_t *real_af = (ErtsAllocatorFunctions_t *) extra; - void *sptr; - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_lock(&instr_mutex); - } - - if (ptr) { - sptr = (void *) (((char *) ptr) - STAT_BLOCK_HEADER_SIZE); - stat_upd_free(n, ((StatBlock_t *) sptr)->size); - } - else { - sptr = NULL; - } - - (*real_af->free)(n, real_af->extra, sptr); - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_unlock(&instr_mutex); - } - -} - -/* - * map stat instrumentation callback functions - */ - -static void map_stat_pre_lock(void) -{ - erts_mtx_lock(&instr_x_mutex); - erts_mtx_lock(&instr_mutex); -} - -static void map_stat_pre_unlock(void) -{ - erts_mtx_unlock(&instr_mutex); - erts_mtx_unlock(&instr_x_mutex); -} - -static void * -map_stat_alloc(ErtsAlcType_t n, void *extra, Uint size) -{ - ErtsAllocatorFunctions_t *real_af = (ErtsAllocatorFunctions_t *) extra; - Uint msize; - void *res; - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_lock(&instr_mutex); - } - - msize = size + MAP_STAT_BLOCK_HEADER_SIZE; - res = (*real_af->alloc)(n, real_af->extra, msize); - if (res) { - MapStatBlock_t *mb = (MapStatBlock_t *) res; - stat_upd_alloc(n, size); - - mb->size = size; - mb->type_no = n; - mb->pid = erts_get_current_pid(); - - mb->prev = NULL; - mb->next = mem_anchor; - if (mem_anchor) - mem_anchor->prev = mb; - mem_anchor = mb; - - res = (void *) mb->mem; - } - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_unlock(&instr_mutex); - } - - return res; -} - -static void * -map_stat_realloc(ErtsAlcType_t n, void *extra, void *ptr, Uint size) -{ - ErtsAllocatorFunctions_t *real_af = (ErtsAllocatorFunctions_t *) extra; - Uint old_size; - Uint msize; - void *mptr; - void *res; - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_lock(&instr_x_mutex); - erts_mtx_lock(&instr_mutex); - } - - if (ptr) { - mptr = (void *) (((char *) ptr) - MAP_STAT_BLOCK_HEADER_SIZE); - old_size = ((MapStatBlock_t *) mptr)->size; - } - else { - mptr = NULL; - old_size = 0; - } - - msize = size + MAP_STAT_BLOCK_HEADER_SIZE; - res = (*real_af->realloc)(n, real_af->extra, mptr, msize); - if (res) { - MapStatBlock_t *mb = (MapStatBlock_t *) res; - - mb->size = size; - mb->type_no = n; - mb->pid = erts_get_current_pid(); - - stat_upd_realloc(n, size, old_size); - - if (mptr != res) { - - if (mptr) { - if (mb->prev) - mb->prev->next = mb; - else { - ASSERT(mem_anchor == (MapStatBlock_t *) mptr); - mem_anchor = mb; - } - if (mb->next) - mb->next->prev = mb; - } - else { - mb->prev = NULL; - mb->next = mem_anchor; - if (mem_anchor) - mem_anchor->prev = mb; - mem_anchor = mb; - } - - } - - res = (void *) mb->mem; - } - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_unlock(&instr_mutex); - erts_mtx_unlock(&instr_x_mutex); - } - - return res; -} - -static void -map_stat_free(ErtsAlcType_t n, void *extra, void *ptr) -{ - ErtsAllocatorFunctions_t *real_af = (ErtsAllocatorFunctions_t *) extra; - void *mptr; - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_lock(&instr_x_mutex); - erts_mtx_lock(&instr_mutex); - } - - if (ptr) { - MapStatBlock_t *mb; - - mptr = (void *) (((char *) ptr) - MAP_STAT_BLOCK_HEADER_SIZE); - mb = (MapStatBlock_t *) mptr; - - stat_upd_free(n, mb->size); - - if (mb->prev) - mb->prev->next = mb->next; - else - mem_anchor = mb->next; - if (mb->next) - mb->next->prev = mb->prev; - } - else { - mptr = NULL; - } - - (*real_af->free)(n, real_af->extra, mptr); - - if (!erts_is_allctr_wrapper_prelocked()) { - erts_mtx_unlock(&instr_mutex); - erts_mtx_unlock(&instr_x_mutex); - } - -} - -static void dump_memory_map_to_stream(fmtfn_t to, void* to_arg) -{ - ErtsAlcType_t n; - MapStatBlock_t *bp; - int lock = !ERTS_IS_CRASH_DUMPING; - if (lock) { - ASSERT(!erts_is_allctr_wrapper_prelocked()); - erts_mtx_lock(&instr_mutex); - } - - /* Write header */ - - erts_cbprintf(to, to_arg, - "{instr_hdr,\n" - " %lu,\n" - " %lu,\n" - " {", - (unsigned long) ERTS_INSTR_VSN, - (unsigned long) MAP_STAT_BLOCK_HEADER_SIZE); - -#if ERTS_ALC_N_MIN != 1 -#error ERTS_ALC_N_MIN is not 1 -#endif - - for (n = ERTS_ALC_N_MIN; n <= ERTS_ALC_N_MAX; n++) { - ErtsAlcType_t t = ERTS_ALC_N2T(n); - ErtsAlcType_t a = ERTS_ALC_T2A(t); - ErtsAlcType_t c = ERTS_ALC_T2C(t); - const char *astr; - - if (erts_allctrs_info[a].enabled) - astr = ERTS_ALC_A2AD(a); - else - astr = ERTS_ALC_A2AD(ERTS_ALC_A_SYSTEM); - - erts_cbprintf(to, to_arg, - "%s{%s,%s,%s}%s", - (n == ERTS_ALC_N_MIN) ? "" : " ", - ERTS_ALC_N2TD(n), - astr, - ERTS_ALC_C2CD(c), - (n == ERTS_ALC_N_MAX) ? "" : ",\n"); - } - - erts_cbprintf(to, to_arg, "}}.\n"); - - /* Write memory data */ - for (bp = mem_anchor; bp; bp = bp->next) { - if (is_internal_pid(bp->pid)) - erts_cbprintf(to, to_arg, - "{%lu, %lu, %lu, {%lu,%lu,%lu}}.\n", - (UWord) bp->type_no, - (UWord) bp->mem, - (UWord) bp->size, - (UWord) pid_channel_no(bp->pid), - (UWord) pid_number(bp->pid), - (UWord) pid_serial(bp->pid)); - else - erts_cbprintf(to, to_arg, - "{%lu, %lu, %lu, undefined}.\n", - (UWord) bp->type_no, - (UWord) bp->mem, - (UWord) bp->size); - } - - if (lock) - erts_mtx_unlock(&instr_mutex); -} - -int erts_instr_dump_memory_map_to(fmtfn_t to, void* to_arg) -{ - if (!erts_instr_memory_map) - return 0; - - dump_memory_map_to_stream(to, to_arg); - return 1; -} - -int erts_instr_dump_memory_map(const char *name) -{ - int fd; - - if (!erts_instr_memory_map) - return 0; - - fd = open(name, O_WRONLY | O_CREAT | O_TRUNC, 0640); - if (fd < 0) - return 0; - - dump_memory_map_to_stream(erts_write_fd, (void*)&fd); - - close(fd); - return 1; -} - -Eterm erts_instr_get_memory_map(Process *proc) -{ - MapStatBlock_t *org_mem_anchor; - Eterm hdr_tuple, md_list, res; - Eterm *hp; - Uint hsz; - MapStatBlock_t *bp; -#ifdef DEBUG - Eterm *end_hp; -#endif - - if (!erts_instr_memory_map) - return am_false; - - if (!atoms_initialized) - init_atoms(); - if (!am_n) - init_am_n(); - if (!am_c) - init_am_c(); - if (!am_a) - init_am_a(); - - erts_mtx_lock(&instr_x_mutex); - erts_mtx_lock(&instr_mutex); - - /* Header size */ - hsz = 5 + 1 + (ERTS_ALC_N_MAX+1-ERTS_ALC_N_MIN)*(1 + 4); - - /* Memory data list */ - for (bp = mem_anchor; bp; bp = bp->next) { - if (is_internal_pid(bp->pid)) { -#if (_PID_NUM_SIZE - 1 > MAX_SMALL) - if (internal_pid_number(bp->pid) > MAX_SMALL) - hsz += BIG_UINT_HEAP_SIZE; -#endif -#if (_PID_SER_SIZE - 1 > MAX_SMALL) - if (internal_pid_serial(bp->pid) > MAX_SMALL) - hsz += BIG_UINT_HEAP_SIZE; -#endif - hsz += 4; - } - - if ((UWord) bp->mem > MAX_SMALL) - hsz += BIG_UINT_HEAP_SIZE; - if (bp->size > MAX_SMALL) - hsz += BIG_UINT_HEAP_SIZE; - - hsz += 5 + 2; - } - - hsz += 3; /* Root tuple */ - - org_mem_anchor = mem_anchor; - mem_anchor = NULL; - - erts_mtx_unlock(&instr_mutex); - - hp = HAlloc(proc, hsz); /* May end up calling map_stat_alloc() */ - - erts_mtx_lock(&instr_mutex); - -#ifdef DEBUG - end_hp = hp + hsz; -#endif - - { /* Build header */ - ErtsAlcType_t n; - Eterm type_map; - Uint *hp2 = hp; -#ifdef DEBUG - Uint *hp2_end; -#endif - - hp += (ERTS_ALC_N_MAX + 1 - ERTS_ALC_N_MIN)*4; - -#ifdef DEBUG - hp2_end = hp; -#endif - - type_map = make_tuple(hp); - *(hp++) = make_arityval(ERTS_ALC_N_MAX + 1 - ERTS_ALC_N_MIN); - - for (n = ERTS_ALC_N_MIN; n <= ERTS_ALC_N_MAX; n++) { - ErtsAlcType_t t = ERTS_ALC_N2T(n); - ErtsAlcType_t a = ERTS_ALC_T2A(t); - ErtsAlcType_t c = ERTS_ALC_T2C(t); - - if (!erts_allctrs_info[a].enabled) - a = ERTS_ALC_A_SYSTEM; - - *(hp++) = TUPLE3(hp2, am_n[n], am_a[a], am_c[c]); - hp2 += 4; - } - - ASSERT(hp2 == hp2_end); - - hdr_tuple = TUPLE4(hp, - am.instr_hdr, - make_small(ERTS_INSTR_VSN), - make_small(MAP_STAT_BLOCK_HEADER_SIZE), - type_map); - - hp += 5; - } - - /* Build memory data list */ - - for (md_list = NIL, bp = org_mem_anchor; bp; bp = bp->next) { - Eterm tuple; - Eterm type; - Eterm ptr; - Eterm size; - Eterm pid; - - if (is_not_internal_pid(bp->pid)) - pid = am_undefined; - else { - Eterm c; - Eterm n; - Eterm s; - -#if (ERST_INTERNAL_CHANNEL_NO > MAX_SMALL) -#error Oversized internal channel number -#endif - c = make_small(ERST_INTERNAL_CHANNEL_NO); - -#if (_PID_NUM_SIZE - 1 > MAX_SMALL) - if (internal_pid_number(bp->pid) > MAX_SMALL) { - n = uint_to_big(internal_pid_number(bp->pid), hp); - hp += BIG_UINT_HEAP_SIZE; - } - else -#endif - n = make_small(internal_pid_number(bp->pid)); - -#if (_PID_SER_SIZE - 1 > MAX_SMALL) - if (internal_pid_serial(bp->pid) > MAX_SMALL) { - s = uint_to_big(internal_pid_serial(bp->pid), hp); - hp += BIG_UINT_HEAP_SIZE; - } - else -#endif - s = make_small(internal_pid_serial(bp->pid)); - pid = TUPLE3(hp, c, n, s); - hp += 4; - } - - -#if ERTS_ALC_N_MAX > MAX_SMALL -#error Oversized memory type number -#endif - type = make_small(bp->type_no); - - if ((UWord) bp->mem > MAX_SMALL) { - ptr = uint_to_big((UWord) bp->mem, hp); - hp += BIG_UINT_HEAP_SIZE; - } - else - ptr = make_small((UWord) bp->mem); - - if (bp->size > MAX_SMALL) { - size = uint_to_big(bp->size, hp); - hp += BIG_UINT_HEAP_SIZE; - } - else - size = make_small(bp->size); - - tuple = TUPLE4(hp, type, ptr, size, pid); - hp += 5; - - md_list = CONS(hp, tuple, md_list); - hp += 2; - } - - res = TUPLE2(hp, hdr_tuple, md_list); - - ASSERT(hp + 3 == end_hp); - - if (mem_anchor) { - for (bp = mem_anchor; bp->next; bp = bp->next) - ; - ASSERT(org_mem_anchor); - org_mem_anchor->prev = bp; - bp->next = org_mem_anchor; - } - else { - mem_anchor = org_mem_anchor; - } - - erts_mtx_unlock(&instr_mutex); - erts_mtx_unlock(&instr_x_mutex); - - return res; -} - -static ERTS_INLINE void -begin_new_max_period(Stat_t *stat, int min, int max) -{ - int i; - for (i = min; i <= max; i++) { - stat[i].max_size = stat[i].size; - stat[i].max_blocks = stat[i].blocks; - } -} - -static ERTS_INLINE void -update_max_ever_values(Stat_t *stat, int min, int max) -{ - int i; - for (i = min; i <= max; i++) { - if (stat[i].max_size_ever < stat[i].max_size) - stat[i].max_size_ever = stat[i].max_size; - if (stat[i].max_blocks_ever < stat[i].max_blocks) - stat[i].max_blocks_ever = stat[i].max_blocks; - } -} - -#define bld_string erts_bld_string -#define bld_tuple erts_bld_tuple -#define bld_tuplev erts_bld_tuplev -#define bld_list erts_bld_list -#define bld_2tup_list erts_bld_2tup_list -#define bld_uint erts_bld_uint - -Eterm -erts_instr_get_stat(Process *proc, Eterm what, int begin_max_period) -{ - int i, len, max, min, allctr; - Eterm *names, *values, res; - Uint arr_size, stat_size, hsz, *hszp, *hp, **hpp; - Stat_t *stat_src, *stat; - - if (!erts_instr_stat) - return am_false; - - if (!atoms_initialized) - init_atoms(); - - if (what == am.total) { - min = 0; - max = 0; - allctr = 0; - stat_size = sizeof(Stat_t); - stat_src = &stats->tot; - if (!am_tot) - init_am_tot(); - names = am_tot; - } - else if (what == am.allocators) { - min = ERTS_ALC_A_MIN; - max = ERTS_ALC_A_MAX; - allctr = 1; - stat_size = sizeof(Stat_t)*(ERTS_ALC_A_MAX+1); - stat_src = stats->a; - if (!am_a) - init_am_a(); - names = am_a; - } - else if (what == am.classes) { - min = ERTS_ALC_C_MIN; - max = ERTS_ALC_C_MAX; - allctr = 0; - stat_size = sizeof(Stat_t)*(ERTS_ALC_C_MAX+1); - stat_src = stats->c; - if (!am_c) - init_am_c(); - names = &am_c[ERTS_ALC_C_MIN]; - } - else if (what == am.types) { - min = ERTS_ALC_N_MIN; - max = ERTS_ALC_N_MAX; - allctr = 0; - stat_size = sizeof(Stat_t)*(ERTS_ALC_N_MAX+1); - stat_src = stats->n; - if (!am_n) - init_am_n(); - names = &am_n[ERTS_ALC_N_MIN]; - } - else { - return THE_NON_VALUE; - } - - stat = (Stat_t *) erts_alloc(ERTS_ALC_T_TMP, stat_size); - - arr_size = (max - min + 1)*sizeof(Eterm); - - if (allctr) - names = (Eterm *) erts_alloc(ERTS_ALC_T_TMP, arr_size); - - values = (Eterm *) erts_alloc(ERTS_ALC_T_TMP, arr_size); - - erts_mtx_lock(&instr_mutex); - - update_max_ever_values(stat_src, min, max); - - sys_memcpy((void *) stat, (void *) stat_src, stat_size); - - if (begin_max_period) - begin_new_max_period(stat_src, min, max); - - erts_mtx_unlock(&instr_mutex); - - hsz = 0; - hszp = &hsz; - hpp = NULL; - - restart_bld: - - len = 0; - for (i = min; i <= max; i++) { - if (!allctr || erts_allctrs_info[i].enabled) { - Eterm s[2]; - - if (allctr) - names[len] = am_a[i]; - - s[0] = bld_tuple(hpp, hszp, 4, - am.sizes, - bld_uint(hpp, hszp, stat[i].size), - bld_uint(hpp, hszp, stat[i].max_size), - bld_uint(hpp, hszp, stat[i].max_size_ever)); - - s[1] = bld_tuple(hpp, hszp, 4, - am.blocks, - bld_uint(hpp, hszp, stat[i].blocks), - bld_uint(hpp, hszp, stat[i].max_blocks), - bld_uint(hpp, hszp, stat[i].max_blocks_ever)); - - values[len] = bld_list(hpp, hszp, 2, s); - - len++; - } - } - - res = bld_2tup_list(hpp, hszp, len, names, values); - - if (!hpp) { - hp = HAlloc(proc, hsz); - hszp = NULL; - hpp = &hp; - goto restart_bld; - } - - erts_free(ERTS_ALC_T_TMP, (void *) stat); - erts_free(ERTS_ALC_T_TMP, (void *) values); - if (allctr) - erts_free(ERTS_ALC_T_TMP, (void *) names); - - return res; -} - -static void -dump_stat_to_stream(fmtfn_t to, void* to_arg, int begin_max_period) -{ - ErtsAlcType_t i, a_max, a_min; - - erts_mtx_lock(&instr_mutex); - - erts_cbprintf(to, to_arg, - "{instr_vsn,%lu}.\n", - (unsigned long) ERTS_INSTR_VSN); - - update_max_ever_values(&stats->tot, 0, 0); - - erts_cbprintf(to, to_arg, - "{total,[{total,[{sizes,%lu,%lu,%lu},{blocks,%lu,%lu,%lu}]}]}.\n", - (UWord) stats->tot.size, - (UWord) stats->tot.max_size, - (UWord) stats->tot.max_size_ever, - (UWord) stats->tot.blocks, - (UWord) stats->tot.max_blocks, - (UWord) stats->tot.max_blocks_ever); - - a_max = 0; - a_min = ~0; - for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) { - if (erts_allctrs_info[i].enabled) { - if (a_min > i) - a_min = i; - if (a_max < i) - a_max = i; - } - } - - ASSERT(ERTS_ALC_A_MIN <= a_min && a_min <= ERTS_ALC_A_MAX); - ASSERT(ERTS_ALC_A_MIN <= a_max && a_max <= ERTS_ALC_A_MAX); - ASSERT(a_min <= a_max); - - update_max_ever_values(stats->a, a_min, a_max); - - for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) { - if (erts_allctrs_info[i].enabled) { - erts_cbprintf(to, to_arg, - "%s{%s,[{sizes,%lu,%lu,%lu},{blocks,%lu,%lu,%lu}]}%s", - i == a_min ? "{allocators,\n [" : " ", - ERTS_ALC_A2AD(i), - (UWord) stats->a[i].size, - (UWord) stats->a[i].max_size, - (UWord) stats->a[i].max_size_ever, - (UWord) stats->a[i].blocks, - (UWord) stats->a[i].max_blocks, - (UWord) stats->a[i].max_blocks_ever, - i == a_max ? "]}.\n" : ",\n"); - } - } - - update_max_ever_values(stats->c, ERTS_ALC_C_MIN, ERTS_ALC_C_MAX); - - for (i = ERTS_ALC_C_MIN; i <= ERTS_ALC_C_MAX; i++) { - erts_cbprintf(to, to_arg, - "%s{%s,[{sizes,%lu,%lu,%lu},{blocks,%lu,%lu,%lu}]}%s", - i == ERTS_ALC_C_MIN ? "{classes,\n [" : " ", - ERTS_ALC_C2CD(i), - (UWord) stats->c[i].size, - (UWord) stats->c[i].max_size, - (UWord) stats->c[i].max_size_ever, - (UWord) stats->c[i].blocks, - (UWord) stats->c[i].max_blocks, - (UWord) stats->c[i].max_blocks_ever, - i == ERTS_ALC_C_MAX ? "]}.\n" : ",\n" ); - } - - update_max_ever_values(stats->n, ERTS_ALC_N_MIN, ERTS_ALC_N_MAX); - - for (i = ERTS_ALC_N_MIN; i <= ERTS_ALC_N_MAX; i++) { - erts_cbprintf(to, to_arg, - "%s{%s,[{sizes,%lu,%lu,%lu},{blocks,%lu,%lu,%lu}]}%s", - i == ERTS_ALC_N_MIN ? "{types,\n [" : " ", - ERTS_ALC_N2TD(i), - (UWord) stats->n[i].size, - (UWord) stats->n[i].max_size, - (UWord) stats->n[i].max_size_ever, - (UWord) stats->n[i].blocks, - (UWord) stats->n[i].max_blocks, - (UWord) stats->n[i].max_blocks_ever, - i == ERTS_ALC_N_MAX ? "]}.\n" : ",\n" ); - } - - if (begin_max_period) { - begin_new_max_period(&stats->tot, 0, 0); - begin_new_max_period(stats->a, a_min, a_max); - begin_new_max_period(stats->c, ERTS_ALC_C_MIN, ERTS_ALC_C_MAX); - begin_new_max_period(stats->n, ERTS_ALC_N_MIN, ERTS_ALC_N_MAX); - } - - erts_mtx_unlock(&instr_mutex); - -} - -int erts_instr_dump_stat_to(fmtfn_t to, void* to_arg, int begin_max_period) -{ - if (!erts_instr_stat) - return 0; - - dump_stat_to_stream(to, to_arg, begin_max_period); - return 1; -} - -int erts_instr_dump_stat(const char *name, int begin_max_period) -{ - int fd; - - if (!erts_instr_stat) - return 0; - - fd = open(name, O_WRONLY | O_CREAT | O_TRUNC,0640); - if (fd < 0) - return 0; - - dump_stat_to_stream(erts_write_fd, (void*)&fd, begin_max_period); - - close(fd); - return 1; -} - - -Uint -erts_instr_get_total(void) -{ - return erts_instr_stat ? stats->tot.size : 0; -} - -Uint -erts_instr_get_max_total(void) -{ - if (erts_instr_stat) { - update_max_ever_values(&stats->tot, 0, 0); - return stats->tot.max_size_ever; - } - return 0; -} - -Eterm -erts_instr_get_type_info(Process *proc) -{ - Eterm res, *tpls; - Uint hsz, *hszp, *hp, **hpp; - ErtsAlcType_t n; - - if (!am_n) - init_am_n(); - if (!am_a) - init_am_a(); - if (!am_c) - init_am_c(); - - tpls = (Eterm *) erts_alloc(ERTS_ALC_T_TMP, - (ERTS_ALC_N_MAX-ERTS_ALC_N_MIN+1) - * sizeof(Eterm)); - hsz = 0; - hszp = &hsz; - hpp = NULL; - - restart_bld: - -#if ERTS_ALC_N_MIN != 1 -#error ERTS_ALC_N_MIN is not 1 -#endif - - for (n = ERTS_ALC_N_MIN; n <= ERTS_ALC_N_MAX; n++) { - ErtsAlcType_t t = ERTS_ALC_N2T(n); - ErtsAlcType_t a = ERTS_ALC_T2A(t); - ErtsAlcType_t c = ERTS_ALC_T2C(t); - - if (!erts_allctrs_info[a].enabled) - a = ERTS_ALC_A_SYSTEM; - - tpls[n - ERTS_ALC_N_MIN] - = bld_tuple(hpp, hszp, 3, am_n[n], am_a[a], am_c[c]); - } - - res = bld_tuplev(hpp, hszp, ERTS_ALC_N_MAX-ERTS_ALC_N_MIN+1, tpls); - - if (!hpp) { - hp = HAlloc(proc, hsz); - hszp = NULL; - hpp = &hp; - goto restart_bld; - } - - erts_free(ERTS_ALC_T_TMP, tpls); - - return res; -} - -Uint -erts_instr_init(int stat, int map_stat) -{ - Uint extra_sz; - int i; - - am_tot = NULL; - am_n = NULL; - am_c = NULL; - am_a = NULL; - - erts_instr_memory_map = 0; - erts_instr_stat = 0; - atoms_initialized = 0; - - if (!stat && !map_stat) - return 0; - - stats = erts_alloc(ERTS_ALC_T_INSTR_INFO, sizeof(struct stats_)); - - erts_mtx_init(&instr_mutex, "instr", NIL, - ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_DEBUG); - - mem_anchor = NULL; - - /* Install instrumentation functions */ - ERTS_CT_ASSERT(sizeof(erts_allctrs) == sizeof(real_allctrs)); - - sys_memcpy((void *)real_allctrs,(void *)erts_allctrs,sizeof(erts_allctrs)); - - sys_memzero((void *) &stats->tot, sizeof(Stat_t)); - sys_memzero((void *) stats->a, sizeof(Stat_t)*(ERTS_ALC_A_MAX+1)); - sys_memzero((void *) stats->c, sizeof(Stat_t)*(ERTS_ALC_C_MAX+1)); - sys_memzero((void *) stats->n, sizeof(Stat_t)*(ERTS_ALC_N_MAX+1)); - - for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) { - if (erts_allctrs_info[i].enabled) - stats->ap[i] = &stats->a[i]; - else - stats->ap[i] = &stats->a[ERTS_ALC_A_SYSTEM]; - } - - if (map_stat) { - - erts_mtx_init(&instr_x_mutex, "instr_x", NIL, - ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_DEBUG); - - erts_instr_memory_map = 1; - erts_instr_stat = 1; - for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) { - erts_allctrs[i].alloc = map_stat_alloc; - erts_allctrs[i].realloc = map_stat_realloc; - erts_allctrs[i].free = map_stat_free; - erts_allctrs[i].extra = (void *) &real_allctrs[i]; - } - instr_wrapper.lock = map_stat_pre_lock; - instr_wrapper.unlock = map_stat_pre_unlock; - extra_sz = MAP_STAT_BLOCK_HEADER_SIZE; - } - else { - erts_instr_stat = 1; - for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) { - erts_allctrs[i].alloc = stat_alloc; - erts_allctrs[i].realloc = stat_realloc; - erts_allctrs[i].free = stat_free; - erts_allctrs[i].extra = (void *) &real_allctrs[i]; - } - instr_wrapper.lock = stat_pre_lock; - instr_wrapper.unlock = stat_pre_unlock; - extra_sz = STAT_BLOCK_HEADER_SIZE; - } - erts_allctr_wrapper_prelock_init(&instr_wrapper); - return extra_sz; -} - diff --git a/erts/emulator/beam/erl_instrument.h b/erts/emulator/beam/erl_instrument.h deleted file mode 100644 index 351172b2fa..0000000000 --- a/erts/emulator/beam/erl_instrument.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * %CopyrightBegin% - * - * Copyright Ericsson AB 2003-2016. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * %CopyrightEnd% - */ - -#ifndef ERL_INSTRUMENT_H__ -#define ERL_INSTRUMENT_H__ - -#include "erl_mtrace.h" - -#define ERTS_INSTR_VSN 2 - -extern int erts_instr_memory_map; -extern int erts_instr_stat; - -Uint erts_instr_init(int stat, int map_stat); -int erts_instr_dump_memory_map_to(fmtfn_t to, void* to_arg); -int erts_instr_dump_memory_map(const char *name); -Eterm erts_instr_get_memory_map(Process *process); -int erts_instr_dump_stat_to(fmtfn_t to, void* to_arg, int begin_max_period); -int erts_instr_dump_stat(const char *name, int begin_max_period); -Eterm erts_instr_get_stat(Process *proc, Eterm what, int begin_max_period); -Eterm erts_instr_get_type_info(Process *proc); -Uint erts_instr_get_total(void); -Uint erts_instr_get_max_total(void); - -#endif diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 47b46fdd75..88b30644b7 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -36,7 +36,6 @@ #include "erl_db.h" #include "dist.h" #include "beam_catches.h" -#include "erl_instrument.h" #include "erl_threads.h" #include "erl_binary.h" #include "beam_bp.h" @@ -2508,6 +2507,8 @@ handle_yield(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) yield |= erts_handle_yielded_ets_all_request(awdp->esdp, &awdp->yield.ets_all); + yield |= erts_handle_yielded_alcu_blockscan(awdp->esdp, + &awdp->yield.alcu_blockscan); /* * Other yielding operations... @@ -8334,6 +8335,7 @@ sched_thread_func(void *vesdp) ERTS_VERIFY_UNUSED_TEMP_ALLOC(NULL); #endif + erts_alcu_sched_spec_data_init(esdp); erts_ets_sched_spec_data_init(esdp); process_main(esdp->x_reg_array, esdp->f_reg_array); diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index bb97b6690c..e2aa1d9f84 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -593,6 +593,7 @@ typedef struct { ErtsDelayedAuxWorkWakeupJob *job; } delayed_wakeup; struct { + ErtsAlcuBlockscanYieldData alcu_blockscan; ErtsEtsAllYieldData ets_all; /* Other yielding operations... */ } yield; diff --git a/erts/emulator/test/process_SUITE.erl b/erts/emulator/test/process_SUITE.erl index 46ece531a8..585c5a1871 100644 --- a/erts/emulator/test/process_SUITE.erl +++ b/erts/emulator/test/process_SUITE.erl @@ -2217,36 +2217,44 @@ handle_event(Event, Pid) -> processes_term_proc_list(Config) when is_list(Config) -> Tester = self(), - as_expected = processes_term_proc_list_test(false), - {ok, Node} = start_node(Config, "+Mis true"), - RT = spawn_link(Node, fun () -> - receive after 1000 -> ok end, - processes_term_proc_list_test(false), - Tester ! {it_worked, self()} - end), - receive {it_worked, RT} -> ok end, - stop_node(Node), + + Run = fun(Args) -> + {ok, Node} = start_node(Config, Args), + RT = spawn_link(Node, fun () -> + receive after 1000 -> ok end, + as_expected = processes_term_proc_list_test(false), + Tester ! {it_worked, self()} + end), + receive {it_worked, RT} -> ok end, + stop_node(Node) + end, + + %% We have to run this test case with +S1 since instrument:allocations() + %% will report a free()'d block as present until it's actually deallocated + %% by its employer. + Run("+MSe true +MSatags false +S1"), + Run("+MSe true +MSatags true +S1"), + ok. - + -define(CHK_TERM_PROC_LIST(MC, XB), chk_term_proc_list(?LINE, MC, XB)). chk_term_proc_list(Line, MustChk, ExpectBlks) -> - case {MustChk, instrument:memory_status(types)} of - {false, false} -> + Allocs = instrument:allocations(#{ allocator_types => [sl_alloc] }), + case {MustChk, Allocs} of + {false, {error, not_enabled}} -> not_enabled; - {_, MS} -> - {value, - {ptab_list_deleted_el, - DL}} = lists:keysearch(ptab_list_deleted_el, 1, MS), - case lists:keysearch(blocks, 1, DL) of - {value, {blocks, ExpectBlks, _, _}} -> - ok; - {value, {blocks, Blks, _, _}} -> - exit({line, Line, - mismatch, expected, ExpectBlks, actual, Blks}); - Unexpected -> - exit(Unexpected) + {_, {ok, {_Shift, _Unscanned, ByOrigin}}} -> + ByType = maps:get(system, ByOrigin, #{}), + Hist = maps:get(ptab_list_deleted_el, ByType, {}), + case lists:sum(tuple_to_list(Hist)) of + ExpectBlks -> + ok; + Blks -> + exit({line, Line, mismatch, + expected, ExpectBlks, + actual, Blks}) end end, ok. diff --git a/erts/etc/common/erlexec.c b/erts/etc/common/erlexec.c index cf6d1bacca..21a3f40c97 100644 --- a/erts/etc/common/erlexec.c +++ b/erts/etc/common/erlexec.c @@ -79,6 +79,7 @@ static const char plusM_au_allocs[]= { static char *plusM_au_alloc_switches[] = { "as", "asbcst", + "atags", "acul", "acnl", "acfml", diff --git a/erts/preloaded/ebin/erts_internal.beam b/erts/preloaded/ebin/erts_internal.beam Binary files differindex f5967780ad..d1bc4ecd2c 100644 --- a/erts/preloaded/ebin/erts_internal.beam +++ b/erts/preloaded/ebin/erts_internal.beam diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl index bffa59338e..9c2f8ef0ae 100644 --- a/erts/preloaded/src/erlang.erl +++ b/erts/preloaded/src/erlang.erl @@ -3552,11 +3552,9 @@ max(A, _) -> A. %% -type memory_type() :: 'total' | 'processes' | 'processes_used' | 'system' - | 'atom' | 'atom_used' | 'binary' | 'code' | 'ets' - | 'low' | 'maximum'. + | 'atom' | 'atom_used' | 'binary' | 'code' | 'ets'. -define(CARRIER_ALLOCS, [mseg_alloc]). --define(LOW_ALLOCS, [ll_low_alloc, std_low_alloc]). -define(ALL_NEEDED_ALLOCS, (erlang:system_info(alloc_util_allocators) -- ?CARRIER_ALLOCS)). @@ -3568,9 +3566,7 @@ max(A, _) -> A. atom_used = 0, binary = 0, code = 0, - ets = 0, - low = 0, - maximum = 0}). + ets = 0}). -spec erlang:memory() -> [{Type, Size}] when Type :: memory_type(), @@ -3580,14 +3576,6 @@ memory() -> notsup -> erlang:error(notsup); Mem -> - InstrTail = case Mem#memory.maximum of - 0 -> []; - _ -> [{maximum, Mem#memory.maximum}] - end, - Tail = case Mem#memory.low of - 0 -> InstrTail; - _ -> [{low, Mem#memory.low} | InstrTail] - end, [{total, Mem#memory.total}, {processes, Mem#memory.processes}, {processes_used, Mem#memory.processes_used}, @@ -3596,7 +3584,7 @@ memory() -> {atom_used, Mem#memory.atom_used}, {binary, Mem#memory.binary}, {code, Mem#memory.code}, - {ets, Mem#memory.ets} | Tail] + {ets, Mem#memory.ets}] end. -spec erlang:memory(Type :: memory_type()) -> non_neg_integer(); @@ -3684,16 +3672,6 @@ need_mem_info(binary) -> {false, [binary_alloc], true, false}; need_mem_info(ets) -> {true, [ets_alloc], true, false}; -need_mem_info(low) -> - LowAllocs = ?LOW_ALLOCS -- ?CARRIER_ALLOCS, - {_, _, FeatureList, _} = erlang:system_info(allocator), - AlcUAllocs = case LowAllocs -- FeatureList of - [] -> LowAllocs; - _ -> [] - end, - {false, AlcUAllocs, true, true}; -need_mem_info(maximum) -> - {true, [], true, true}; need_mem_info(_) -> {false, [], false, true}. @@ -3706,8 +3684,6 @@ get_memval(atom_used, #memory{atom_used = V}) -> V; get_memval(binary, #memory{binary = V}) -> V; get_memval(code, #memory{code = V}) -> V; get_memval(ets, #memory{ets = V}) -> V; -get_memval(low, #memory{low = V}) -> V; -get_memval(maximum, #memory{maximum = V}) -> V; get_memval(_, #memory{}) -> 0. memory_is_supported() -> @@ -3762,16 +3738,6 @@ fix_proc([_ | Rest], Acc) -> fix_proc([], Acc) -> Acc. -is_low_alloc(_A, []) -> - false; -is_low_alloc(A, [A|_As]) -> - true; -is_low_alloc(A, [_A|As]) -> - is_low_alloc(A, As). - -is_low_alloc(A) -> - is_low_alloc(A, ?LOW_ALLOCS). - au_mem_data(notsup, _) -> notsup; au_mem_data(_, [{_, false} | _]) -> @@ -3824,16 +3790,11 @@ au_mem_data(#memory{total = Tot, Rest) end; au_mem_data(#memory{total = Tot, - system = Sys, - low = Low} = Mem, - [{A, _, Data} | Rest]) -> + system = Sys} = Mem, + [{_, _, Data} | Rest]) -> Sz = blocks_size(Data, 0), au_mem_data(Mem#memory{total = Tot+Sz, - system = Sys+Sz, - low = case is_low_alloc(A) of - true -> Low+Sz; - false -> Low - end}, + system = Sys+Sz}, Rest); au_mem_data(EMD, []) -> EMD. @@ -3855,10 +3816,6 @@ receive_emd(Ref) -> receive_emd(Ref, #memory{}, erlang:system_info(schedulers)). aa_mem_data(#memory{} = Mem, - [{maximum, Max} | Rest]) -> - aa_mem_data(Mem#memory{maximum = Max}, - Rest); -aa_mem_data(#memory{} = Mem, [{total, Tot} | Rest]) -> aa_mem_data(Mem#memory{total = Tot, system = 0}, % system will be adjusted later diff --git a/erts/preloaded/src/erts_internal.erl b/erts/preloaded/src/erts_internal.erl index da5c9c68ed..79169b7d23 100644 --- a/erts/preloaded/src/erts_internal.erl +++ b/erts/preloaded/src/erts_internal.erl @@ -80,6 +80,8 @@ -export([is_process_alive/1, is_process_alive/2]). +-export([gather_alloc_histograms/1, gather_carrier_info/1]). + %% %% Await result of send to port %% @@ -621,3 +623,24 @@ is_process_alive(Pid) -> Res end. +-spec gather_alloc_histograms({Type, SchedId, HistWidth, HistStart, Ref}) -> MsgCount when + Type :: atom(), + SchedId :: non_neg_integer(), + HistWidth :: non_neg_integer(), + HistStart :: non_neg_integer(), + Ref :: reference(), + MsgCount :: non_neg_integer(). + +gather_alloc_histograms(_) -> + erlang:nif_error(undef). + +-spec gather_carrier_info({Type, SchedId, HistWidth, HistStart, Ref}) -> MsgCount when + Type :: atom(), + SchedId :: non_neg_integer(), + HistWidth :: non_neg_integer(), + HistStart :: non_neg_integer(), + Ref :: reference(), + MsgCount :: non_neg_integer(). + +gather_carrier_info(_) -> + erlang:nif_error(undef). diff --git a/lib/observer/test/crashdump_viewer_SUITE.erl b/lib/observer/test/crashdump_viewer_SUITE.erl index 41ca3f3ce9..8bd7ad387b 100644 --- a/lib/observer/test/crashdump_viewer_SUITE.erl +++ b/lib/observer/test/crashdump_viewer_SUITE.erl @@ -674,7 +674,7 @@ do_create_dumps(DataDir,Rel) -> end, case Rel of current -> - CD3 = dump_with_args(DataDir,Rel,"instr","+Mim true"), + CD3 = dump_with_args(DataDir,Rel,"instr","+Muatags true"), CD4 = dump_with_strange_module_name(DataDir,Rel,"strangemodname"), CD5 = dump_with_size_limit_reached(DataDir,Rel,"trunc_bytes"), CD6 = dump_with_unicode_atoms(DataDir,Rel,"unicode"), diff --git a/lib/tools/doc/src/instrument.xml b/lib/tools/doc/src/instrument.xml index bb6f9b6100..9fd9332373 100644 --- a/lib/tools/doc/src/instrument.xml +++ b/lib/tools/doc/src/instrument.xml @@ -4,7 +4,7 @@ <erlref> <header> <copyright> - <year>1998</year><year>2016</year> + <year>1998</year><year>2018</year> <holder>Ericsson AB. All Rights Reserved.</holder> </copyright> <legalnotice> @@ -41,387 +41,190 @@ <note> <p>Note that this whole module is experimental, and the representations used as well as the functionality is likely to change in the future.</p> - <p>The <c>instrument</c> module interface was slightly changed in - Erlang/OTP R9C.</p> </note> - <p>To start an Erlang runtime system with instrumentation, use the - <c>+Mi*</c> set of command-line arguments to the <c>erl</c> command (see - the erts_alloc(3) and erl(1) man pages).</p> - <p>The basic object of study in the case of memory allocation is a memory - allocation map. A memory allocation map contains a list of descriptors - for each allocated memory block. Currently, a descriptor is a 4-tuple</p> - <pre> - {TypeNo, Address, Size, PidDesc} </pre> - <p>where <c>TypeNo</c> is the memory block type number, <c>Address</c> - is its place in memory, and <c>Size</c> is its size, in bytes. - <c>PidDesc</c> is either a tuple <c>{X,Y,Z}</c> identifying the - process which was executing when the block was allocated, or - <c>undefined</c> if no process was executing. The pid tuple - <c>{X,Y,Z}</c> can be transformed into a real pid by usage of the - <c>c:pid/3</c> function.</p> - <p>Various details about memory allocation:</p> - <p>Memory blocks are allocated both on the heap segment and on other memory - segments. This can cause the instrumentation functionality to report - very large holes. Currently the instrumentation functionality doesn't - provide any support for distinguishing between holes between memory - segments, and holes between allocated blocks inside memory segments. - The current size of the process cannot be obtained from within Erlang, - but can be seen with one of the system statistics tools, e.g., - <c>ps</c> or <c>top</c>. The Solaris utility <c>pmap</c> can be - useful. It reports currently mapped memory segments. </p> - <p>Overhead for instrumentation: When the emulator has been started with - the <seealso marker="erts:erts_alloc#Mim">"+Mim true"</seealso> - flag, each block is preceded by a 24 bytes large - header on a 32-bit machine and a 48 bytes large header on a 64-bit - machine. When the emulator has been started with the - <seealso marker="erts:erts_alloc#Mis">"+Mis true"</seealso> - flag, each block is preceded by an 8 bytes large header. These are the header - sizes used by the Erlang 5.3/OTP R9C emulator. Other versions of the - emulator may use other header sizes. The function - <seealso marker="#block_header_size/1">block_header_size/1</seealso> - can be used for retrieving the header size used for a specific memory - allocation map. The time overhead for managing the instrumentation - data is small.</p> - <p>Sizes presented by the instrumentation functionality are (by the - emulator) requested sizes, i.e. neither instrumentation headers nor - headers used by allocators are included.</p> </description> + <datatypes> + <datatype> + <name name="block_histogram"/> + <desc> + <p>A histogram of block sizes where each interval's upper bound is + twice as high as the one before it.</p> + <p>The upper bound of the first interval is provided by the function + that returned the histogram, and the last interval has no upper + bound.</p> + </desc> + </datatype> + <datatype> + <name name="allocation_summary"/> + <desc> + <p>A summary of allocated block sizes (including their headers) grouped + by their <c><anno>Origin</anno></c> and <c><anno>Type</anno></c>.</p> + <p><c><anno>Origin</anno></c> is generally which NIF or driver that + allocated the blocks, or 'system' if it could not be determined.</p> + <p><c><anno>Type</anno></c> is the allocation category that the blocks + belong to, e.g. <c>db_term</c>, <c>message</c> or <c>binary</c>.</p> + <p>If one or more carriers could not be scanned in full without harming + the responsiveness of the system, <c><anno>UnscannedSize</anno></c> + is the number of bytes that had to be skipped.</p> + </desc> + </datatype> + <datatype> + <name name="carrier_info_list"/> + <desc> + <p><c><anno>AllocatorType</anno></c> is the type of the allocator that + employs this carrier.</p> + <p><c><anno>TotalSize</anno></c> is the total size of the carrier, + including its header.</p> + <p><c><anno>AllocatedSize</anno></c> is the combined size of the + carrier's allocated blocks, including their headers.</p> + <p><c><anno>AllocatedCount</anno></c> is the number of allocated + blocks in the carrier.</p> + <p><c><anno>InPool</anno></c> is whether the carrier is in the + migration pool.</p> + <p><c><anno>FreeBlocks</anno></c> is a histogram of the free block + sizes in the carrier.</p> + <p>If the carrier could not be scanned in full without harming the + responsiveness of the system, <c><anno>UnscannedSize</anno></c> is + the number of bytes that had to be skipped.</p> + </desc> + </datatype> + </datatypes> <funcs> + <func> - <name>allocator_descr(MemoryData, TypeNo) -> AllocDescr | invalid_type | "unknown"</name> - <fsummary>Returns a allocator description</fsummary> - <type> - <v>MemoryData = {term(), AllocList}</v> - <v>AllocList = [Desc]</v> - <v>Desc = {int(), int(), int(), PidDesc}</v> - <v>PidDesc = {int(), int(), int()} | undefined</v> - <v>TypeNo = int()</v> - <v>AllocDescr = atom() | string()</v> - </type> - <desc> - <p>Returns the allocator description of the allocator that - manages memory blocks of type number <c>TypeNo</c> used in - <c>MemoryData</c>. - Valid <c>TypeNo</c>s are in the range returned by - <seealso marker="#type_no_range/1">type_no_range/1</seealso> on - this specific memory allocation map. If <c>TypeNo</c> is an - invalid integer, <c>invalid_type</c> is returned.</p> - </desc> - </func> - <func> - <name>block_header_size(MemoryData) -> int()</name> - <fsummary>Returns the memory block header size used by the emulator that generated the memory allocation map</fsummary> - <type> - <v>MemoryData = {term(), AllocList}</v> - <v>AllocList = [Desc]</v> - <v>Desc = {int(), int(), int(), PidDesc}</v> - <v>PidDesc = {int(), int(), int()} | undefined</v> - </type> - <desc> - <marker id="block_header_size_1"></marker> - <p>Returns the memory block header size used by the - emulator that generated the memory allocation map. The block - header size may differ between different emulators.</p> - </desc> - </func> - <func> - <name>class_descr(MemoryData, TypeNo) -> ClassDescr | invalid_type | "unknown"</name> - <fsummary>Returns a allocator description</fsummary> - <type> - <v>MemoryData = {term(), AllocList}</v> - <v>AllocList = [Desc]</v> - <v>Desc = {int(), int(), int(), PidDesc}</v> - <v>PidDesc = {int(), int(), int()} | undefined</v> - <v>TypeNo = int()</v> - <v>ClassDescr = atom() | string()</v> - </type> - <desc> - <p>Returns the class description of the class that - the type number <c>TypeNo</c> used in <c>MemoryData</c> belongs - to. - Valid <c>TypeNo</c>s are in the range returned by - <seealso marker="#type_no_range/1">type_no_range/1</seealso> on - this specific memory allocation map. If <c>TypeNo</c> is an - invalid integer, <c>invalid_type</c> is returned.</p> - </desc> - </func> - <func> - <name>descr(MemoryData) -> DescrMemoryData</name> - <fsummary>Replace type numbers in memory allocation map with type descriptions</fsummary> - <type> - <v>MemoryData = {term(), AllocList}</v> - <v>AllocList = [Desc]</v> - <v>Desc = {int(), int(), int(), PidDesc}</v> - <v>PidDesc = {int(), int(), int()} | undefined</v> - <v>DescrMemoryData = {term(), DescrAllocList}</v> - <v>DescrAllocList = [DescrDesc]</v> - <v>DescrDesc = {TypeDescr, int(), int(), DescrPidDesc}</v> - <v>TypeDescr = atom() | string()</v> - <v>DescrPidDesc = pid() | undefined</v> - </type> - <desc> - <p>Returns a memory allocation map where the type numbers (first - element of <c>Desc</c>) have been replaced by type descriptions, - and pid tuples (fourth element of <c>Desc</c>) have been - replaced by real pids.</p> - </desc> - </func> - <func> - <name>holes(MemoryData) -> ok</name> - <fsummary>Print out the sizes of unused memory blocks</fsummary> - <type> - <v>MemoryData = {term(), AllocList}</v> - <v>AllocList = [Desc]</v> - <v>Desc = {int(), int(), int(), PidDesc}</v> - <v>PidDesc = {int(), int(), int()} | undefined</v> - </type> - <desc> - <p>Prints out the size of each hole (i.e., the space between - allocated blocks) on the terminal. <em>NOTE:</em> Really large holes - are probably holes between memory segments. - The memory allocation map has to be sorted (see - <seealso marker="#sort/1">sort/1</seealso>).</p> - </desc> - </func> - <func> - <name>mem_limits(MemoryData) -> {Low, High}</name> - <fsummary>Return lowest and highest memory address used</fsummary> - <type> - <v>MemoryData = {term(), AllocList}</v> - <v>AllocList = [Desc]</v> - <v>Desc = {int(), int(), int(), PidDesc}</v> - <v>PidDesc = {int(), int(), int()} | undefined</v> - <v>Low = High = int()</v> - </type> - <desc> - <p>Returns a tuple <c>{Low, High}</c> indicating - the lowest and highest address used. - The memory allocation map has to be sorted (see - <seealso marker="#sort/1">sort/1</seealso>).</p> - </desc> - </func> - <func> - <name>memory_data() -> MemoryData | false</name> - <fsummary>Return the current memory allocation map</fsummary> - <type> - <v>MemoryData = {term(), AllocList}</v> - <v>AllocList = [Desc]</v> - <v>Desc = {int(), int(), int(), PidDesc}</v> - <v>PidDesc = {int(), int(), int()} | undefined</v> - </type> - <desc> - <p>Returns <c>MemoryData</c> (a the memory allocation map) - if the emulator has been started with the "<c>+Mim true</c>" - command-line argument; otherwise, <c>false</c>. <em>NOTE:</em><c>memory_data/0</c> blocks execution of other processes while - the data is collected. The time it takes to collect the data can - be substantial.</p> - </desc> - </func> - <func> - <name>memory_status(StatusType) -> [StatusInfo] | false</name> - <fsummary>Return current memory allocation status</fsummary> - <type> - <v>StatusType = total | allocators | classes | types</v> - <v>StatusInfo = {About, [Info]}</v> - <v>About = atom()</v> - <v>Info = {InfoName, Current, MaxSinceLast, MaxEver}</v> - <v>InfoName = sizes|blocks</v> - <v>Current = int()</v> - <v>MaxSinceLast = int()</v> - <v>MaxEver = int()</v> - </type> - <desc> - <p>Returns a list of <c>StatusInfo</c> if the emulator has been - started with the "<c>+Mis true</c>" or "<c>+Mim true</c>" - command-line argument; otherwise, <c>false</c>. </p> - <p>See the - <seealso marker="#read_memory_status/1">read_memory_status/1</seealso> - function for a description of the <c>StatusInfo</c> term.</p> - </desc> - </func> - <func> - <name>read_memory_data(File) -> MemoryData | {error, Reason}</name> - <fsummary>Read memory allocation map</fsummary> - <type> - <v>File = string()</v> - <v>MemoryData = {term(), AllocList}</v> - <v>AllocList = [Desc]</v> - <v>Desc = {int(), int(), int(), PidDesc}</v> - <v>PidDesc = {int(), int(), int()} | undefined</v> - </type> + <name name="allocations" arity="0"/> + <fsummary>Return a summary of all allocations in the system.</fsummary> <desc> - <marker id="read_memory_data_1"></marker> - <p>Reads a memory allocation map from the file <c>File</c> and - returns it. The file is assumed to have been created by - <c>store_memory_data/1</c>. The error codes are the same as for - <c>file:consult/1</c>.</p> + <p>Shorthand for + <seealso marker="#allocations/1"><c>allocations(#{})</c>.</seealso></p> </desc> </func> + <func> - <name>read_memory_status(File) -> MemoryStatus | {error, Reason}</name> - <fsummary>Read memory allocation status from a file</fsummary> - <type> - <v>File = string()</v> - <v>MemoryStatus = [{StatusType, [StatusInfo]}]</v> - <v>StatusType = total | allocators | classes | types</v> - <v>StatusInfo = {About, [Info]}</v> - <v>About = atom()</v> - <v>Info = {InfoName, Current, MaxSinceLast, MaxEver}</v> - <v>InfoName = sizes|blocks</v> - <v>Current = int()</v> - <v>MaxSinceLast = int()</v> - <v>MaxEver = int()</v> - </type> - <desc> - <marker id="read_memory_status_1"></marker> - <p>Reads memory allocation status from the file <c>File</c> and - returns it. The file is assumed to have been created by - <c>store_memory_status/1</c>. The error codes are the same as - for <c>file:consult/1</c>.</p> - <p>When <c>StatusType</c> is <c>allocators</c>, <c>About</c> is - the allocator that the information is about. When - <c>StatusType</c> is <c>types</c>, <c>About</c> is - the memory block type that the information is about. Memory - block types are not described other than by their name and may - vary between emulators. When <c>StatusType</c> is <c>classes</c>, - <c>About</c> is the memory block type class that information is - presented about. Memory block types are classified after their - use. Currently the following classes exist:</p> + <name name="allocations" arity="1"/> + <fsummary>Return a summary of all allocations filtered by allocator type + and scheduler id.</fsummary> + <desc> + <p>Returns a summary of all tagged allocations in the system, + optionally filtered by allocator type and scheduler id.</p> + <p>Only binaries and allocations made by NIFs and drivers are tagged by + default, but this can be configured an a per-allocator basis with the + <seealso marker="erts:erts_alloc#M_atags"><c>+M<S>atags</c> + </seealso> emulator option.</p> + <p>If tagged allocations are not enabled on any of the specified + allocator types, the call will fail with + <c>{error, not_enabled}</c>.</p> + <p>The following options can be used:</p> <taglist> - <tag><c>process_data</c></tag> - <item>Erlang process specific data.</item> - <tag><c>binary_data</c></tag> - <item>Erlang binaries.</item> - <tag><c>atom_data</c></tag> - <item>Erlang atoms.</item> - <tag><c>code_data</c></tag> - <item>Erlang code.</item> - <tag><c>system_data</c></tag> - <item>Other data used by the system</item> + <tag><c>allocator_types</c></tag> + <item> + <p>The allocator types that will be searched. Defaults to all + <c>alloc_util</c> allocators.</p> + </item> + <tag><c>scheduler_ids</c></tag> + <item> + <p>The scheduler ids whose allocator instances will be searched. A + scheduler id of 0 will refer to the global instance that is not + tied to any particular scheduler. Defaults to all schedulers and + the global instance.</p> + </item> + <tag><c>histogram_start</c></tag> + <item> + <p>The upper bound of the first interval in the allocated block + size histograms. Defaults to 128.</p> + </item> + <tag><c>histogram_width</c></tag> + <item> + <p>The number of intervals in the allocated block size histograms. + Defaults to 18.</p> + </item> </taglist> - <p>When <c>InfoName</c> is <c>sizes</c>, <c>Current</c>, - <c>MaxSinceLast</c>, and <c>MaxEver</c> are, respectively, current - size, maximum size since last call to - <c>store_memory_status/1</c> or <c>memory_status/1</c> with the - specific <c>StatusType</c>, and maximum size since the emulator - was started. When <c>InfoName</c> is <c>blocks</c>, <c>Current</c>, - <c>MaxSinceLast</c>, and <c>MaxEver</c> are, respectively, current - number of blocks, maximum number of blocks since last call to - <c>store_memory_status/1</c> or <c>memory_status/1</c> with the - specific <c>StatusType</c>, and maximum number of blocks since the - emulator was started. </p> - <p><em>NOTE:</em>A memory block is accounted for at - "the first level" allocator. E.g. <c>fix_alloc</c> allocates its - memory pools via <c>ll_alloc</c>. When a <c>fix_alloc</c> block - is allocated, neither the block nor the pool in which it resides - are accounted for as memory allocated via <c>ll_alloc</c> even - though it is.</p> - </desc> - </func> - <func> - <name>sort(MemoryData) -> MemoryData</name> - <fsummary>Sort the memory allocation list</fsummary> - <type> - <v>MemoryData = {term(), AllocList}</v> - <v>AllocList = [Desc]</v> - <v>Desc = {int(), int(), int(), PidDesc}</v> - <v>PidDesc = {int(), int(), int()} | undefined</v> - </type> - <desc> - <marker id="sort_1"></marker> - <p>Sorts a memory allocation map so that the addresses are in - ascending order.</p> - </desc> - </func> - <func> - <name>store_memory_data(File) -> true|false</name> - <fsummary>Store the current memory allocation map on a file</fsummary> - <type> - <v>File = string()</v> - </type> - <desc> - <p>Stores the current memory allocation map on the file - <c>File</c>. Returns <c>true</c> if the emulator has been - started with the "<c>+Mim true</c>" command-line argument, and - the map was successfully stored; otherwise, <c>false</c>. The - contents of the file can later be read using - <seealso marker="#read_memory_data/1">read_memory_data/1</seealso>. - <em>NOTE:</em><c>store_memory_data/0</c> blocks execution of - other processes while the data is collected. The time it takes - to collect the data can be substantial.</p> - </desc> - </func> - <func> - <name>store_memory_status(File) -> true|false</name> - <fsummary>Store the current memory allocation status on a file</fsummary> - <type> - <v>File = string()</v> - </type> - <desc> - <p>Stores the current memory status on the file - <c>File</c>. Returns <c>true</c> if the emulator has been - started with the "<c>+Mis true</c>", or "<c>+Mim true</c>" - command-line arguments, and the data was successfully stored; - otherwise, <c>false</c>. The contents of the file can later be - read using - <seealso marker="#read_memory_status/1">read_memory_status/1</seealso>.</p> - </desc> - </func> - <func> - <name>sum_blocks(MemoryData) -> int()</name> - <fsummary>Return the total amount of memory used</fsummary> - <type> - <v>MemoryData = {term(), AllocList}</v> - <v>AllocList = [Desc]</v> - <v>Desc = {int(), int(), int(), PidDesc}</v> - <v>PidDesc = {int(), int(), int()} | undefined</v> - </type> - <desc> - <p>Returns the total size of the memory blocks in the list.</p> + <p><em>Example:</em></p> + <code type="none"><![CDATA[ +> instrument:allocations(#{ histogram_start => 128, histogram_width => 15 }). +{ok,{128,0, + #{udp_inet => + #{driver_event_state => {0,0,0,0,0,0,0,0,0,1,0,0,0,0,0}}, + system => + #{heap => {0,0,0,0,20,4,2,2,2,3,0,1,0,0,1}, + db_term => {271,3,1,52,80,1,0,0,0,0,0,0,0,0,0}, + code => {0,0,0,5,3,6,11,22,19,20,10,2,1,0,0}, + binary => {18,0,0,0,7,0,0,1,0,0,0,0,0,0,0}, + message => {0,40,78,2,2,0,0,0,0,0,0,0,0,0,0}, + ... } + spawn_forker => + #{driver_select_data_state => + {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0}}, + ram_file_drv => #{drv_binary => {0,0,0,0,0,0,1,0,0,0,0,0,0,0,0}}, + prim_file => + #{process_specific_data => {2,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + nif_trap_export_entry => {0,4,0,0,0,0,0,0,0,0,0,0,0,0,0}, + monitor_extended => {0,1,0,0,0,0,0,0,0,0,0,0,0,0,0}, + drv_binary => {0,0,0,0,0,0,1,0,3,5,0,0,0,1,0}, + binary => {0,4,0,0,0,0,0,0,0,0,0,0,0,0,0}}, + prim_buffer => + #{nif_internal => {0,4,0,0,0,0,0,0,0,0,0,0,0,0,0}, + binary => {0,4,0,0,0,0,0,0,0,0,0,0,0,0,0}}}}} + ]]></code> </desc> </func> + <func> - <name>type_descr(MemoryData, TypeNo) -> TypeDescr | invalid_type</name> - <fsummary>Returns a type description</fsummary> - <type> - <v>MemoryData = {term(), AllocList}</v> - <v>AllocList = [Desc]</v> - <v>Desc = {int(), int(), int(), PidDesc}</v> - <v>PidDesc = {int(), int(), int()} | undefined</v> - <v>TypeNo = int()</v> - <v>TypeDescr = atom() | string()</v> - </type> + <name name="carriers" arity="0"/> + <fsummary>Return a list of all carriers in the system.</fsummary> <desc> - <p>Returns the type description of a type number used in - <c>MemoryData</c>. - Valid <c>TypeNo</c>s are in the range returned by - <seealso marker="#type_no_range/1">type_no_range/1</seealso> on - this specific memory allocation map. If <c>TypeNo</c> is an - invalid integer, <c>invalid_type</c> is returned.</p> + <p>Shorthand for + <seealso marker="#carriers/1"><c>carriers(#{})</c>.</seealso></p> </desc> </func> + <func> - <name>type_no_range(MemoryData) -> {Min, Max}</name> - <fsummary>Returns the memory block type numbers</fsummary> - <type> - <v>MemoryData = {term(), AllocList}</v> - <v>AllocList = [Desc]</v> - <v>Desc = {int(), int(), int(), PidDesc}</v> - <v>PidDesc = {int(), int(), int()} | undefined</v> - <v>Min = int()</v> - <v>Max = int()</v> - </type> + <name name="carriers" arity="1"/> + <fsummary>Return a list of all carriers filtered by allocator type and + scheduler id.</fsummary> <desc> - <marker id="type_no_range_1"></marker> - <p>Returns the memory block type number range used in - <c>MemoryData</c>. When the memory allocation map was generated - by an Erlang 5.3/OTP R9C or newer emulator, all integers <c>T</c> - that satisfy <c>Min</c> <= <c>T</c> <= <c>Max</c> are - valid type numbers. When the memory allocation map was generated - by a pre Erlang 5.3/OTP R9C emulator, all integers in the - range are <em>not</em> valid type numbers.</p> + <p>Returns a summary of all carriers in the system, optionally filtered + by allocator type and scheduler id.</p> + <p>If the specified allocator types are not enabled, the call will fail + with <c>{error, not_enabled}</c>.</p> + <p>The following options can be used:</p> + <taglist> + <tag><c>allocator_types</c></tag> + <item> + <p>The allocator types that will be searched. Defaults to all + <c>alloc_util</c> allocators.</p> + </item> + <tag><c>scheduler_ids</c></tag> + <item> + <p>The scheduler ids whose allocator instances will be searched. A + scheduler id of 0 will refer to the global instance that is not + tied to any particular scheduler. Defaults to all schedulers and + the global instance.</p> + </item> + <tag><c>histogram_start</c></tag> + <item> + <p>The upper bound of the first interval in the free block size + histograms. Defaults to 512.</p> + </item> + <tag><c>histogram_width</c></tag> + <item> + <p>The number of intervals in the free block size histograms. + Defaults to 14.</p> + </item> + </taglist> + <p><em>Example:</em></p> + <code type="none"><![CDATA[ +> instrument:carriers(#{ histogram_start => 512, histogram_width => 8 }). +{ok,{512, + [{ll_alloc,1048576,0,1048344,71,false,{0,0,0,0,0,0,0,0}}, + {binary_alloc,1048576,0,324640,13,false,{3,0,0,1,0,0,0,2}}, + {eheap_alloc,2097152,0,1037200,45,false,{2,1,1,3,4,3,2,2}}, + {fix_alloc,32768,0,29544,82,false,{22,0,0,0,0,0,0,0}}, + {...}|...]}} + ]]></code> </desc> </func> + </funcs> <section> diff --git a/lib/tools/src/instrument.erl b/lib/tools/src/instrument.erl index 055f4a7afb..0203fefe13 100644 --- a/lib/tools/src/instrument.erl +++ b/lib/tools/src/instrument.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1998-2016. All Rights Reserved. +%% Copyright Ericsson AB 1998-2018. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -19,410 +19,140 @@ %% -module(instrument). --export([holes/1, mem_limits/1, memory_data/0, read_memory_data/1, - sort/1, store_memory_data/1, sum_blocks/1, - descr/1, type_descr/2, allocator_descr/2, class_descr/2, - type_no_range/1, block_header_size/1, store_memory_status/1, - read_memory_status/1, memory_status/1]). - - --define(OLD_INFO_SIZE, 32). %% (sizeof(mem_link) in pre R9C utils.c) - --define(IHMARKER(H), element(1, H)). --define(VSN(H), element(2, H)). --define(INFO_SIZE(H), element(3, H)). --define(TYPEMAP(H), element(4, H)). - --define(IHDR(H), is_tuple(H), ?IHMARKER(H) =:= instr_hdr). --define(IHDRVSN(H, V), ?IHDR(H), ?VSN(H) =:= V). - -memory_data() -> - case catch erlang:system_info(allocated) of - {'EXIT',{Error,_}} -> - erlang:error(Error, []); - {'EXIT',Error} -> - erlang:error(Error, []); - Res -> - Res +-export([allocations/0, allocations/1, + carriers/0, carriers/1]). + +-type block_histogram() :: tuple(). + +-type allocation_summary() :: + {HistogramStart :: non_neg_integer(), + UnscannedSize :: non_neg_integer(), + Allocations :: #{ Origin :: atom() => + #{ Type :: atom() => block_histogram() }}}. + +-spec allocations() -> {ok, Result} | {error, Reason} when + Result :: allocation_summary(), + Reason :: not_enabled. +allocations() -> + allocations(#{}). + +-spec allocations(Options) -> {ok, Result} | {error, Reason} when + Result :: allocation_summary(), + Reason :: not_enabled, + Options :: #{ scheduler_ids => list(non_neg_integer()), + allocator_types => list(atom()), + histogram_start => pos_integer(), + histogram_width => pos_integer() }. +allocations(Options) -> + Ref = make_ref(), + + Defaults = #{ scheduler_ids => lists:seq(0, erlang:system_info(schedulers)), + allocator_types => erlang:system_info(alloc_util_allocators), + histogram_start => 128, + histogram_width => 18 }, + + {HistStart, MsgCount} = + dispatch_gather(maps:merge(Defaults, Options), Ref, + fun erts_internal:gather_alloc_histograms/1), + + alloc_hist_receive(HistStart, MsgCount, Ref). + +alloc_hist_receive(_HistStart, 0, _Ref) -> + {error, not_enabled}; +alloc_hist_receive(HistStart, MsgCount, Ref) when MsgCount > 0 -> + {Unscanned, Histograms} = alloc_hist_receive_1(MsgCount, Ref, 0, #{}), + {ok, {HistStart, Unscanned, Histograms}}. + +alloc_hist_receive_1(0, _Ref, Unscanned, Result) -> + {Unscanned, Result}; +alloc_hist_receive_1(MsgCount, Ref, Unscanned0, Result0) -> + receive + {Ref, Unscanned, Tags} -> + Result = lists:foldl(fun alloc_hist_fold_result/2, Result0, Tags), + alloc_hist_receive_1(MsgCount - 1, Ref, Unscanned0 + Unscanned, Result) end. -store_memory_data(File) -> - case catch erlang:system_info({allocated, File}) of - {'EXIT',{Error,_}} -> - erlang:error(Error, [File]); - {'EXIT',Error} -> - erlang:error(Error, [File]); - Res -> - Res +alloc_hist_fold_result({Id, Type, BlockHist}, Result0) -> + IdAllocs0 = maps:get(Id, Result0, #{}), + MergedHists = case maps:find(Type, IdAllocs0) of + {ok, PrevHist} -> + alloc_hist_merge_hist(tuple_size(BlockHist), + BlockHist, + PrevHist); + error -> + BlockHist + end, + IdAllocs = IdAllocs0#{ Type => MergedHists }, + Result0#{ Id => IdAllocs }. + +alloc_hist_merge_hist(0, A, _B) -> + A; +alloc_hist_merge_hist(Index, A, B) -> + Merged = setelement(Index, A, element(Index, A) + element(Index, B)), + alloc_hist_merge_hist(Index - 1, Merged, B). + +-type carrier_info_list() :: + {HistogramStart :: non_neg_integer(), + Carriers :: [{AllocatorType :: atom(), + TotalSize :: non_neg_integer(), + UnscannedSize :: non_neg_integer(), + AllocatedSize :: non_neg_integer(), + AllocatedCount :: non_neg_integer(), + InPool :: boolean(), + FreeBlocks :: block_histogram()}]}. + +-spec carriers() -> {ok, Result} | {error, Reason} when + Result :: carrier_info_list(), + Reason :: not_enabled. +carriers() -> + carriers(#{}). + +-spec carriers(Options) -> {ok, Result} | {error, Reason} when + Result :: carrier_info_list(), + Reason :: not_enabled, + Options :: #{ scheduler_ids => list(non_neg_integer()), + allocator_types => list(atom()), + histogram_start => pos_integer(), + histogram_width => pos_integer() }. +carriers(Options) -> + Ref = make_ref(), + + Defaults = #{ scheduler_ids => lists:seq(0, erlang:system_info(schedulers)), + allocator_types => erlang:system_info(alloc_util_allocators), + histogram_start => 512, + histogram_width => 14 }, + + {HistStart, MsgCount} = + dispatch_gather(maps:merge(Defaults, Options), Ref, + fun erts_internal:gather_carrier_info/1), + + carrier_info_receive(HistStart, MsgCount, Ref). + +carrier_info_receive(_HistStart, 0, _Ref) -> + {error, not_enabled}; +carrier_info_receive(HistStart, MsgCount, Ref) -> + {ok, {HistStart, carrier_info_receive_1(MsgCount, Ref, [])}}. + +carrier_info_receive_1(0, _Ref, Result) -> + lists:flatten(Result); +carrier_info_receive_1(MsgCount, Ref, Result0) -> + receive + {Ref, Carriers} -> + carrier_info_receive_1(MsgCount - 1, Ref, [Carriers, Result0]) end. -memory_status(Type) when is_atom(Type) -> - case catch erlang:system_info({allocated, status, Type}) of - {'EXIT',{Error,_}} -> - erlang:error(Error, [Type]); - {'EXIT',Error} -> - erlang:error(Error, [Type]); - Res -> - Res - end; -memory_status(Type) -> - erlang:error(badarg, [Type]). - -store_memory_status(File) when is_list(File) -> - case catch erlang:system_info({allocated, status, File}) of - {'EXIT',{Error,_}} -> - erlang:error(Error, [File]); - {'EXIT',Error} -> - erlang:error(Error, [File]); - Res -> - Res - end; -store_memory_status(File) -> - erlang:error(badarg, [File]). - -read_memory_data(File) when is_list(File) -> - case file:consult(File) of - {ok, [Hdr|MD]} when ?IHDR(Hdr) -> - {Hdr, MD}; - {ok, [{T,A,S,undefined}|_] = MD} when is_integer(T), - is_integer(A), - is_integer(S) -> - {{instr_hdr, 1, ?OLD_INFO_SIZE}, MD}; - {ok, [{T,A,S,{X,Y,Z}}|_] = MD} when is_integer(T), - is_integer(A), - is_integer(S), - is_integer(X), - is_integer(Y), - is_integer(Z) -> - {{instr_hdr, 1, ?OLD_INFO_SIZE}, MD}; - {ok, _} -> - {error, eio}; - Error -> - Error - end; -read_memory_data(File) -> - erlang:error(badarg, [File]). - -read_memory_status(File) when is_list(File) -> - case file:consult(File) of - {ok, [{instr_vsn, _}|Stat]} -> - Stat; - {ok, _} -> - {error, eio}; - Error -> - Error - end; -read_memory_status(File) -> - erlang:error(badarg, [File]). - -holes({Hdr, MD}) when ?IHDR(Hdr) -> - check_holes(?INFO_SIZE(Hdr), MD). - -check_holes(_ISz, []) -> - ok; -check_holes(ISz, [E | L]) -> - check_holes(ISz, E, L). - -check_holes(_ISz, _E1, []) -> - io:format("~n"); -check_holes(ISz, E1, [E2 | Rest]) -> - check_hole(ISz, E1, E2), - check_holes(ISz, E2, Rest). - -check_hole(ISz, {_,P1,S1,_}, {_,P2,_,_}) -> - End = P1+S1, - Hole = P2 - (End + ISz), - if - Hole =< 7 -> - ok; - true -> - io:format(" ~p", [Hole]) - end. - -sum_blocks({Hdr, L}) when ?IHDR(Hdr) -> - lists:foldl(fun({_,_,S,_}, Sum) -> S+Sum end, - 0, - L). - -mem_limits({Hdr, L}) when ?IHDR(Hdr) -> - {_, P1, _, _} = hd(L), - {_, P2, S2, _} = lists:last(L), - {P1, P2+S2}. - -sort({Hdr, MD}) when ?IHDR(Hdr) -> - {Hdr, lists:keysort(2, MD)}. - -descr({Hdr, MD} = ID) when ?IHDR(Hdr) -> - {Hdr, lists:map(fun ({TN, Addr, Sz, {0, N, S}}) -> - {type_descr(ID, TN), - Addr, - Sz, - list_to_pid("<0." - ++ integer_to_list(N) - ++ "." - ++ integer_to_list(S) - ++ ">")}; - ({TN, Addr, Sz, undefined}) -> - {type_descr(ID, TN), - Addr, - Sz, - undefined} - end, - MD)}. - -block_header_size({Hdr, _}) when ?IHDR(Hdr) -> - ?INFO_SIZE(Hdr). - -type_descr({Hdr, _}, TypeNo) when ?IHDRVSN(Hdr, 2), - is_integer(TypeNo) -> - case catch element(1, element(TypeNo, ?TYPEMAP(Hdr))) of - {'EXIT', _} -> invalid_type; - Type -> Type - end; -type_descr({Hdr, _}, TypeNo) when ?IHDRVSN(Hdr, 1), - is_integer(TypeNo) -> - type_string(TypeNo). - - -allocator_descr({Hdr, _}, TypeNo) when ?IHDRVSN(Hdr, 2), is_integer(TypeNo) -> - case catch element(2, element(TypeNo, ?TYPEMAP(Hdr))) of - {'EXIT', _} -> invalid_type; - Type -> Type - end; -allocator_descr({Hdr, _}, TypeNo) when ?IHDRVSN(Hdr, 1), is_integer(TypeNo) -> - "unknown". - -class_descr({Hdr, _}, TypeNo) when ?IHDRVSN(Hdr, 2), is_integer(TypeNo) -> - case catch element(3, element(TypeNo, ?TYPEMAP(Hdr))) of - {'EXIT', _} -> invalid_type; - Type -> Type - end; -class_descr({Hdr, _}, TypeNo) when ?IHDRVSN(Hdr, 1), is_integer(TypeNo) -> - "unknown". - -type_no_range({Hdr, _}) when ?IHDRVSN(Hdr, 2) -> - {1, tuple_size(?TYPEMAP(Hdr))}; -type_no_range({Hdr, _}) when ?IHDRVSN(Hdr, 1) -> - {-1, 1000}. - -type_string(-1) -> - "unknown"; -type_string(1) -> - "atom text"; -type_string(11) -> - "atom desc"; -type_string(2) -> - "bignum (big_to_list)"; -type_string(31) -> - "fixalloc"; -type_string(32) -> - "unknown fixalloc block"; -type_string(33) -> - "message buffer"; -type_string(34) -> - "message link"; -type_string(4) -> - "estack"; -type_string(40) -> - "db table vec"; -type_string(41) -> - "db tree select buffer"; -type_string(43) -> - "db hash select buffer"; -type_string(44) -> - "db hash select list"; -type_string(45) -> - "db match prog stack"; -type_string(46) -> - "db match prog heap data"; -type_string(47) -> - "db temp buffer"; -type_string(48) -> - "db error"; -type_string(49) -> - "db error info"; -type_string(50) -> - "db trans tab"; -type_string(51) -> - "db segment"; -type_string(52) -> - "db term"; -type_string(53) -> - "db add_counter"; -type_string(54) -> - "db segment table"; -type_string(55) -> - "db table (fix)"; -type_string(56) -> - "db bindings"; -type_string(57) -> - "db counter"; -type_string(58) -> - "db trace vec"; -type_string(59) -> - "db fixed deletion"; -type_string(60) -> - "binary (external.c)"; -type_string(61) -> - "binary"; -type_string(62) -> - "procbin (fix)"; -type_string(70) -> - "driver alloc (io.c)"; -type_string(71) -> - "binary (io.c)"; -type_string(72) -> - "binary vec (io.c)"; -type_string(73) -> - "binary vec 2 (io.c)"; -type_string(74) -> - "io vec (io.c)"; -type_string(75) -> - "io vec 2 (io.c)"; -type_string(76) -> - "temp io buffer (io.c)"; -type_string(77) -> - "temp io buffer 2 (io.c)"; -type_string(78) -> - "line buffer (io.c)"; -type_string(8) -> - "heap"; -type_string(801) -> - "heap (1)"; -type_string(802) -> - "heap (2)"; -type_string(803) -> - "heap (3)"; -type_string(804) -> - "heap (4)"; -type_string(805) -> - "heap (5)"; -type_string(821) -> - "heap fragment (1)"; -type_string(822) -> - "heap fragment (2)"; -type_string(830) -> - "sequential store buffer (for vectors)"; -type_string(91) -> - "process table"; -type_string(92) -> - "process desc"; -type_string(110) -> - "hash buckets"; -type_string(111) -> - "hash table"; -type_string(120) -> - "index init"; -type_string(121) -> - "index table"; -type_string(130) -> - "temp buffer"; -type_string(140) -> - "timer wheel"; -type_string(150) -> - "distribution cache"; -type_string(151) -> - "dmem"; -type_string(152) -> - "distribution table"; -type_string(153) -> - "distribution table buckets"; -type_string(154) -> - "distribution table entry"; -type_string(155) -> - "node table"; -type_string(156) -> - "node table buckets"; -type_string(157) -> - "node table entry"; -type_string(160) -> - "port table"; -type_string(161) -> - "driver entry"; -type_string(162) -> - "port setup"; -type_string(163) -> - "port wait"; -type_string(170) -> - "module"; -type_string(171) -> - "fundef"; -type_string(180) -> - "file table"; -type_string(181) -> - "driver table"; -type_string(182) -> - "poll struct"; -type_string(190) -> - "inet driver"; -type_string(200) -> - "efile driver"; -type_string(210) -> - "gc root set"; -type_string(220) -> - "breakpoint data"; -type_string(230) -> - "async queue"; -type_string(231) -> - "async (exit)"; -type_string(232) -> - "async (driver)"; -type_string(240) -> - "bits buffer"; -type_string(241) -> - "bits temp buffer"; -type_string(250) -> - "modules (loader)"; -type_string(251) -> - "code (loader)"; -type_string(252) -> - "atom tab (loader)"; -type_string(253) -> - "import tab (loader)"; -type_string(254) -> - "export tab (loader)"; -type_string(255) -> - "lable tab (loader)"; -type_string(256) -> - "gen op (loader)"; -type_string(257) -> - "gen op args (loader)"; -type_string(258) -> - "gen op args 2 (loader)"; -type_string(259) -> - "gen op args 3 (loader)"; -type_string(260) -> - "lambdas (loader)"; -type_string(261) -> - "temp int buffer (loader)"; -type_string(262) -> - "temp heap (loader)"; -type_string(280) -> - "dist ctrl msg buffer"; -type_string(281) -> - "dist_buf"; -type_string(290) -> - "call trace buffer"; -type_string(300) -> - "bif timer rec"; -type_string(310) -> - "argument registers"; -type_string(320) -> - "compressed binary temp buffer"; -type_string(330) -> - "term_to_binary temp buffer"; -type_string(340) -> - "proc dict"; -type_string(350) -> - "trace to port temp buffer"; -type_string(360) -> - "lists subtract temp buffer"; -type_string(370) -> - "link (lh)"; -type_string(380) -> - "port call buffer"; -type_string(400) -> - "definite_alloc block"; -type_string(_) -> - invalid_type. - +dispatch_gather(#{ allocator_types := AllocatorTypes, + scheduler_ids := SchedulerIds, + histogram_start := HistStart, + histogram_width := HistWidth }, Ref, Gather) + when is_list(AllocatorTypes), + is_list(SchedulerIds), + HistStart >= 1, HistStart =< (1 bsl 28), + HistWidth >= 1, HistWidth =< 32 -> + MsgCount = lists:sum( + [Gather({AllocatorType, SchedId, HistWidth, HistStart, Ref}) || + SchedId <- SchedulerIds, + AllocatorType <- AllocatorTypes]), + {HistStart, MsgCount}; +dispatch_gather(_, _, _) -> + error(badarg). diff --git a/lib/tools/test/instrument_SUITE.erl b/lib/tools/test/instrument_SUITE.erl index f37d28c277..8c521b2e1a 100644 --- a/lib/tools/test/instrument_SUITE.erl +++ b/lib/tools/test/instrument_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1998-2016. All Rights Reserved. +%% Copyright Ericsson AB 1998-2018. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -20,89 +20,274 @@ -module(instrument_SUITE). -export([all/0, suite/0]). --export(['+Mim true'/1, '+Mis true'/1]). + +-export([allocations_enabled/1, allocations_disabled/1, allocations_ramv/1, + carriers_enabled/1, carriers_disabled/1]). + +-export([test_all_alloc/2, test_per_alloc/2, test_format/3, test_abort/1, + generate_test_blocks/0, churn_memory/0]). -include_lib("common_test/include/ct.hrl"). suite() -> [{ct_hooks,[ts_install_cth]}, - {timetrap,{seconds,10}}]. + {timetrap,{minutes,5}}]. all() -> - ['+Mim true', '+Mis true']. - - -%% Check that memory data can be read and processed -'+Mim true'(Config) when is_list(Config) -> - Node = start_slave("+Mim true"), - MD = rpc:call(Node, instrument, memory_data, []), - [{total,[{sizes,S1,S2,S3},{blocks,B1,B2,B3}]}] - = rpc:call(Node, instrument, memory_status, [total]), - stop_slave(Node), - true = S1 =< S2, - true = S2 =< S3, - true = B1 =< B2, - true = B2 =< B3, - MDS = instrument:sort(MD), - {Low, High} = instrument:mem_limits(MDS), - true = Low < High, - {_, AL} = MDS, - SumBlocks = instrument:sum_blocks(MD), - case SumBlocks of - N when is_integer(N) -> - N = lists:foldl(fun ({_,_,Size,_}, Sum) -> - Size+Sum - end, 0, AL), - true = N =< S3; - Other -> - ct:fail(Other) + [allocations_enabled, allocations_disabled, allocations_ramv, + carriers_enabled, carriers_disabled]. + +-define(GENERATED_SBC_BLOCK_COUNT, 1000). +-define(GENERATED_MBC_BLOCK_COUNT, ?GENERATED_SBC_BLOCK_COUNT). + +-define(GENERATED_BLOCK_COUNT, (?GENERATED_SBC_BLOCK_COUNT + + ?GENERATED_MBC_BLOCK_COUNT)). +-define(GENERATED_CARRIER_COUNT, ?GENERATED_SBC_BLOCK_COUNT). + +allocations_test(Args, Plain, PerAlloc) -> + run_test(Args, fun(Node) -> + ok = rpc:call(Node, ?MODULE, test_all_alloc, + [fun instrument:allocations/0, Plain]), + ok = rpc:call(Node, ?MODULE, test_per_alloc, + [fun instrument:allocations/1, PerAlloc]), + ok = rpc:call(Node, ?MODULE, test_format, + [#{ histogram_start => 512, + histogram_width => 4 }, + fun instrument:allocations/1, + fun verify_allocations_output/2]), + ok = rpc:call(Node, ?MODULE, test_abort, + [fun erts_internal:gather_alloc_histograms/1]) + end). + +allocations_enabled(Config) when is_list(Config) -> + allocations_test("+Meamax +Muatags true", + fun verify_allocations_enabled/1, + fun verify_allocations_enabled/2). + +allocations_disabled(Config) when is_list(Config) -> + allocations_test("+Meamax +Muatags false", + fun verify_allocations_disabled/1, + fun verify_allocations_disabled/2). + +allocations_ramv(Config) when is_list(Config) -> + allocations_test("+Meamax +Muatags true +Muramv true", + fun verify_allocations_enabled/1, + fun verify_allocations_enabled/2). + +verify_allocations_disabled(_AllocType, Result) -> + verify_allocations_disabled(Result). + +verify_allocations_disabled({error, not_enabled}) -> + ok. + +%% Skip types that have unstable results or are unaffected by +Muatags +verify_allocations_enabled(literal_alloc, _Result) -> ok; +verify_allocations_enabled(exec_alloc, _Result) -> ok; +verify_allocations_enabled(temp_alloc, _Result) -> ok; +verify_allocations_enabled(sl_alloc, _Result) -> ok; +verify_allocations_enabled(_AllocType, Result) -> + verify_allocations_enabled(Result). + +verify_allocations_enabled({ok, {_HistStart, _UnscannedBytes, Allocs}}) -> + true = Allocs =/= #{}. + +verify_allocations_output(#{ histogram_start := HistStart, + histogram_width := HistWidth }, + {ok, {HistStart, _UnscannedBytes, ByOrigin}}) -> + AllHistograms = lists:flatten([maps:values(ByType) || + ByType <- maps:values(ByOrigin)]), + + %% Do the histograms look alright? + HistogramSet = ordsets:from_list(AllHistograms), + Verified = [H || H <- HistogramSet, + tuple_size(H) =:= HistWidth, + hist_sum(H) >= 1], + [] = ordsets:subtract(HistogramSet, Verified), + + %% Do we have at least as many blocks as we've generated? + BlockCount = lists:foldl(fun(Hist, Acc) -> + hist_sum(Hist) + Acc + end, 0, AllHistograms), + GenTotalBlockCount = ?GENERATED_BLOCK_COUNT, + GenSBCBlockCount = ?GENERATED_SBC_BLOCK_COUNT, + if + BlockCount < GenSBCBlockCount -> + ct:fail("Found ~p blocks, required at least ~p (SB)." , + [BlockCount, GenSBCBlockCount]); + BlockCount >= GenTotalBlockCount -> + ct:pal("Found ~p blocks, expected at least ~p (SB + MB).", + [BlockCount, GenTotalBlockCount]); + BlockCount < GenTotalBlockCount -> + ct:pal("Found ~p blocks, expected at least ~p (SB + MB), but this " + "may be due to MBCs being skipped if they're about to be " + "scanned just as they're fetched from the carrier pool.", + [BlockCount, GenTotalBlockCount]) + end, + + ok; +verify_allocations_output(#{}, {error, not_enabled}) -> + ok. + +%% %% %% %% %% %% + +carriers_test(Args, Plain, PerAlloc) -> + run_test(Args, fun(Node) -> + ok = rpc:call(Node, ?MODULE, test_all_alloc, + [fun instrument:carriers/0, Plain]), + ok = rpc:call(Node, ?MODULE, test_per_alloc, + [fun instrument:carriers/1, PerAlloc]), + ok = rpc:call(Node, ?MODULE, test_format, + [#{ histogram_start => 1024, + histogram_width => 4 }, + fun instrument:carriers/1, + fun verify_carriers_output/2]), + ok = rpc:call(Node, ?MODULE, test_abort, + [fun erts_internal:gather_carrier_info/1]) + end). + +carriers_enabled(Config) when is_list(Config) -> + carriers_test("+Meamax", + fun verify_carriers_enabled/1, + fun verify_carriers_enabled/2). + +carriers_disabled(Config) when is_list(Config) -> + carriers_test("+Meamin", + fun verify_carriers_disabled/1, + fun verify_carriers_disabled/2). + +verify_carriers_disabled(_AllocType, Result) -> + verify_carriers_disabled(Result). + +verify_carriers_disabled({error, not_enabled}) -> + ok; +verify_carriers_disabled({ok, {_HistStart, Carriers}}) -> + verify_carriers_disabled_1(Carriers). + +verify_carriers_disabled_1([]) -> + ok; +%% literal_alloc, exec_alloc, and temp_alloc can't be disabled, so we have to +%% accept their presence in carriers_disabled/test_all_alloc. +verify_carriers_disabled_1([Carrier | Rest]) when + element(1, Carrier) =:= literal_alloc; + element(1, Carrier) =:= exec_alloc; + element(1, Carrier) =:= temp_alloc -> + verify_carriers_disabled_1(Rest). + +%% exec_alloc only has a carrier if it's actually used. +verify_carriers_enabled(exec_alloc, _Result) -> ok; +verify_carriers_enabled(_AllocType, Result) -> verify_carriers_enabled(Result). + +verify_carriers_enabled({ok, {_HistStart, Carriers}}) when Carriers =/= [] -> + ok. + +verify_carriers_output(#{ histogram_start := HistStart, + histogram_width := HistWidth }, + {ok, {HistStart, AllCarriers}}) -> + + %% Do the carriers look alright? + CarrierSet = ordsets:from_list(AllCarriers), + Verified = [C || {AllocType, + TotalSize, + UnscannedSize, + AllocatedSize, + AllocatedCount, + InPool, + FreeBlockHist} = C <- CarrierSet, + is_atom(AllocType), + is_integer(TotalSize), TotalSize >= 1, + is_integer(UnscannedSize), UnscannedSize < TotalSize, + UnscannedSize >= 0, + is_integer(AllocatedSize), AllocatedSize < TotalSize, + AllocatedSize >= 0, + is_integer(AllocatedCount), AllocatedCount =< AllocatedSize, + AllocatedCount >= 0, + is_boolean(InPool), + tuple_size(FreeBlockHist) =:= HistWidth, + carrier_block_check(AllocatedCount, FreeBlockHist)], + [] = ordsets:subtract(CarrierSet, Verified), + + %% Do we have at least as many carriers as we've generated? + CarrierCount = length(AllCarriers), + GenSBCCount = ?GENERATED_SBC_BLOCK_COUNT, + if + CarrierCount < GenSBCCount -> + ct:fail("Carrier count is ~p, expected at least ~p (SBC).", + [CarrierCount, GenSBCCount]); + CarrierCount >= GenSBCCount -> + ok end, - lists:foldl( - fun ({TDescr,Addr,Size,Proc}, MinAddr) -> - true = TDescr /= invalid_type, - true = is_integer(TDescr), - true = is_integer(Addr), - true = is_integer(Size), - true = Addr >= MinAddr, - case Proc of - {0, Number, Serial} -> - true = is_integer(Number), - true = is_integer(Serial); - undefined -> - ok; - BadProc -> - ct:fail({badproc, BadProc}) - end, - NextMinAddr = Addr+Size, - true = NextMinAddr =< High, - NextMinAddr - end, Low, AL), - {_, DAL} = instrument:descr(MDS), - lists:foreach( - fun ({TDescr,_,_,Proc}) -> - true = TDescr /= invalid_type, - true = is_atom(TDescr) orelse is_list(TDescr), - true = is_pid(Proc) orelse Proc == undefined - end, DAL), - ASL = lists:map(fun ({_,A,S,_}) -> {A,S} end, AL), - ASL = lists:map(fun ({_,A,S,_}) -> {A,S} end, DAL), - instrument:holes(MDS), - {comment, "total status - sum of blocks = " ++ integer_to_list(S1-SumBlocks)}. - -%% Check that memory data can be read and processed -'+Mis true'(Config) when is_list(Config) -> - Node = start_slave("+Mis true"), - [{total,[{sizes,S1,S2,S3},{blocks,B1,B2,B3}]}] - = rpc:call(Node, instrument, memory_status, [total]), - true = S1 =< S2, - true = S2 =< S3, - true = B1 =< B2, - true = B2 =< B3, - true = is_list(rpc:call(Node,instrument,memory_status,[allocators])), - true = is_list(rpc:call(Node,instrument,memory_status,[classes])), - true = is_list(rpc:call(Node,instrument,memory_status,[types])), + + ok; +verify_carriers_output(#{}, {error, not_enabled}) -> + ok. + +carrier_block_check(AllocCount, FreeHist) -> + %% A carrier must contain at least one block, and th. number of free blocks + %% must not exceed the number of allocated blocks + 1. + FreeCount = hist_sum(FreeHist), + + (AllocCount + FreeCount) >= 1 andalso FreeCount =< (AllocCount + 1). + +%% %% %% %% %% %% + +test_all_alloc(Gather, Verify) -> + Verify(Gather()), ok. +test_per_alloc(Gather, Verify) -> + [begin + Verify(T, Gather(#{ allocator_types => [T] })) + end || T <- erlang:system_info(alloc_util_allocators)], + ok. + +test_format(#{ allocator_types := _ }, _, _) -> + error(badarg); +test_format(Options0, Gather, Verify) -> + %% We limit format checking to binary_alloc since we generated the test + %% vectors there. + Options = Options0#{ allocator_types => [binary_alloc] }, + Verify(Options, Gather(Options)), + ok. + +test_abort(Gather) -> + %% There's no way for us to tell whether this actually aborted or ran to + %% completion, but it might catch a few segfaults. + Runner = self(), + Ref = make_ref(), + spawn_opt(fun() -> + [Gather({Type, SchedId, 1, 1, Ref}) || + Type <- erlang:system_info(alloc_util_allocators), + SchedId <- lists:seq(0, erlang:system_info(schedulers))], + Runner ! Ref + end, [{priority, max}]), + receive + Ref -> ok + end. + +hist_sum(H) -> hist_sum_1(H, tuple_size(H), 0). +hist_sum_1(_H, 0, A) -> A; +hist_sum_1(H, N, A) -> hist_sum_1(H, N - 1, element(N, H) + A). + +%% + +run_test(Args0, Test) -> + %% Override single-block carrier threshold for binaries to ensure we have + %% coverage for that path. generate_test_blocks builds a few binaries that + %% crosses this threshold. + %% + %% We also set the abandon carrier threshold to 70% to provoke more + %% activity in the carrier pool. + Args = Args0 ++ " +MBsbct 1 +Muacul 70", + Node = start_slave(Args), + + ok = rpc:call(Node, ?MODULE, generate_test_blocks, []), + ok = Test(Node), + + ok = rpc:call(Node, ?MODULE, churn_memory, []), + ok = Test(Node), + + true = test_server:stop_node(Node). + start_slave(Args) -> MicroSecs = erlang:monotonic_time(), Name = "instr" ++ integer_to_list(MicroSecs), @@ -112,6 +297,60 @@ start_slave(Args) -> [{args, "-pa " ++ Pa ++ " " ++ Args}]), Node. +generate_test_blocks() -> + Runner = self(), + Ref = make_ref(), + spawn(fun() -> + %% We've set the single-block carrier threshold to 1KB so one + %% ought to land in a SBC and the other in a MBC. Both are kept + %% alive forever. + SBCs = [<<I, 0:(1 bsl 10)/unit:8>> || + I <- lists:seq(1, ?GENERATED_SBC_BLOCK_COUNT)], + MBCs = [<<I, 0:64/unit:8>> || + I <- lists:seq(1, ?GENERATED_MBC_BLOCK_COUNT)], + Runner ! Ref, + receive after infinity -> ok end, + unreachable ! {SBCs, MBCs} + end), + receive + Ref -> ok + end. -stop_slave(Node) -> - true = test_server:stop_node(Node). +churn_memory() -> + %% All processes spawned from here on have 'low' priority to avoid starving + %% others (e.g. the rpc process) which could cause the test to time out. + [begin + churn_list_to_binary(), + churn_processes(), + churn_ets() + end || _ <- lists:seq(1, erlang:system_info(schedulers))], + ok. + +churn_processes() -> + Pid = spawn_opt(fun churn_processes/0, [{priority, low}]), + [Pid ! <<I, 0:128/unit:8>> || I <- lists:seq(1, 128)]. + +%% Nearly all types have a few allocations at all times but sl_alloc is +%% often empty. list_to_binary on large inputs will yield and spill the +%% state into an 'estack' which is allocated through sl_alloc. +%% +%% This is inherently unstable so we skip the verification step for this +%% type, but there's still a point to hammering it. +churn_list_to_binary() -> + List = binary_to_list(<<0:(1 bsl 20)/unit:8>>), + spawn_opt(fun() -> churn_list_to_binary_1(List) end, [{priority, low}]). + +churn_list_to_binary_1(List) -> + _ = id(list_to_binary(List)), + churn_list_to_binary_1(List). + +churn_ets() -> + spawn_opt(fun() -> churn_ets_1(ets:new(gurka, [])) end, [{priority, low}]). + +churn_ets_1(Tab) -> + ets:insert(Tab, {gaffel, lists:seq(1, 16)}), + ets:delete_all_objects(Tab), + churn_ets_1(Tab). + +id(I) -> + I. |