aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator
diff options
context:
space:
mode:
authorSteve Vinoski <[email protected]>2014-01-09 21:22:45 -0500
committerRickard Green <[email protected]>2014-01-28 15:34:18 +0100
commitc1c03ae4ee50e58b7669ea88ec4d29c6b2b67c7b (patch)
tree83b8ed1fc2cfdfa0371358ef56cd5159a9950eb3 /erts/emulator
parentaa1b40252fdd5aa27d21b70dcbec1e744e3d416b (diff)
downloadotp-c1c03ae4ee50e58b7669ea88ec4d29c6b2b67c7b.tar.gz
otp-c1c03ae4ee50e58b7669ea88ec4d29c6b2b67c7b.tar.bz2
otp-c1c03ae4ee50e58b7669ea88ec4d29c6b2b67c7b.zip
initial support for dirty schedulers and dirty NIFs
Add initial support for dirty schedulers. There are two types of dirty schedulers: CPU schedulers and I/O schedulers. By default, there are as many dirty CPU schedulers as there are normal schedulers and as many dirty CPU schedulers online as normal schedulers online. There are 10 dirty I/O schedulers (similar to the choice of 10 as the default for async threads). By default, dirty schedulers are disabled and conditionally compiled out. To enable them, you must pass --enable-dirty-schedulers to the top-level configure script when building Erlang/OTP. Current dirty scheduler support requires the emulator to be built with SMP support. This restriction will be lifted in the future. You can specify the number of dirty schedulers with the command-line options +SDcpu (for dirty CPU schedulers) and +SDio (for dirty I/O schedulers). The +SDcpu option is similar to the +S option in that it takes two numbers separated by a colon: C1:C2, where C1 specifies the number of dirty schedulers available and C2 specifies the number of dirty schedulers online. The +SDPcpu option allows numbers of dirty CPU schedulers available and dirty CPU schedulers online to be specified as percentages, similar to the existing +SP option for normal schedulers. The number of dirty CPU schedulers created and dirty CPU schedulers online may not exceed the number of normal schedulers created and normal schedulers online, respectively. The +SDio option takes only a single number specifying the number of dirty I/O schedulers available and online. There is no support yet for programmatically changing at run time the number of dirty CPU schedulers online via erlang:system_flag/2. Also, changing the number of normal schedulers online via erlang:system_flag(schedulers_online, NewSchedulersOnline) should ensure that there are no more dirty CPU schedulers than normal schedulers, but this is not yet implemented. You can retrieve the number of dirty schedulers by passing dirty_cpu_schedulers, dirty_cpu_schedulers_online, or dirty_io_schedulers to erlang:system_info/1. Currently only NIFs are able to access dirty scheduler functionality. Neither drivers nor BIFs currently support dirty schedulers. This restriction will be addressed in the future. If dirty scheduler support is present in the runtime, the initial status line Erlang prints before presenting its interactive prompt will include the indicator "[ds:C1:C2:I]" where "ds" indicates "dirty schedulers", "C1" indicates the number of dirty CPU schedulers available, "C2" indicates the number of dirty CPU schedulers online, and "I" indicates the number of dirty I/O schedulers. Document The dirty NIF API in the erl_nif man page. The API closely follows Rickard Green's presentation slides from his talk "Future Extensions to the Native Interface", presented at the 2011 Erlang Factory held in the San Francisco Bay Area. Rickard's slides are available online at http://bit.ly/1m34UHB . Document the new erl command-line options, the additions to erlang:system_info/1, and also add the erlang:system_flag/2 dirty scheduler documentation even though it's not yet implemented. To determine whether the dirty NIF API is available, native code can check to see whether the C preprocessor macro ERL_NIF_DIRTY_SCHEDULER_SUPPORT is defined. To check if dirty schedulers are available at run time, native code can call the boolean enif_have_dirty_schedulers() function, and Erlang code can call erlang:system_info(dirty_cpu_schedulers), which raises badarg if no dirty scheduler support is available. Add a simple dirty NIF test to the emulator NIF suite.
Diffstat (limited to 'erts/emulator')
-rw-r--r--erts/emulator/beam/beam_emu.c9
-rw-r--r--erts/emulator/beam/beam_load.h1
-rw-r--r--erts/emulator/beam/erl_alloc.c3
-rwxr-xr-xerts/emulator/beam/erl_bif_info.c35
-rw-r--r--erts/emulator/beam/erl_init.c304
-rw-r--r--erts/emulator/beam/erl_lock_check.c3
-rw-r--r--erts/emulator/beam/erl_nif.c151
-rw-r--r--erts/emulator/beam/erl_nif.h17
-rw-r--r--erts/emulator/beam/erl_nif_api_funcs.h14
-rw-r--r--erts/emulator/beam/erl_process.c607
-rw-r--r--erts/emulator/beam/erl_process.h103
-rw-r--r--erts/emulator/test/nif_SUITE.erl19
-rw-r--r--erts/emulator/test/nif_SUITE_data/nif_SUITE.c47
13 files changed, 1122 insertions, 191 deletions
diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c
index b413f0e859..7fecdd5c5f 100644
--- a/erts/emulator/beam/beam_emu.c
+++ b/erts/emulator/beam/beam_emu.c
@@ -217,6 +217,7 @@ BeamInstr beam_continue_exit[1];
BeamInstr* em_call_error_handler;
BeamInstr* em_apply_bif;
+BeamInstr* em_call_nif;
/* NOTE These should be the only variables containing trace instructions.
@@ -3323,6 +3324,13 @@ void process_main(void)
reg[0] = r(0);
nif_bif_result = (*fp)(&env, bif_nif_arity, reg);
erts_post_nif(&env);
+#ifdef ERTS_DIRTY_SCHEDULERS
+ if (is_non_value(nif_bif_result) && c_p->freason == TRAP) {
+ Export* ep = (Export*) c_p->psd->data[ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT];
+ ep->code[0] = I[-3];
+ ep->code[1] = I[-2];
+ }
+#endif
}
ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(nif_bif_result));
PROCESS_MAIN_CHK_LOCKS(c_p);
@@ -4964,6 +4972,7 @@ void process_main(void)
em_call_error_handler = OpCode(call_error_handler);
em_apply_bif = OpCode(apply_bif);
+ em_call_nif = OpCode(call_nif);
beam_apply[0] = (BeamInstr) OpCode(i_apply);
beam_apply[1] = (BeamInstr) OpCode(normal_exit);
diff --git a/erts/emulator/beam/beam_load.h b/erts/emulator/beam/beam_load.h
index 65a8f26d7c..bd22b0c4de 100644
--- a/erts/emulator/beam/beam_load.h
+++ b/erts/emulator/beam/beam_load.h
@@ -49,6 +49,7 @@ extern void** beam_ops;
extern BeamInstr beam_debug_apply[];
extern BeamInstr* em_call_error_handler;
extern BeamInstr* em_apply_bif;
+extern BeamInstr* em_call_nif;
/*
* The following variables keep a sorted list of address ranges for
diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c
index 8094c6ee2e..c6b324dc15 100644
--- a/erts/emulator/beam/erl_alloc.c
+++ b/erts/emulator/beam/erl_alloc.c
@@ -1754,6 +1754,9 @@ erts_alloc_register_scheduler(void *vesdp)
int ix = (int) esdp->no;
int aix;
+#ifdef ERTS_DIRTY_SCHEDULERS
+ ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp));
+#endif
for (aix = ERTS_ALC_A_MIN; aix <= ERTS_ALC_A_MAX; aix++) {
ErtsAllocatorThrSpec_t *tspec = &erts_allctr_thr_spec[aix];
esdp->alloc_data.deallctr[aix] = NULL;
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index e0b654cb22..f25b4dbae5 100755
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -90,6 +90,9 @@ static char erts_system_version[] = ("Erlang/OTP " ERLANG_OTP_RELEASE
" [smp:%beu:%beu]"
#endif
#ifdef USE_THREADS
+#ifdef ERTS_DIRTY_SCHEDULERS
+ " [ds:%beu:%beu:%beu]"
+#endif
" [async-threads:%d]"
#endif
#ifdef HIPE
@@ -312,7 +315,13 @@ erts_print_system_version(int to, void *arg, Process *c_p)
char *ocp = otp_correction_package;
#ifdef ERTS_SMP
Uint total, online, active;
- (void) erts_schedulers_state(&total, &online, &active, 0);
+#ifdef ERTS_DIRTY_SCHEDULERS
+ Uint dirty_cpu, dirty_cpu_onln, dirty_io;
+
+ (void) erts_schedulers_state(&total, &online, &active, &dirty_cpu, &dirty_cpu_onln, &dirty_io, 0);
+#else
+ (void) erts_schedulers_state(&total, &online, &active, NULL, NULL, NULL, 0);
+#endif
#endif
for (i = 0; i < sizeof(otp_correction_package)-4; i++) {
if (ocp[i] == '-' && ocp[i+1] == 'r' && ocp[i+2] == 'c')
@@ -330,6 +339,9 @@ erts_print_system_version(int to, void *arg, Process *c_p)
rc_str
#ifdef ERTS_SMP
, total, online
+#ifdef ERTS_DIRTY_SCHEDULERS
+ , dirty_cpu, dirty_cpu_onln, dirty_io
+#endif
#endif
#ifdef USE_THREADS
, erts_async_max_threads
@@ -2477,6 +2489,9 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
switch (erts_schedulers_state(&total,
&online,
&active,
+ NULL,
+ NULL,
+ NULL,
1)) {
case ERTS_SCHDLR_SSPND_DONE: {
Eterm *hp = HAlloc(BIF_P, 4);
@@ -2500,7 +2515,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
BIF_RET(make_small(1));
#else
Uint total, online, active;
- switch (erts_schedulers_state(&total, &online, &active, 1)) {
+ switch (erts_schedulers_state(&total, &online, &active, NULL, NULL, NULL, 1)) {
case ERTS_SCHDLR_SSPND_DONE:
BIF_RET(make_small(online));
case ERTS_SCHDLR_SSPND_YIELD_RESTART:
@@ -2517,7 +2532,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
BIF_RET(make_small(1));
#else
Uint total, online, active;
- switch (erts_schedulers_state(&total, &online, &active, 1)) {
+ switch (erts_schedulers_state(&total, &online, &active, NULL, NULL, NULL, 1)) {
case ERTS_SCHDLR_SSPND_DONE:
BIF_RET(make_small(active));
case ERTS_SCHDLR_SSPND_YIELD_RESTART:
@@ -2529,6 +2544,20 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
BIF_ERROR(BIF_P, EXC_INTERNAL_ERROR);
}
#endif
+#if defined(ERTS_SMP) && defined(ERTS_DIRTY_SCHEDULERS)
+ } else if (ERTS_IS_ATOM_STR("dirty_cpu_schedulers", BIF_ARG_1)) {
+ Uint dirty_cpu;
+ erts_schedulers_state(NULL, NULL, NULL, &dirty_cpu, NULL, NULL, 1);
+ BIF_RET(make_small(dirty_cpu));
+ } else if (ERTS_IS_ATOM_STR("dirty_cpu_schedulers_online", BIF_ARG_1)) {
+ Uint dirty_cpu_onln;
+ erts_schedulers_state(NULL, NULL, NULL, NULL, &dirty_cpu_onln, NULL, 1);
+ BIF_RET(make_small(dirty_cpu_onln));
+ } else if (ERTS_IS_ATOM_STR("dirty_io_schedulers", BIF_ARG_1)) {
+ Uint dirty_io;
+ erts_schedulers_state(NULL, NULL, NULL, NULL, NULL, &dirty_io, 1);
+ BIF_RET(make_small(dirty_io));
+#endif
} else if (ERTS_IS_ATOM_STR("run_queues", BIF_ARG_1)) {
res = make_small(erts_no_run_queues);
BIF_RET(res);
diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c
index 19088fd913..c17256f466 100644
--- a/erts/emulator/beam/erl_init.c
+++ b/erts/emulator/beam/erl_init.c
@@ -484,98 +484,107 @@ void erts_usage(void)
/* erts_fprintf(stderr, "-# number set the number of items to be used in traces etc\n"); */
- erts_fprintf(stderr, "-a size suggested stack size in kilo words for threads\n");
- erts_fprintf(stderr, " in the async-thread pool, valid range is [%d-%d]\n",
+ erts_fprintf(stderr, "-a size suggested stack size in kilo words for threads\n");
+ erts_fprintf(stderr, " in the async-thread pool, valid range is [%d-%d]\n",
ERTS_ASYNC_THREAD_MIN_STACK_SIZE,
ERTS_ASYNC_THREAD_MAX_STACK_SIZE);
- erts_fprintf(stderr, "-A number set number of threads in async thread pool,\n");
- erts_fprintf(stderr, " valid range is [0-%d]\n",
+ erts_fprintf(stderr, "-A number set number of threads in async thread pool,\n");
+ erts_fprintf(stderr, " valid range is [0-%d]\n",
ERTS_MAX_NO_OF_ASYNC_THREADS);
- erts_fprintf(stderr, "-B[c|d|i] c to have Ctrl-c interrupt the Erlang shell,\n");
- erts_fprintf(stderr, " d (or no extra option) to disable the break\n");
- erts_fprintf(stderr, " handler, i to ignore break signals\n");
+ erts_fprintf(stderr, "-B[c|d|i] c to have Ctrl-c interrupt the Erlang shell,\n");
+ erts_fprintf(stderr, " d (or no extra option) to disable the break\n");
+ erts_fprintf(stderr, " handler, i to ignore break signals\n");
/* erts_fprintf(stderr, "-b func set the boot function (default boot)\n"); */
- erts_fprintf(stderr, "-c disable continuous date/time correction with\n");
- erts_fprintf(stderr, " respect to uptime\n");
+ erts_fprintf(stderr, "-c disable continuous date/time correction with\n");
+ erts_fprintf(stderr, " respect to uptime\n");
- erts_fprintf(stderr, "-d don't write a crash dump for internally detected errors\n");
- erts_fprintf(stderr, " (halt(String) will still produce a crash dump)\n");
- erts_fprintf(stderr, "-fn[u|a|l] Control how filenames are interpreted\n");
- erts_fprintf(stderr, "-hms size set minimum heap size in words (default %d)\n",
+ erts_fprintf(stderr, "-d don't write a crash dump for internally detected errors\n");
+ erts_fprintf(stderr, " (halt(String) will still produce a crash dump)\n");
+ erts_fprintf(stderr, "-fn[u|a|l] Control how filenames are interpreted\n");
+ erts_fprintf(stderr, "-hms size set minimum heap size in words (default %d)\n",
H_DEFAULT_SIZE);
- erts_fprintf(stderr, "-hmbs size set minimum binary virtual heap size in words (default %d)\n",
+ erts_fprintf(stderr, "-hmbs size set minimum binary virtual heap size in words (default %d)\n",
VH_DEFAULT_SIZE);
/* erts_fprintf(stderr, "-i module set the boot module (default init)\n"); */
- erts_fprintf(stderr, "-K boolean enable or disable kernel poll\n");
- erts_fprintf(stderr, "-n[s|a|d] Control behavior of signals to ports\n");
- erts_fprintf(stderr, " Note that this flag is deprecated!\n");
- erts_fprintf(stderr, "-M<X> <Y> memory allocator switches,\n");
- erts_fprintf(stderr, " see the erts_alloc(3) documentation for more info.\n");
- erts_fprintf(stderr, "-pc <set> Control what characters are considered printable (default latin1)\n");
- erts_fprintf(stderr, "-P number set maximum number of processes on this node,\n");
- erts_fprintf(stderr, " valid range is [%d-%d]\n",
+ erts_fprintf(stderr, "-K boolean enable or disable kernel poll\n");
+ erts_fprintf(stderr, "-n[s|a|d] Control behavior of signals to ports\n");
+ erts_fprintf(stderr, " Note that this flag is deprecated!\n");
+ erts_fprintf(stderr, "-M<X> <Y> memory allocator switches,\n");
+ erts_fprintf(stderr, " see the erts_alloc(3) documentation for more info.\n");
+ erts_fprintf(stderr, "-pc <set> Control what characters are considered printable (default latin1)\n");
+ erts_fprintf(stderr, "-P number set maximum number of processes on this node,\n");
+ erts_fprintf(stderr, " valid range is [%d-%d]\n",
ERTS_MIN_PROCESSES, ERTS_MAX_PROCESSES);
- erts_fprintf(stderr, "-Q number set maximum number of ports on this node,\n");
- erts_fprintf(stderr, " valid range is [%d-%d]\n",
+ erts_fprintf(stderr, "-Q number set maximum number of ports on this node,\n");
+ erts_fprintf(stderr, " valid range is [%d-%d]\n",
ERTS_MIN_PORTS, ERTS_MAX_PORTS);
- erts_fprintf(stderr, "-R number set compatibility release number,\n");
- erts_fprintf(stderr, " valid range [%d-%d]\n",
+ erts_fprintf(stderr, "-R number set compatibility release number,\n");
+ erts_fprintf(stderr, " valid range [%d-%d]\n",
this_rel-2, this_rel);
- erts_fprintf(stderr, "-r force ets memory block to be moved on realloc\n");
- erts_fprintf(stderr, "-rg amount set reader groups limit\n");
- erts_fprintf(stderr, "-sbt type set scheduler bind type, valid types are:\n");
- erts_fprintf(stderr, "-stbt type u|ns|ts|ps|s|nnts|nnps|tnnps|db\n");
- erts_fprintf(stderr, "-sbwt val set scheduler busy wait threshold, valid values are:\n");
- erts_fprintf(stderr, " none|very_short|short|medium|long|very_long.\n");
- erts_fprintf(stderr, "-scl bool enable/disable compaction of scheduler load,\n");
- erts_fprintf(stderr, " see the erl(1) documentation for more info.\n");
- erts_fprintf(stderr, "-sct cput set cpu topology,\n");
- erts_fprintf(stderr, " see the erl(1) documentation for more info.\n");
+ erts_fprintf(stderr, "-r force ets memory block to be moved on realloc\n");
+ erts_fprintf(stderr, "-rg amount set reader groups limit\n");
+ erts_fprintf(stderr, "-sbt type set scheduler bind type, valid types are:\n");
+ erts_fprintf(stderr, "-stbt type u|ns|ts|ps|s|nnts|nnps|tnnps|db\n");
+ erts_fprintf(stderr, "-sbwt val set scheduler busy wait threshold, valid values are:\n");
+ erts_fprintf(stderr, " none|very_short|short|medium|long|very_long.\n");
+ erts_fprintf(stderr, "-scl bool enable/disable compaction of scheduler load,\n");
+ erts_fprintf(stderr, " see the erl(1) documentation for more info.\n");
+ erts_fprintf(stderr, "-sct cput set cpu topology,\n");
+ erts_fprintf(stderr, " see the erl(1) documentation for more info.\n");
#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT_OPT
- erts_fprintf(stderr, "-sub bool enable/disable scheduler utilization balancing,\n");
+ erts_fprintf(stderr, "-sub bool enable/disable scheduler utilization balancing,\n");
#else
- erts_fprintf(stderr, "-sub false disable scheduler utilization balancing,\n");
+ erts_fprintf(stderr, "-sub false disable scheduler utilization balancing,\n");
#endif
- erts_fprintf(stderr, " see the erl(1) documentation for more info.\n");
- erts_fprintf(stderr, "-sws val set scheduler wakeup strategy, valid values are:\n");
- erts_fprintf(stderr, " default|legacy.\n");
- erts_fprintf(stderr, "-swct val set scheduler wake cleanup threshold, valid values are:\n");
- erts_fprintf(stderr, " very_lazy|lazy|medium|eager|very_eager.\n");
- erts_fprintf(stderr, "-swt val set scheduler wakeup threshold, valid values are:\n");
- erts_fprintf(stderr, " very_low|low|medium|high|very_high.\n");
- erts_fprintf(stderr, "-sss size suggested stack size in kilo words for scheduler threads,\n");
- erts_fprintf(stderr, " valid range is [%d-%d]\n",
+ erts_fprintf(stderr, " see the erl(1) documentation for more info.\n");
+ erts_fprintf(stderr, "-sws val set scheduler wakeup strategy, valid values are:\n");
+ erts_fprintf(stderr, " default|legacy.\n");
+ erts_fprintf(stderr, "-swct val set scheduler wake cleanup threshold, valid values are:\n");
+ erts_fprintf(stderr, " very_lazy|lazy|medium|eager|very_eager.\n");
+ erts_fprintf(stderr, "-swt val set scheduler wakeup threshold, valid values are:\n");
+ erts_fprintf(stderr, " very_low|low|medium|high|very_high.\n");
+ erts_fprintf(stderr, "-sss size suggested stack size in kilo words for scheduler threads,\n");
+ erts_fprintf(stderr, " valid range is [%d-%d]\n",
ERTS_SCHED_THREAD_MIN_STACK_SIZE,
ERTS_SCHED_THREAD_MAX_STACK_SIZE);
- erts_fprintf(stderr, "-spp Bool set port parallelism scheduling hint\n");
- erts_fprintf(stderr, "-S n1:n2 set number of schedulers (n1), and number of\n");
- erts_fprintf(stderr, " schedulers online (n2), maximum for both\n");
- erts_fprintf(stderr, " numbers is %d\n",
+ erts_fprintf(stderr, "-spp Bool set port parallelism scheduling hint\n");
+ erts_fprintf(stderr, "-S n1:n2 set number of schedulers (n1), and number of\n");
+ erts_fprintf(stderr, " schedulers online (n2), maximum for both\n");
+ erts_fprintf(stderr, " numbers is %d\n",
ERTS_MAX_NO_OF_SCHEDULERS);
- erts_fprintf(stderr, "-SP p1:p2 specify schedulers (p1) and schedulers online (p2)\n");
- erts_fprintf(stderr, " as percentages of logical processors configured and logical\n");
- erts_fprintf(stderr, " processors available, respectively\n");
- erts_fprintf(stderr, "-t size set the maximum number of atoms the "
- "emulator can handle\n");
- erts_fprintf(stderr, " valid range is [%d-%d]\n",
+ erts_fprintf(stderr, "-SP p1:p2 specify schedulers (p1) and schedulers online (p2)\n");
+ erts_fprintf(stderr, " as percentages of logical processors configured and logical\n");
+ erts_fprintf(stderr, " processors available, respectively\n");
+#ifdef ERTS_DIRTY_SCHEDULERS
+ erts_fprintf(stderr, "-SDcpu n1:n2 set number of dirty CPU schedulers (n1), and number of\n");
+ erts_fprintf(stderr, " dirty CPU schedulers online (n2), valid range for both\n");
+ erts_fprintf(stderr, " numbers is [1-%d], and n2 must be less than or equal to n1\n",
+ ERTS_MAX_NO_OF_DIRTY_CPU_SCHEDULERS);
+ erts_fprintf(stderr, "-SDPcpu p1:p2 specify dirty CPU schedulers (p1) and dirty CPU schedulers\n");
+ erts_fprintf(stderr, " online (p2) as percentages of logical processors configured\n");
+ erts_fprintf(stderr, " and logical processors available, respectively\n");
+ erts_fprintf(stderr, "-SDio n set number of dirty I/O schedulers, valid range is [0-%d]\n",
+ ERTS_MAX_NO_OF_DIRTY_IO_SCHEDULERS);
+#endif
+ erts_fprintf(stderr, "-t size set the maximum number of atoms the emulator can handle\n");
+ erts_fprintf(stderr, " valid range is [%d-%d]\n",
MIN_ATOM_TABLE_SIZE, MAX_ATOM_TABLE_SIZE);
- erts_fprintf(stderr, "-T number set modified timing level,\n");
- erts_fprintf(stderr, " valid range is [0-%d]\n",
+ erts_fprintf(stderr, "-T number set modified timing level, valid range is [0-%d]\n",
ERTS_MODIFIED_TIMING_LEVELS-1);
- erts_fprintf(stderr, "-V print Erlang version\n");
+ erts_fprintf(stderr, "-V print Erlang version\n");
- erts_fprintf(stderr, "-v turn on chatty mode (GCs will be reported etc)\n");
+ erts_fprintf(stderr, "-v turn on chatty mode (GCs will be reported etc)\n");
- erts_fprintf(stderr, "-W<i|w> set error logger warnings mapping,\n");
- erts_fprintf(stderr, " see error_logger documentation for details\n");
- erts_fprintf(stderr, "-zdbbl size set the distribution buffer busy limit in kilobytes\n");
- erts_fprintf(stderr, " valid range is [1-%d]\n", INT_MAX/1024);
+ erts_fprintf(stderr, "-W<i|w> set error logger warnings mapping,\n");
+ erts_fprintf(stderr, " see error_logger documentation for details\n");
+ erts_fprintf(stderr, "-zdbbl size set the distribution buffer busy limit in kilobytes\n");
+ erts_fprintf(stderr, " valid range is [1-%d]\n", INT_MAX/1024);
erts_fprintf(stderr, "\n");
erts_fprintf(stderr, "Note that if the emulator is started with erlexec (typically\n");
erts_fprintf(stderr, "from the erl script), these flags should be specified with +.\n");
@@ -643,6 +652,13 @@ early_init(int *argc, char **argv) /*
int schdlrs_percentage = 100;
int schdlrs_onln_percentage = 100;
int max_main_threads;
+#ifdef ERTS_DIRTY_SCHEDULERS
+ int dirty_cpu_scheds;
+ int dirty_cpu_scheds_online;
+ int dirty_cpu_scheds_pctg = 100;
+ int dirty_cpu_scheds_onln_pctg = 100;
+ int dirty_io_scheds;
+#endif
int max_reader_groups;
int reader_groups;
char envbuf[21]; /* enough for any 64-bit integer */
@@ -718,6 +734,12 @@ early_init(int *argc, char **argv) /*
schdlrs = no_schedulers;
schdlrs_onln = no_schedulers_online;
+#ifdef ERTS_DIRTY_SCHEDULERS
+ dirty_cpu_scheds = no_schedulers;
+ dirty_cpu_scheds_online = no_schedulers_online;
+ dirty_io_scheds = 10;
+#endif
+
envbufsz = sizeof(envbuf);
/* erts_sys_getenv(_raw)() not initialized yet; need erts_sys_getenv__() */
@@ -808,7 +830,121 @@ early_init(int *argc, char **argv) /*
VERBOSE(DEBUG_SYSTEM,
("using %d:%d scheduler percentages\n",
schdlrs_percentage, schdlrs_onln_percentage));
- } else {
+ }
+#ifdef ERTS_DIRTY_SCHEDULERS
+ else if (argv[i][2] == 'D') {
+ char *arg;
+ char *type = argv[i]+3;
+ if (strcmp(type, "Pcpu") == 0) {
+ int ptot, ponln;
+ arg = get_arg(argv[i]+7, argv[i+1], &i);
+ switch (sscanf(arg, "%d:%d", &ptot, &ponln)) {
+ case 0:
+ switch (sscanf(arg, ":%d", &ponln)) {
+ case 1:
+ if (ponln < 0)
+ goto bad_SDPcpu;
+ ptot = 100;
+ goto chk_SDPcpu;
+ default:
+ goto bad_SDPcpu;
+ }
+ case 1:
+ if (ptot < 0)
+ goto bad_SDPcpu;
+ ponln = ptot < 100 ? ptot : 100;
+ goto chk_SDPcpu;
+ case 2:
+ if (ptot < 0 || ponln < 0)
+ goto bad_SDPcpu;
+ chk_SDPcpu:
+ dirty_cpu_scheds_pctg = ptot;
+ dirty_cpu_scheds_onln_pctg = ponln;
+ break;
+ default:
+ bad_SDPcpu:
+ erts_fprintf(stderr,
+ "bad dirty CPU schedulers percentage specifier %s\n",
+ arg);
+ erts_usage();
+ break;
+ }
+ VERBOSE(DEBUG_SYSTEM,
+ ("using %d:%d dirty CPU scheduler percentages\n",
+ dirty_cpu_scheds_pctg, dirty_cpu_scheds_onln_pctg));
+ } else if (strcmp(type, "cpu") == 0) {
+ int tot, onln;
+ arg = get_arg(argv[i]+6, argv[i+1], &i);
+ switch (sscanf(arg, "%d:%d", &tot, &onln)) {
+ case 0:
+ switch (sscanf(arg, ":%d", &onln)) {
+ case 1:
+ tot = no_schedulers;
+ goto chk_SDcpu;
+ default:
+ goto bad_SDcpu;
+ }
+ case 1:
+ onln = tot < dirty_cpu_scheds_online ?
+ tot : dirty_cpu_scheds_online;
+ case 2:
+ chk_SDcpu:
+ if (tot > 0)
+ dirty_cpu_scheds = tot;
+ else
+ dirty_cpu_scheds = no_schedulers + tot;
+ if (onln > 0)
+ dirty_cpu_scheds_online = onln;
+ else
+ dirty_cpu_scheds_online = no_schedulers_online + onln;
+ if (dirty_cpu_scheds < 1 ||
+ ERTS_MAX_NO_OF_DIRTY_CPU_SCHEDULERS < dirty_cpu_scheds) {
+ erts_fprintf(stderr,
+ "bad amount of dirty CPU schedulers %d\n",
+ tot);
+ erts_usage();
+ }
+ if (dirty_cpu_scheds_online < 1 ||
+ dirty_cpu_scheds < dirty_cpu_scheds_online) {
+ erts_fprintf(stderr,
+ "bad amount of dirty CPU schedulers online %d "
+ "(total amount of dirty CPU schedulers %d)\n",
+ dirty_cpu_scheds_online, dirty_cpu_scheds);
+ erts_usage();
+ }
+ break;
+ default:
+ bad_SDcpu:
+ erts_fprintf(stderr,
+ "bad amount of dirty CPU schedulers %s\n",
+ arg);
+ erts_usage();
+ break;
+ }
+ VERBOSE(DEBUG_SYSTEM,
+ ("using %d:%d dirty CPU scheduler(s)\n", tot, onln));
+ } else if (strcmp(type, "io") == 0) {
+ arg = get_arg(argv[i]+5, argv[i+1], &i);
+ dirty_io_scheds = atoi(arg);
+ if (dirty_io_scheds < 0 ||
+ dirty_io_scheds > ERTS_MAX_NO_OF_DIRTY_IO_SCHEDULERS) {
+ erts_fprintf(stderr,
+ "bad number of dirty I/O schedulers %s\n",
+ arg);
+ erts_usage();
+ }
+ VERBOSE(DEBUG_SYSTEM,
+ ("using %d dirty I/O scheduler(s)\n", dirty_io_scheds));
+ } else {
+ erts_fprintf(stderr,
+ "bad or missing dirty scheduler specifier: %s\n",
+ argv[i]);
+ erts_usage();
+ break;
+ }
+ }
+#endif
+ else {
int tot, onln;
char *arg = get_arg(argv[i]+2, argv[i+1], &i);
switch (sscanf(arg, "%d:%d", &tot, &onln)) {
@@ -895,6 +1031,17 @@ early_init(int *argc, char **argv) /*
(void)schdlrs_percentage;
(void)schdlrs_onln_percentage;
#endif
+#ifdef ERTS_DIRTY_SCHEDULERS
+ /* apply any dirty scheduler precentages */
+ if (dirty_cpu_scheds_pctg != 100 || dirty_cpu_scheds_onln_pctg != 100) {
+ dirty_cpu_scheds = dirty_cpu_scheds * dirty_cpu_scheds_pctg / 100;
+ dirty_cpu_scheds_online = dirty_cpu_scheds_online * dirty_cpu_scheds_onln_pctg / 100;
+ }
+ if (dirty_cpu_scheds > schdlrs)
+ dirty_cpu_scheds = schdlrs;
+ if (dirty_cpu_scheds_online > schdlrs_onln)
+ dirty_cpu_scheds_online = schdlrs_onln;
+#endif
}
#ifndef USE_THREADS
@@ -907,6 +1054,11 @@ early_init(int *argc, char **argv) /*
erts_no_schedulers = (Uint) no_schedulers;
#endif
+#ifdef ERTS_DIRTY_SCHEDULERS
+ erts_no_dirty_cpu_schedulers = dirty_cpu_scheds;
+ erts_no_dirty_cpu_schedulers_online = dirty_cpu_scheds_online;
+ erts_no_dirty_io_schedulers = dirty_io_scheds;
+#endif
erts_early_init_scheduling(no_schedulers);
alloc_opts.ncpu = ncpu;
@@ -924,10 +1076,18 @@ early_init(int *argc, char **argv) /*
*
* * Unmanaged threads that need to register:
* ** Async threads (see erl_async.c)
+ * ** Dirty scheduler threads
*/
erts_thr_progress_init(no_schedulers,
no_schedulers+2,
- erts_async_max_threads);
+#ifndef ERTS_DIRTY_SCHEDULERS
+ erts_async_max_threads
+#else
+ erts_async_max_threads +
+ erts_no_dirty_cpu_schedulers +
+ erts_no_dirty_io_schedulers
+#endif
+ );
#endif
erts_thr_q_init();
erts_init_utils();
@@ -1392,7 +1552,15 @@ erl_start(int argc, char **argv)
break;
case 'S' : /* Was handled in early_init() just read past it */
- if (argv[i][2] == 'P')
+ if (argv[i][2] == 'D') {
+ char* type = argv[i]+3;
+ if (strcmp(type, "Pcpu") == 0)
+ (void) get_arg(argv[i]+7, argv[i+1], &i);
+ if (strcmp(type, "cpu") == 0)
+ (void) get_arg(argv[i]+6, argv[i+1], &i);
+ else if (strcmp(type, "io") == 0)
+ (void) get_arg(argv[i]+5, argv[i+1], &i);
+ } else if (argv[i][2] == 'P')
(void) get_arg(argv[i]+3, argv[i+1], &i);
else
(void) get_arg(argv[i]+2, argv[i+1], &i);
diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c
index 0dd83fa6ed..a8ff94ac89 100644
--- a/erts/emulator/beam/erl_lock_check.c
+++ b/erts/emulator/beam/erl_lock_check.c
@@ -123,6 +123,9 @@ static erts_lc_lock_order_t erts_lock_order[] = {
{ "schdlr_sspnd", NULL },
{ "migration_info_update", NULL },
{ "run_queue", "address" },
+#ifdef ERTS_DIRTY_SCHEDULERS
+ { "dirty_run_queue_sleep_list", "address" },
+#endif
{ "process_table", NULL },
{ "cpu_info", NULL },
{ "pollset", "address" },
diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c
index dc285b3cf7..e1e213c4eb 100644
--- a/erts/emulator/beam/erl_nif.c
+++ b/erts/emulator/beam/erl_nif.c
@@ -34,6 +34,7 @@
#include "beam_bp.h"
#include "erl_thr_progress.h"
#include "dtrace-wrapper.h"
+#include "erl_process.h"
#if defined(USE_DYNAMIC_TRACE) && (defined(USE_DTRACE) || defined(USE_SYSTEMTAP))
#define HAVE_USE_DTRACE 1
#endif
@@ -1451,6 +1452,156 @@ int enif_consume_timeslice(ErlNifEnv* env, int percent)
return ERTS_BIF_REDS_LEFT(env->proc) == 0;
}
+#ifdef ERTS_DIRTY_SCHEDULERS
+
+static void
+alloc_proc_psd(Process* proc, Export **ep)
+{
+ int i;
+ if (!*ep) {
+ *ep = erts_alloc(ERTS_ALC_T_PSD, sizeof(Export));
+ sys_memset((void*) *ep, 0, sizeof(Export));
+ for (i=0; i<ERTS_NUM_CODE_IX; i++) {
+ (*ep)->addressv[i] = &(*ep)->code[3];
+ }
+ (*ep)->code[3] = (BeamInstr) em_call_nif;
+ }
+ (void) ERTS_PROC_SET_DIRTY_SCHED_TRAP_EXPORT(proc, ERTS_PROC_LOCK_MAIN, *ep);
+}
+
+static ERL_NIF_TERM
+execute_dirty_nif_finalizer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+ Eterm* reg = ERTS_PROC_GET_SCHDATA(env->proc)->x_reg_array;
+ ERL_NIF_TERM result = (ERL_NIF_TERM) reg[0];
+ typedef ERL_NIF_TERM (*FinalizerFP)(ErlNifEnv*, ERL_NIF_TERM);
+ FinalizerFP fp;
+#if HAVE_INT64 && SIZEOF_LONG != 8
+ ASSERT(sizeof(fp) <= sizeof(ErlNifUInt64));
+ enif_get_uint64(env, reg[1], (ErlNifUInt64 *) &fp);
+#else
+ ASSERT(sizeof(fp) <= sizeof(unsigned long));
+ enif_get_ulong(env, reg[1], (unsigned long *) &fp);
+#endif
+ return (*fp)(env, result);
+}
+
+#endif /* ERTS_DIRTY_SCHEDULERS */
+
+#ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT
+
+ERL_NIF_TERM
+enif_schedule_dirty_nif(ErlNifEnv* env, int flags,
+ ERL_NIF_TERM (*fp)(ErlNifEnv*, int, const ERL_NIF_TERM[]),
+ int argc, const ERL_NIF_TERM argv[])
+{
+#ifdef USE_THREADS
+ erts_aint32_t state, n, a;
+ Process* proc = env->proc;
+ Eterm* reg = ERTS_PROC_GET_SCHDATA(proc)->x_reg_array;
+ Export* ep = NULL;
+ int i;
+
+ int chkflgs = (flags & (ERL_NIF_DIRTY_JOB_IO_BOUND|ERL_NIF_DIRTY_JOB_CPU_BOUND));
+ if (chkflgs != ERL_NIF_DIRTY_JOB_IO_BOUND && chkflgs != ERL_NIF_DIRTY_JOB_CPU_BOUND)
+ return enif_make_badarg(env);
+
+ a = erts_smp_atomic32_read_acqb(&proc->state);
+ while (1) {
+ n = state = a;
+ if (chkflgs == ERL_NIF_DIRTY_JOB_CPU_BOUND)
+ n |= ERTS_PSFLG_DIRTY_CPU_PROC;
+ else
+ n |= ERTS_PSFLG_DIRTY_IO_PROC;
+ a = erts_smp_atomic32_cmpxchg_mb(&proc->state, n, state);
+ if (a == state)
+ break;
+ }
+ if (!(ep = ERTS_PROC_GET_DIRTY_SCHED_TRAP_EXPORT(proc)))
+ alloc_proc_psd(proc, &ep);
+ ERTS_VBUMP_ALL_REDS(proc);
+ ep->code[2] = argc;
+ for (i = 0; i < argc; i++) {
+ reg[i] = (Eterm) argv[i];
+ }
+ proc->i = (BeamInstr*) ep->addressv[0];
+ ep->code[4] = (BeamInstr) fp;
+ proc->freason = TRAP;
+
+ return THE_NON_VALUE;
+#else
+ return (*fp)(env, argc, argv);
+#endif
+}
+
+ERL_NIF_TERM
+enif_schedule_dirty_nif_finalizer(ErlNifEnv* env, ERL_NIF_TERM result,
+ ERL_NIF_TERM (*fp)(ErlNifEnv*, ERL_NIF_TERM))
+{
+#ifdef USE_THREADS
+ erts_aint32_t state, n, a;
+ Process* proc = env->proc;
+ Eterm* reg = ERTS_PROC_GET_SCHDATA(proc)->x_reg_array;
+ Export* ep;
+
+ a = erts_smp_atomic32_read_acqb(&proc->state);
+ while (1) {
+ n = state = a;
+ if (!(n & (ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q)))
+ break;
+ n &= ~(ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_IO_PROC
+ |ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q);
+ a = erts_smp_atomic32_cmpxchg_mb(&proc->state, n, state);
+ if (a == state)
+ break;
+ }
+ if (!(ep = ERTS_PROC_GET_DIRTY_SCHED_TRAP_EXPORT(proc)))
+ alloc_proc_psd(proc, &ep);
+ ERTS_VBUMP_ALL_REDS(proc);
+ ep->code[2] = 2;
+ reg[0] = (Eterm) result;
+#if HAVE_INT64 && SIZEOF_LONG != 8
+ ASSERT(sizeof(fp) <= sizeof(ErlNifUInt64));
+ reg[1] = (Eterm) enif_make_uint64(env, (ErlNifUInt64) fp);
+#else
+ ASSERT(sizeof(fp) <= sizeof(unsigned long));
+ reg[1] = (Eterm) enif_make_ulong(env, (unsigned long) fp);
+#endif
+ proc->i = (BeamInstr*) ep->addressv[0];
+ ep->code[4] = (BeamInstr) execute_dirty_nif_finalizer;
+ proc->freason = TRAP;
+
+ return THE_NON_VALUE;
+#else
+ return (*fp)(env, result);
+#endif
+}
+
+/* A simple finalizer that just returns its result argument */
+ERL_NIF_TERM
+enif_dirty_nif_finalizer(ErlNifEnv* env, ERL_NIF_TERM result)
+{
+ return result;
+}
+
+int
+enif_is_on_dirty_scheduler(ErlNifEnv* env)
+{
+ return ERTS_SCHEDULER_IS_DIRTY(env->proc->scheduler_data);
+}
+
+int
+enif_have_dirty_schedulers()
+{
+#ifdef USE_THREADS
+ return 1;
+#else
+ return 0;
+#endif
+}
+
+#endif /* ERL_NIF_DIRTY_SCHEDULER_SUPPORT */
+
/***************************************************************************
** load_nif/2 **
***************************************************************************/
diff --git a/erts/emulator/beam/erl_nif.h b/erts/emulator/beam/erl_nif.h
index 5f4dc21d5c..fb3c359ec9 100644
--- a/erts/emulator/beam/erl_nif.h
+++ b/erts/emulator/beam/erl_nif.h
@@ -23,7 +23,11 @@
#ifndef __ERL_NIF_H__
#define __ERL_NIF_H__
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include "erl_native_features_config.h"
#include "erl_drv_nif.h"
/* Version history:
@@ -34,9 +38,14 @@
** 2.2: R14B03 enif_is_exception
** 2.3: R15 enif_make_reverse_list, enif_is_number
** 2.4: R16 enif_consume_timeslice
+** 2.5: R17 dirty schedulers
*/
#define ERL_NIF_MAJOR_VERSION 2
+#ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT
+#define ERL_NIF_MINOR_VERSION 5
+#else
#define ERL_NIF_MINOR_VERSION 4
+#endif
#include <stdlib.h>
@@ -159,6 +168,14 @@ typedef int ErlNifTSDKey;
typedef ErlDrvThreadOpts ErlNifThreadOpts;
+#ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT
+typedef enum
+{
+ ERL_NIF_DIRTY_JOB_CPU_BOUND = 1,
+ ERL_NIF_DIRTY_JOB_IO_BOUND = 2
+}ErlNifDirtyTaskFlags;
+#endif
+
#if (defined(__WIN32__) || defined(_WIN32) || defined(_WIN32_))
# define ERL_NIF_API_FUNC_DECL(RET_TYPE, NAME, ARGS) RET_TYPE (*NAME) ARGS
typedef struct {
diff --git a/erts/emulator/beam/erl_nif_api_funcs.h b/erts/emulator/beam/erl_nif_api_funcs.h
index 2f841645e1..f5b27dfdfa 100644
--- a/erts/emulator/beam/erl_nif_api_funcs.h
+++ b/erts/emulator/beam/erl_nif_api_funcs.h
@@ -141,6 +141,13 @@ ERL_NIF_API_FUNC_DECL(int,enif_is_number,(ErlNifEnv*, ERL_NIF_TERM term));
ERL_NIF_API_FUNC_DECL(void*,enif_dlopen,(const char* lib, void (*err_handler)(void*,const char*), void* err_arg));
ERL_NIF_API_FUNC_DECL(void*,enif_dlsym,(void* handle, const char* symbol, void (*err_handler)(void*,const char*), void* err_arg));
ERL_NIF_API_FUNC_DECL(int,enif_consume_timeslice,(ErlNifEnv*, int percent));
+#ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT
+ERL_NIF_API_FUNC_DECL(ERL_NIF_TERM,enif_schedule_dirty_nif,(ErlNifEnv*,int,ERL_NIF_TERM (*)(ErlNifEnv*,int,const ERL_NIF_TERM[]),int,const ERL_NIF_TERM[]));
+ERL_NIF_API_FUNC_DECL(ERL_NIF_TERM,enif_schedule_dirty_nif_finalizer,(ErlNifEnv*,ERL_NIF_TERM,ERL_NIF_TERM (*)(ErlNifEnv*,ERL_NIF_TERM)));
+ERL_NIF_API_FUNC_DECL(ERL_NIF_TERM,enif_dirty_nif_finalizer,(ErlNifEnv*,ERL_NIF_TERM));
+ERL_NIF_API_FUNC_DECL(int,enif_is_on_dirty_scheduler,(ErlNifEnv*));
+ERL_NIF_API_FUNC_DECL(int,enif_have_dirty_schedulers,(void));
+#endif
/*
** Add new entries here to keep compatibility on Windows!!!
@@ -266,6 +273,13 @@ ERL_NIF_API_FUNC_DECL(int,enif_consume_timeslice,(ErlNifEnv*, int percent));
# define enif_dlopen ERL_NIF_API_FUNC_MACRO(enif_dlopen)
# define enif_dlsym ERL_NIF_API_FUNC_MACRO(enif_dlsym)
# define enif_consume_timeslice ERL_NIF_API_FUNC_MACRO(enif_consume_timeslice)
+#ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT
+# define enif_schedule_dirty_nif ERL_NIF_API_FUNC_MACRO(enif_schedule_dirty_nif)
+# define enif_schedule_dirty_nif_finalizer ERL_NIF_API_FUNC_MACRO(enif_schedule_dirty_nif_finalizer)
+# define enif_dirty_nif_finalizer ERL_NIF_API_FUNC_MACRO(enif_dirty_nif_finalizer)
+# define enif_is_on_dirty_scheduler ERL_NIF_API_FUNC_MACRO(enif_is_on_dirty_scheduler)
+# define enif_have_dirty_schedulers ERL_NIF_API_FUNC_MACRO(enif_have_dirty_schedulers)
+#endif
/*
** Add new entries here
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 74cd84a998..937881212a 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -146,6 +146,11 @@ extern BeamInstr beam_continue_exit[];
int erts_sched_compact_load;
int erts_sched_balance_util = 0;
Uint erts_no_schedulers;
+#ifdef ERTS_DIRTY_SCHEDULERS
+Uint erts_no_dirty_cpu_schedulers;
+Uint erts_no_dirty_cpu_schedulers_online;
+Uint erts_no_dirty_io_schedulers;
+#endif
#define ERTS_THR_PRGR_LATER_CLEANUP_OP_THRESHOLD_VERY_LAZY (4*1024*1024)
#define ERTS_THR_PRGR_LATER_CLEANUP_OP_THRESHOLD_LAZY (512*1024)
@@ -259,6 +264,10 @@ ErtsAlignedRunQueue *erts_aligned_run_queues;
Uint erts_no_run_queues;
ErtsAlignedSchedulerData *erts_aligned_scheduler_data;
+#ifdef ERTS_DIRTY_SCHEDULERS
+ErtsAlignedSchedulerData *erts_aligned_dirty_cpu_scheduler_data;
+ErtsAlignedSchedulerData *erts_aligned_dirty_io_scheduler_data;
+#endif
typedef union {
ErtsSchedulerSleepInfo ssi;
@@ -266,6 +275,12 @@ typedef union {
} ErtsAlignedSchedulerSleepInfo;
static ErtsAlignedSchedulerSleepInfo *aligned_sched_sleep_info;
+#ifdef ERTS_DIRTY_SCHEDULERS
+#ifdef ERTS_SMP
+static ErtsAlignedSchedulerSleepInfo *aligned_dirty_cpu_sched_sleep_info;
+static ErtsAlignedSchedulerSleepInfo *aligned_dirty_io_sched_sleep_info;
+#endif
+#endif
static Uint last_reductions;
static Uint last_exact_reductions;
@@ -332,6 +347,16 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(proclist,
(ASSERT(-1 <= ((int) (IX)) \
&& ((int) (IX)) < ((int) erts_no_schedulers)), \
&aligned_sched_sleep_info[(IX)].ssi)
+#ifdef ERTS_DIRTY_SCHEDULERS
+#define ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(IX) \
+ (ASSERT(0 <= ((int) (IX)) \
+ && ((int) (IX)) < ((int) erts_no_dirty_cpu_schedulers)), \
+ &aligned_dirty_cpu_sched_sleep_info[(IX)].ssi)
+#define ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(IX) \
+ (ASSERT(0 <= ((int) (IX)) \
+ && ((int) (IX)) < ((int) erts_no_dirty_io_schedulers)), \
+ &aligned_dirty_io_sched_sleep_info[(IX)].ssi)
+#endif
#define ERTS_FOREACH_RUNQ(RQVAR, DO) \
do { \
@@ -519,6 +544,13 @@ erts_pre_init_process(void)
erts_psd_required_locks[ERTS_PSD_DELAYED_GC_TASK_QS].set_locks
= ERTS_PSD_DELAYED_GC_TASK_QS_SET_LOCKS;
+#ifdef ERTS_DIRTY_SCHEDULERS
+ erts_psd_required_locks[ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT].get_locks
+ = ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT_GET_LOCKS;
+ erts_psd_required_locks[ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT].set_locks
+ = ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT_SET_LOCKS;
+#endif
+
/* Check that we have locks for all entries */
for (ix = 0; ix < ERTS_PSD_SIZE; ix++) {
ERTS_SMP_LC_ASSERT(erts_psd_required_locks[ix].get_locks);
@@ -931,7 +963,9 @@ reply_sched_wall_time(void *vswtrp)
ErlHeapFragment *bp = NULL;
ASSERT(esdp);
-
+#ifdef ERTS_DIRTY_SCHEDULERS
+ ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp));
+#endif
if (swtrp->set) {
if (!swtrp->enable && esdp->sched_wall_time.enabled) {
esdp->sched_wall_time.need = erts_sched_balance_util;
@@ -1015,6 +1049,9 @@ erts_sched_wall_time_request(Process *c_p, int set, int enable)
if (!set && !esdp->sched_wall_time.enabled)
return THE_NON_VALUE;
+#ifdef ERTS_DIRTY_SCHEDULERS
+ ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp));
+#endif
swtrp = swtreq_alloc();
ref = erts_make_ref(c_p);
@@ -1492,6 +1529,9 @@ erts_schedule_multi_misc_aux_work(int ignore_self,
if (ignore_self) {
ErtsSchedulerData *esdp = erts_get_scheduler_data();
+#ifdef ERTS_DIRTY_SCHEDULERS
+ ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp));
+#endif
if (esdp)
self = (int) esdp->no;
}
@@ -1601,7 +1641,7 @@ void
erts_alloc_notify_delayed_dealloc(int ix)
{
ErtsSchedulerData *esdp = erts_get_scheduler_data();
- if (esdp)
+ if (esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp))
schedule_aux_work_wakeup(&esdp->aux_work_data,
ix,
ERTS_SSI_AUX_WORK_DD);
@@ -1615,6 +1655,10 @@ erts_alloc_ensure_handle_delayed_dealloc_call(int ix)
{
#ifdef DEBUG
ErtsSchedulerData *esdp = erts_get_scheduler_data();
+#ifdef ERTS_DIRTY_SCHEDULERS
+ if (esdp && ERTS_SCHEDULER_IS_DIRTY(esdp))
+ return;
+#endif
ASSERT(!esdp || ix == (int) esdp->no);
#endif
set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(ix-1),
@@ -2220,6 +2264,9 @@ static ERTS_INLINE void
sched_active_sys(Uint no, ErtsRunQueue *rq)
{
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
+#ifdef ERTS_DIRTY_SCHEDULERS
+ ASSERT(!ERTS_RUNQ_IX_IS_DIRTY(rq->ix));
+#endif
ASSERT(rq->waiting < 0);
rq->waiting *= -1;
rq->waiting--;
@@ -2276,6 +2323,9 @@ static ERTS_INLINE void
sched_change_waiting_sys_to_waiting(Uint no, ErtsRunQueue *rq)
{
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
+#ifdef ERTS_DIRTY_SCHEDULERS
+ ASSERT(!ERTS_RUNQ_IX_IS_DIRTY(rq->ix));
+#endif
ASSERT(rq->waiting < 0);
rq->waiting *= -1;
}
@@ -2291,7 +2341,7 @@ sched_waiting(Uint no, ErtsRunQueue *rq)
else
rq->waiting++;
rq->woken = 0;
- if (erts_system_profile_flags.scheduler)
+ if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix) && erts_system_profile_flags.scheduler)
profile_scheduler(make_small(no), am_inactive);
}
@@ -2303,7 +2353,7 @@ sched_active(Uint no, ErtsRunQueue *rq)
rq->waiting++;
else
rq->waiting--;
- if (erts_system_profile_flags.scheduler)
+ if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix) && erts_system_profile_flags.scheduler)
profile_scheduler(make_small(no), am_active);
}
@@ -2317,7 +2367,7 @@ ongoing_multi_scheduling_block(void)
static ERTS_INLINE void
empty_runq_aux(ErtsRunQueue *rq, Uint32 old_flags)
{
- if (old_flags & ERTS_RUNQ_FLG_NONEMPTY) {
+ if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix) && old_flags & ERTS_RUNQ_FLG_NONEMPTY) {
#ifdef DEBUG
erts_aint32_t empty = erts_smp_atomic32_read_nob(&no_empty_run_queues);
/*
@@ -2357,7 +2407,7 @@ static ERTS_INLINE void
non_empty_runq(ErtsRunQueue *rq)
{
Uint32 old_flags = ERTS_RUNQ_FLGS_SET(rq, ERTS_RUNQ_FLG_NONEMPTY);
- if (!(old_flags & ERTS_RUNQ_FLG_NONEMPTY)) {
+ if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix) && (!(old_flags & ERTS_RUNQ_FLG_NONEMPTY))) {
#ifdef DEBUG
erts_aint32_t empty = erts_smp_atomic32_read_nob(&no_empty_run_queues);
/*
@@ -2602,18 +2652,37 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
+#ifdef ERTS_DIRTY_SCHEDULERS
+ if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix))
+ erts_smp_spin_lock(&rq->sleepers.lock);
+#endif
flgs = sched_prep_spin_wait(ssi);
if (flgs & ERTS_SSI_FLG_SUSPENDED) {
/* Go suspend instead... */
+#ifdef ERTS_DIRTY_SCHEDULERS
+ if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix))
+ erts_smp_spin_unlock(&rq->sleepers.lock);
+#endif
return;
}
+#ifdef ERTS_DIRTY_SCHEDULERS
+ if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) {
+ ssi->prev = NULL;
+ ssi->next = rq->sleepers.list;
+ if (rq->sleepers.list)
+ rq->sleepers.list->prev = ssi;
+ rq->sleepers.list = ssi;
+ erts_smp_spin_unlock(&rq->sleepers.lock);
+ }
+#endif
+
/*
* If all schedulers are waiting, one of them *should*
* be waiting in erl_sys_schedule()
*/
- if (!prepare_for_sys_schedule()) {
+ if (ERTS_SCHEDULER_IS_DIRTY(esdp) || !prepare_for_sys_schedule()) {
sched_waiting(esdp->no, rq);
@@ -2623,12 +2692,13 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
tse_wait:
- if (thr_prgr_active != working)
+ if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && thr_prgr_active != working)
sched_wall_time_change(esdp, thr_prgr_active);
while (1) {
- aux_work = erts_atomic32_read_acqb(&ssi->aux_work);
+ aux_work = ERTS_SCHEDULER_IS_DIRTY(esdp) ? 0 :
+ erts_atomic32_read_acqb(&ssi->aux_work);
if (aux_work) {
if (!thr_prgr_active) {
erts_thr_progress_active(esdp, thr_prgr_active = 1);
@@ -2642,11 +2712,13 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
if (aux_work)
flgs = erts_smp_atomic32_read_acqb(&ssi->flags);
else {
- if (thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 0);
- sched_wall_time_change(esdp, 0);
+ if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) {
+ if (thr_prgr_active) {
+ erts_thr_progress_active(esdp, thr_prgr_active = 0);
+ sched_wall_time_change(esdp, 0);
+ }
+ erts_thr_progress_prepare_wait(esdp);
}
- erts_thr_progress_prepare_wait(esdp);
flgs = sched_spin_wait(ssi, spincount);
if (flgs & ERTS_SSI_FLG_SLEEPING) {
@@ -2661,7 +2733,8 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
} while (res == EINTR);
}
}
- erts_thr_progress_finalize_wait(esdp);
+ if (!ERTS_SCHEDULER_IS_DIRTY(esdp))
+ erts_thr_progress_finalize_wait(esdp);
}
if (!(flgs & ERTS_SSI_FLG_WAITING)) {
@@ -2682,7 +2755,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
if (flgs & ~ERTS_SSI_FLG_SUSPENDED)
erts_smp_atomic32_read_band_nob(&ssi->flags, ERTS_SSI_FLG_SUSPENDED);
- if (!thr_prgr_active) {
+ if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && !thr_prgr_active) {
erts_thr_progress_active(esdp, thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
@@ -2699,6 +2772,9 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
erts_smp_atomic32_set_relb(&function_calls, 0);
*fcalls = 0;
+#ifdef ERTS_DIRTY_SCHEDULERS
+ ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp));
+#endif
sched_waiting_sys(esdp->no, rq);
@@ -2912,6 +2988,34 @@ wake_scheduler(ErtsRunQueue *rq)
erts_sched_finish_poke(ssi, flgs);
}
+#ifdef ERTS_DIRTY_SCHEDULERS
+static void
+wake_dirty_scheduler(ErtsRunQueue *rq)
+{
+ ErtsSchedulerSleepInfo *ssi;
+ ErtsSchedulerSleepList *sl;
+
+ ASSERT(ERTS_RUNQ_IX_IS_DIRTY(rq->ix));
+
+ sl = &rq->sleepers;
+ erts_smp_spin_lock(&sl->lock);
+ ssi = sl->list;
+ if (!ssi)
+ erts_smp_spin_unlock(&sl->lock);
+ else {
+ sl->list = NULL;
+ erts_smp_spin_unlock(&sl->lock);
+
+ ERTS_THR_MEMORY_BARRIER;
+ do {
+ ErtsSchedulerSleepInfo *wake_ssi = ssi;
+ ssi = ssi->next;
+ erts_sched_finish_poke(wake_ssi, ssi_flags_set_wake(wake_ssi));
+ } while (ssi);
+ }
+}
+#endif
+
#define ERTS_NO_USED_RUNQS_SHIFT 16
#define ERTS_NO_RUNQS_MASK 0xffff
@@ -3047,8 +3151,14 @@ static ERTS_INLINE void
smp_notify_inc_runq(ErtsRunQueue *runq)
{
#ifdef ERTS_SMP
- if (runq)
- wake_scheduler(runq);
+ if (runq) {
+#ifdef ERTS_DIRTY_SCHEDULERS
+ if (ERTS_RUNQ_IX_IS_DIRTY(runq->ix))
+ wake_dirty_scheduler(runq);
+ else
+#endif
+ wake_scheduler(runq);
+ }
#endif
}
@@ -4899,7 +5009,10 @@ erts_sched_set_wake_cleanup_threshold(char *str)
static void
init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp)
{
- awdp->sched_id = esdp ? (int) esdp->no : 0;
+ if (!esdp || ERTS_SCHEDULER_IS_DIRTY(esdp))
+ awdp->sched_id = 0;
+ else
+ awdp->sched_id = (int) esdp->no;
awdp->esdp = esdp;
awdp->ssi = esdp ? esdp->ssi : NULL;
#ifdef ERTS_SMP
@@ -4939,14 +5052,71 @@ init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp)
#endif
}
+static void
+init_scheduler_data(ErtsSchedulerData* esdp, int num,
+ ErtsSchedulerSleepInfo* ssi,
+ ErtsRunQueue* runq,
+ char** daww_ptr, size_t daww_sz)
+{
+#ifdef ERTS_SMP
+ erts_bits_init_state(&esdp->erl_bits_state);
+ esdp->match_pseudo_process = NULL;
+ esdp->free_process = NULL;
+#endif
+ esdp->x_reg_array =
+ erts_alloc_permanent_cache_aligned(ERTS_ALC_T_BEAM_REGISTER,
+ ERTS_X_REGS_ALLOCATED *
+ sizeof(Eterm));
+ esdp->f_reg_array =
+ erts_alloc_permanent_cache_aligned(ERTS_ALC_T_BEAM_REGISTER,
+ MAX_REG * sizeof(FloatDef));
+#if !HEAP_ON_C_STACK
+ esdp->num_tmp_heap_used = 0;
+#endif
+#ifdef ERTS_DIRTY_SCHEDULERS
+ if (ERTS_RUNQ_IX_IS_DIRTY(runq->ix)) {
+ esdp->no = 0;
+ esdp->dirty_no = (Uint) num;
+ }
+ else {
+ esdp->no = (Uint) num;
+ esdp->dirty_no = 0;
+ }
+#else
+ esdp->no = (Uint) num;
+#endif
+ esdp->ssi = ssi;
+ esdp->current_process = NULL;
+ esdp->current_port = NULL;
+
+ esdp->virtual_reds = 0;
+ esdp->cpu_id = -1;
+
+ erts_init_atom_cache_map(&esdp->atom_cache_map);
+
+ esdp->run_queue = runq;
+ esdp->run_queue->scheduler = esdp;
+
+ if (daww_ptr) {
+ init_aux_work_data(&esdp->aux_work_data, esdp, *daww_ptr);
+#ifdef ERTS_SMP
+ *daww_ptr += daww_sz;
+#endif
+ }
+
+ esdp->reductions = 0;
+
+ init_sched_wall_time(&esdp->sched_wall_time);
+ erts_port_task_handle_init(&esdp->nosuspend_port_task_handle);
+}
+
void
erts_init_scheduling(int no_schedulers, int no_schedulers_online)
{
int ix, n, no_ssi;
char *daww_ptr;
-#ifdef ERTS_SMP
size_t daww_sz;
-#endif
+ size_t size_runqs;
init_misc_op_list_alloc();
init_proc_sys_task_queues_alloc();
@@ -4967,19 +5137,26 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online)
/* Create and initialize run queues */
n = no_schedulers;
-
- erts_aligned_run_queues =
- erts_alloc_permanent_cache_aligned(ERTS_ALC_T_RUNQS,
- sizeof(ErtsAlignedRunQueue) * n);
+ size_runqs = sizeof(ErtsAlignedRunQueue) * (n + ERTS_NUM_DIRTY_RUNQS);
+ erts_aligned_run_queues =
+ erts_alloc_permanent_cache_aligned(ERTS_ALC_T_RUNQS, size_runqs);
#ifdef ERTS_SMP
+#ifdef ERTS_DIRTY_SCHEDULERS
+ erts_aligned_run_queues += ERTS_NUM_DIRTY_RUNQS;
+#endif
erts_smp_atomic32_init_nob(&no_empty_run_queues, 0);
#endif
erts_no_run_queues = n;
- for (ix = 0; ix < n; ix++) {
+ for (ix = -(ERTS_NUM_DIRTY_RUNQS); ix < n; ix++) {
int pix, rix;
+#ifdef ERTS_DIRTY_SCHEDULERS
+ ErtsRunQueue *rq = ERTS_RUNQ_IX_IS_DIRTY(ix) ?
+ ERTS_DIRTY_RUNQ_IX(ix) : ERTS_RUNQ_IX(ix);
+#else
ErtsRunQueue *rq = ERTS_RUNQ_IX(ix);
+#endif
rq->ix = ix;
@@ -4990,6 +5167,15 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online)
erts_smp_mtx_init_x(&rq->mtx, "run_queue", make_small(ix + 1));
erts_smp_cnd_init(&rq->cnd);
+#ifdef ERTS_DIRTY_SCHEDULERS
+#ifdef ERTS_SMP
+ if (ERTS_RUNQ_IX_IS_DIRTY(ix)) {
+ erts_smp_spinlock_init(&rq->sleepers.lock, "dirty_run_queue_sleep_list");
+ rq->sleepers.list = NULL;
+ }
+#endif
+#endif
+
rq->waiting = 0;
rq->woken = 0;
ERTS_RUNQ_FLGS_INIT(rq, ERTS_RUNQ_FLG_NONEMPTY);
@@ -5076,6 +5262,29 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online)
#ifdef ERTS_SMP
aligned_sched_sleep_info++;
+
+#ifdef ERTS_DIRTY_SCHEDULERS
+ aligned_dirty_cpu_sched_sleep_info =
+ erts_alloc_permanent_cache_aligned(
+ ERTS_ALC_T_SCHDLR_SLP_INFO,
+ erts_no_dirty_cpu_schedulers*sizeof(ErtsAlignedSchedulerSleepInfo));
+ for (ix = 0; ix < erts_no_dirty_cpu_schedulers; ix++) {
+ ErtsSchedulerSleepInfo *ssi = &aligned_dirty_cpu_sched_sleep_info[ix].ssi;
+ erts_smp_atomic32_init_nob(&ssi->flags, 0);
+ ssi->event = NULL; /* initialized in sched_thread_func */
+ erts_atomic32_init_nob(&ssi->aux_work, 0);
+ }
+ aligned_dirty_io_sched_sleep_info =
+ erts_alloc_permanent_cache_aligned(
+ ERTS_ALC_T_SCHDLR_SLP_INFO,
+ erts_no_dirty_io_schedulers*sizeof(ErtsAlignedSchedulerSleepInfo));
+ for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) {
+ ErtsSchedulerSleepInfo *ssi = &aligned_dirty_io_sched_sleep_info[ix].ssi;
+ erts_smp_atomic32_init_nob(&ssi->flags, 0);
+ ssi->event = NULL; /* initialized in sched_thread_func */
+ erts_atomic32_init_nob(&ssi->aux_work, 0);
+ }
+#endif
#endif
/* Create and initialize scheduler specific data */
@@ -5086,6 +5295,7 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online)
daww_ptr = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA,
daww_sz*n);
#else
+ daww_sz = 0;
daww_ptr = NULL;
#endif
@@ -5095,46 +5305,32 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online)
for (ix = 0; ix < n; ix++) {
ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(ix);
-#ifdef ERTS_SMP
- erts_bits_init_state(&esdp->erl_bits_state);
- esdp->match_pseudo_process = NULL;
- esdp->free_process = NULL;
-#endif
- esdp->x_reg_array =
- erts_alloc_permanent_cache_aligned(ERTS_ALC_T_BEAM_REGISTER,
- ERTS_X_REGS_ALLOCATED *
- sizeof(Eterm));
- esdp->f_reg_array =
- erts_alloc_permanent_cache_aligned(ERTS_ALC_T_BEAM_REGISTER,
- MAX_REG * sizeof(FloatDef));
-#if !HEAP_ON_C_STACK
- esdp->num_tmp_heap_used = 0;
-#endif
- esdp->no = (Uint) ix+1;
- esdp->ssi = ERTS_SCHED_SLEEP_INFO_IX(ix);
- esdp->current_process = NULL;
- esdp->current_port = NULL;
-
- esdp->virtual_reds = 0;
- esdp->cpu_id = -1;
-
- erts_init_atom_cache_map(&esdp->atom_cache_map);
-
- esdp->run_queue = ERTS_RUNQ_IX(ix);
- esdp->run_queue->scheduler = esdp;
+ init_scheduler_data(esdp, ix+1, ERTS_SCHED_SLEEP_INFO_IX(ix),
+ ERTS_RUNQ_IX(ix), &daww_ptr, daww_sz);
+ }
- init_aux_work_data(&esdp->aux_work_data, esdp, daww_ptr);
+#ifdef ERTS_DIRTY_SCHEDULERS
#ifdef ERTS_SMP
- daww_ptr += daww_sz;
-#endif
-
- esdp->reductions = 0;
-
- init_sched_wall_time(&esdp->sched_wall_time);
-
- erts_port_task_handle_init(&esdp->nosuspend_port_task_handle);
-
+ erts_aligned_dirty_cpu_scheduler_data =
+ erts_alloc_permanent_cache_aligned(
+ ERTS_ALC_T_SCHDLR_DATA,
+ erts_no_dirty_cpu_schedulers*sizeof(ErtsAlignedSchedulerData));
+ for (ix = 0; ix < erts_no_dirty_cpu_schedulers; ix++) {
+ ErtsSchedulerData *esdp = ERTS_DIRTY_CPU_SCHEDULER_IX(ix);
+ init_scheduler_data(esdp, ix+1, ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix),
+ ERTS_DIRTY_CPU_RUNQ, NULL, 0);
+ }
+ erts_aligned_dirty_io_scheduler_data =
+ erts_alloc_permanent_cache_aligned(
+ ERTS_ALC_T_SCHDLR_DATA,
+ erts_no_dirty_io_schedulers*sizeof(ErtsAlignedSchedulerData));
+ for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) {
+ ErtsSchedulerData *esdp = ERTS_DIRTY_IO_SCHEDULER_IX(ix);
+ init_scheduler_data(esdp, ix+1, ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix),
+ ERTS_DIRTY_IO_RUNQ, NULL, 0);
}
+#endif
+#endif
init_misc_aux_work();
#if !HALFWORD_HEAP
@@ -5198,6 +5394,10 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online)
#endif
}
erts_no_schedulers = 1;
+#ifdef ERTS_DIRTY_SCHEDULERS
+ erts_no_dirty_cpu_schedulers = 0;
+ erts_no_dirty_io_schedulers = 0;
+#endif
#endif
erts_smp_atomic32_init_nob(&function_calls, 0);
@@ -5310,28 +5510,39 @@ check_enqueue_in_prio_queue(erts_aint32_t *prq_prio_p,
*prq_prio_p = aprio;
- max_qbit = (actual >> ERTS_PSFLGS_IN_PRQ_MASK_OFFSET) & ERTS_PSFLGS_QMASK;
- max_qbit |= 1 << ERTS_PSFLGS_QMASK_BITS;
- max_qbit &= -max_qbit;
- /*
- * max_qbit now either contain bit set for highest prio queue or a bit
- * out of range (which will have a value larger than valid range).
- */
+#ifdef ERTS_DIRTY_SCHEDULERS
+ if (!(actual & (ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_IO_PROC))) {
+#endif
+ max_qbit = (actual >> ERTS_PSFLGS_IN_PRQ_MASK_OFFSET) & ERTS_PSFLGS_QMASK;
+ max_qbit |= 1 << ERTS_PSFLGS_QMASK_BITS;
+ max_qbit &= -max_qbit;
+ /*
+ * max_qbit now either contain bit set for highest prio queue or a bit
+ * out of range (which will have a value larger than valid range).
+ */
- if (qbit >= max_qbit)
- return 0; /* Already queued in higher or equal prio */
+ if (qbit >= max_qbit)
+ return 0; /* Already queued in higher or equal prio */
- /* Need to enqueue (if already enqueued, it is in lower prio) */
- *newp |= qbit << ERTS_PSFLGS_IN_PRQ_MASK_OFFSET;
+ /* Need to enqueue (if already enqueued, it is in lower prio) */
+ *newp |= qbit << ERTS_PSFLGS_IN_PRQ_MASK_OFFSET;
- if ((actual & (ERTS_PSFLG_IN_RUNQ|ERTS_PSFLGS_USR_PRIO_MASK))
- != (aprio << ERTS_PSFLGS_USR_PRIO_OFFSET)) {
- /*
- * Process struct already enqueued, or actual prio not
- * equal to user prio, i.e., enqueue using proxy.
- */
- return -1;
+ if ((actual & (ERTS_PSFLG_IN_RUNQ|ERTS_PSFLGS_USR_PRIO_MASK))
+ != (aprio << ERTS_PSFLGS_USR_PRIO_OFFSET)) {
+ /*
+ * Process struct already enqueued, or actual prio not
+ * equal to user prio, i.e., enqueue using proxy.
+ */
+ return -1;
+ }
+#ifdef ERTS_DIRTY_SCHEDULERS
+ } else {
+ if (actual & ERTS_PSFLG_DIRTY_CPU_PROC)
+ *newp |= ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q;
+ else
+ *newp |= ERTS_PSFLG_DIRTY_IO_PROC_IN_Q;
}
+#endif
/*
* Enqueue using process struct.
@@ -5387,10 +5598,21 @@ schedule_out_process(ErtsRunQueue *c_rq, erts_aint32_t state, Process *p, Proces
}
else {
Process *sched_p;
- ErtsRunQueue *runq = erts_get_runq_proc(p);
+ ErtsRunQueue *runq;
ASSERT(!(n & ERTS_PSFLG_SUSPENDED) || (n & ERTS_PSFLG_ACTIVE_SYS));
+#ifdef ERTS_DIRTY_SCHEDULERS
+#ifdef ERTS_SMP
+ if (ERTS_PSFLG_DIRTY_CPU_PROC & a)
+ runq = ERTS_DIRTY_CPU_RUNQ;
+ else if (ERTS_PSFLG_DIRTY_IO_PROC & a)
+ runq = ERTS_DIRTY_IO_RUNQ;
+ else
+#endif
+#endif
+ runq = erts_get_runq_proc(p);
+
if (enqueue < 0)
sched_p = make_proxy_proc(proxy, p, enq_prio);
else {
@@ -5400,7 +5622,11 @@ schedule_out_process(ErtsRunQueue *c_rq, erts_aint32_t state, Process *p, Proces
}
#ifdef ERTS_SMP
- if (!(ERTS_PSFLG_BOUND & n)) {
+ if (!(ERTS_PSFLG_BOUND & n)
+#ifdef ERTS_DIRTY_SCHEDULERS
+ && !(n & (ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q))
+#endif
+ ) {
ErtsRunQueue *new_runq = erts_check_emigration_need(runq, enq_prio);
if (new_runq) {
RUNQ_SET_RQ(&sched_p->run_queue, new_runq);
@@ -5827,6 +6053,9 @@ suspend_scheduler(ErtsSchedulerData *esdp)
* Regardless of why a scheduler is suspended, it ends up here.
*/
+#ifdef ERTS_DIRTY_SCHEDULERS
+ ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp));
+#endif
ASSERT(no != 1);
evacuate_run_queue(esdp->run_queue, &sbp);
@@ -5994,22 +6223,44 @@ ErtsSchedSuspendResult
erts_schedulers_state(Uint *total,
Uint *online,
Uint *active,
+ Uint *dirty_cpu,
+ Uint *dirty_cpu_online,
+ Uint *dirty_io,
int yield_allowed)
{
- int res;
+ int res = ERTS_SCHDLR_SSPND_EINVAL;
erts_aint32_t changing;
- erts_smp_mtx_lock(&schdlr_sspnd.mtx);
- changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing);
- if (yield_allowed && (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER))
- res = ERTS_SCHDLR_SSPND_YIELD_RESTART;
- else {
- *active = *online = schdlr_sspnd.online;
- if (ongoing_multi_scheduling_block())
- *active = 1;
- res = ERTS_SCHDLR_SSPND_DONE;
+ if (total) {
+ ASSERT(online);
+ ASSERT(active);
+ erts_smp_mtx_lock(&schdlr_sspnd.mtx);
+ changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing);
+ if (yield_allowed && (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER))
+ res = ERTS_SCHDLR_SSPND_YIELD_RESTART;
+ else {
+ *active = *online = schdlr_sspnd.online;
+ if (ongoing_multi_scheduling_block())
+ *active = 1;
+ res = ERTS_SCHDLR_SSPND_DONE;
+ }
+ erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
+ *total = erts_no_schedulers;
}
- erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
- *total = erts_no_schedulers;
+#ifdef ERTS_DIRTY_SCHEDULERS
+ if (dirty_cpu)
+ *dirty_cpu = erts_no_dirty_cpu_schedulers;
+ if (dirty_cpu_online)
+ *dirty_cpu_online = erts_no_dirty_cpu_schedulers_online;
+ if (dirty_io)
+ *dirty_io = erts_no_dirty_io_schedulers;
+#else
+ if (dirty_cpu)
+ *dirty_cpu = 0;
+ if (dirty_cpu_online)
+ *dirty_cpu_online = 0;
+ if (dirty_io)
+ *dirty_io = 0;
+#endif
return res;
}
@@ -6101,6 +6352,10 @@ erts_set_schedulers_online(Process *p,
ErtsRunQueue *rq = ERTS_RUNQ_IX(ix);
wake_scheduler(rq);
}
+#ifdef ERTS_DIRTY_SCHEDULERS
+ wake_dirty_scheduler(ERTS_DIRTY_CPU_RUNQ);
+ wake_dirty_scheduler(ERTS_DIRTY_IO_RUNQ);
+#endif
}
}
@@ -6448,6 +6703,92 @@ sched_thread_func(void *vesdp)
return NULL;
}
+#ifdef ERTS_DIRTY_SCHEDULERS
+#ifdef ERTS_SMP
+static void*
+sched_dirty_cpu_thread_func(void *vesdp)
+{
+ ErtsThrPrgrCallbacks callbacks;
+ ErtsSchedulerData *esdp = vesdp;
+ Uint no = esdp->dirty_no;
+ ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(no-1)->event = erts_tse_fetch();
+ callbacks.arg = (void *) esdp->ssi;
+ callbacks.wakeup = thr_prgr_wakeup;
+ callbacks.prepare_wait = NULL;
+ callbacks.wait = NULL;
+ callbacks.finalize_wait = NULL;
+
+ erts_thr_progress_register_unmanaged_thread(&callbacks);
+#ifdef ERTS_ENABLE_LOCK_CHECK
+ {
+ char buf[31];
+ erts_snprintf(&buf[0], 31, "dirty cpu scheduler %beu", no);
+ erts_lc_set_thread_name(&buf[0]);
+ }
+#endif
+ erts_tsd_set(sched_data_key, vesdp);
+#if ERTS_USE_ASYNC_READY_Q
+ esdp->aux_work_data.async_ready.queue = NULL;
+#endif
+
+ erts_proc_lock_prepare_proc_lock_waiter();
+
+#ifdef HIPE
+ hipe_thread_signal_init();
+#endif
+ erts_thread_init_float();
+
+ process_main();
+ /* No schedulers should *ever* terminate */
+ erl_exit(ERTS_ABORT_EXIT,
+ "Dirty CPU scheduler thread number %beu terminated\n",
+ no);
+ return NULL;
+}
+
+static void*
+sched_dirty_io_thread_func(void *vesdp)
+{
+ ErtsThrPrgrCallbacks callbacks;
+ ErtsSchedulerData *esdp = vesdp;
+ Uint no = esdp->dirty_no;
+ ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(no-1)->event = erts_tse_fetch();
+ callbacks.arg = (void *) esdp->ssi;
+ callbacks.wakeup = thr_prgr_wakeup;
+ callbacks.prepare_wait = NULL;
+ callbacks.wait = NULL;
+ callbacks.finalize_wait = NULL;
+
+ erts_thr_progress_register_unmanaged_thread(&callbacks);
+#ifdef ERTS_ENABLE_LOCK_CHECK
+ {
+ char buf[31];
+ erts_snprintf(&buf[0], 31, "dirty io scheduler %beu", no);
+ erts_lc_set_thread_name(&buf[0]);
+ }
+#endif
+ erts_tsd_set(sched_data_key, vesdp);
+#if ERTS_USE_ASYNC_READY_Q
+ esdp->aux_work_data.async_ready.queue = NULL;
+#endif
+
+ erts_proc_lock_prepare_proc_lock_waiter();
+
+#ifdef HIPE
+ hipe_thread_signal_init();
+#endif
+ erts_thread_init_float();
+
+ process_main();
+ /* No schedulers should *ever* terminate */
+ erl_exit(ERTS_ABORT_EXIT,
+ "Dirty I/O scheduler thread number %beu terminated\n",
+ no);
+ return NULL;
+}
+#endif
+#endif
+
static ethr_tid aux_tid;
void
@@ -6498,6 +6839,26 @@ erts_start_schedulers(void)
erts_no_schedulers = actual;
+#ifdef ERTS_DIRTY_SCHEDULERS
+#ifdef ERTS_SMP
+ {
+ int ix;
+ for (ix = 0; ix < erts_no_dirty_cpu_schedulers; ix++) {
+ ErtsSchedulerData *esdp = ERTS_DIRTY_CPU_SCHEDULER_IX(ix);
+ res = ethr_thr_create(&esdp->tid,sched_dirty_cpu_thread_func,(void*)esdp,&opts);
+ if (res != 0)
+ erl_exit(1, "Failed to create dirty cpu scheduler thread %d\n", ix);
+ }
+ for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) {
+ ErtsSchedulerData *esdp = ERTS_DIRTY_IO_SCHEDULER_IX(ix);
+ res = ethr_thr_create(&esdp->tid,sched_dirty_io_thread_func,(void*)esdp,&opts);
+ if (res != 0)
+ erl_exit(1, "Failed to create dirty io scheduler thread %d\n", ix);
+ }
+ }
+#endif
+#endif
+
ERTS_THR_MEMORY_BARRIER;
res = ethr_thr_create(&aux_tid, aux_thread, NULL, &opts);
@@ -7486,7 +7847,8 @@ Process *schedule(Process *p, int calls)
input_reductions = INPUT_REDUCTIONS;
}
- ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking());
+ ERTS_SMP_LC_ASSERT(ERTS_SCHEDULER_IS_DIRTY(erts_get_scheduler_data())
+ || !erts_thr_progress_is_blocking());
/*
* Clean up after the process being scheduled out.
@@ -7597,7 +7959,8 @@ Process *schedule(Process *p, int calls)
}
- ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking());
+ ERTS_SMP_LC_ASSERT(ERTS_SCHEDULER_IS_DIRTY(esdp)
+ || !erts_thr_progress_is_blocking());
check_activities_to_run: {
#ifdef ERTS_SMP
@@ -7611,22 +7974,25 @@ Process *schedule(Process *p, int calls)
erts_smp_runq_lock(rq);
}
- if (rq->check_balance_reds <= 0)
- check_balance(rq);
+ if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) {
+ if (rq->check_balance_reds <= 0)
+ check_balance(rq);
- ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking());
- ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
+ ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking());
- mps = erts_get_migration_paths_managed();
- mp = &mps->mpath[rq->ix];
+ mps = erts_get_migration_paths_managed();
+ mp = &mps->mpath[rq->ix];
- if (mp->flags & ERTS_RUNQ_FLGS_IMMIGRATE_QMASK)
- immigrate(rq, mp);
+ if (mp->flags & ERTS_RUNQ_FLGS_IMMIGRATE_QMASK)
+ immigrate(rq, mp);
+ }
+ ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
continue_check_activities_to_run:
flags = ERTS_RUNQ_FLGS_GET_NOB(rq);
continue_check_activities_to_run_known_flags:
- ASSERT(flags & ERTS_RUNQ_FLG_NONEMPTY);
+ ASSERT(ERTS_SCHEDULER_IS_DIRTY(esdp)
+ || flags & ERTS_RUNQ_FLG_NONEMPTY);
if (flags & (ERTS_RUNQ_FLG_CHK_CPU_BIND|ERTS_RUNQ_FLG_SUSPENDED)) {
@@ -7641,7 +8007,7 @@ Process *schedule(Process *p, int calls)
}
}
- {
+ if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) {
erts_aint32_t aux_work;
int leader_update = erts_thr_progress_update(esdp);
aux_work = erts_atomic32_read_acqb(&esdp->ssi->aux_work);
@@ -7653,9 +8019,9 @@ Process *schedule(Process *p, int calls)
handle_aux_work(&esdp->aux_work_data, aux_work, 0);
erts_smp_runq_lock(rq);
}
- }
- ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking());
+ ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking());
+ }
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
#else /* ERTS_SMP */
@@ -7684,7 +8050,7 @@ Process *schedule(Process *p, int calls)
if (flags & ERTS_RUNQ_FLG_INACTIVE)
empty_runq(rq);
else {
- if (try_steal_task(rq))
+ if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix) && try_steal_task(rq))
goto continue_check_activities_to_run;
empty_runq(rq);
@@ -7710,7 +8076,8 @@ Process *schedule(Process *p, int calls)
goto check_activities_to_run;
}
- else if (fcalls > input_reductions && prepare_for_sys_schedule()) {
+ else if (!ERTS_SCHEDULER_IS_DIRTY(esdp) &&
+ (fcalls > input_reductions && prepare_for_sys_schedule())) {
/*
* Schedule system-level activities.
*/
@@ -7814,6 +8181,14 @@ Process *schedule(Process *p, int calls)
psflg_band_mask = ~(((erts_aint32_t) 1) << (ERTS_PSFLGS_GET_PRQ_PRIO(state)
+ ERTS_PSFLGS_IN_PRQ_MASK_OFFSET));
+#ifdef ERTS_DIRTY_SCHEDULERS
+ /* if a non-dirty scheduler picks up a process marked as already being
+ in a dirty run queue, just drop it and go get another process */
+ if (state & (ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q) &&
+ !ERTS_SCHEDULER_IS_DIRTY(esdp))
+ goto pick_next_process;
+#endif
+
if (!(state & ERTS_PSFLG_PROXY))
psflg_band_mask &= ~ERTS_PSFLG_IN_RUNQ;
else {
@@ -7896,6 +8271,10 @@ Process *schedule(Process *p, int calls)
(UWord) esdp->no);
int migrated = old && old != esdp->no;
+#ifdef ERTS_DIRTY_SCHEDULERS
+ ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp));
+#endif
+
prio = (int) ERTS_PSFLGS_GET_USR_PRIO(state);
erts_smp_spin_lock(&erts_sched_stat.lock);
@@ -9706,8 +10085,12 @@ save_pending_exiter(Process *p)
non_empty_runq(rq);
erts_smp_runq_unlock(rq);
-
- wake_scheduler(rq);
+#ifdef ERTS_DIRTY_SCHEDULERS
+ if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix))
+ wake_dirty_scheduler(rq);
+ else
+#endif
+ wake_scheduler(rq);
}
#endif
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 6155f99b85..a0519dc0d9 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -74,6 +74,10 @@ struct ErtsNodesMonitor_;
#define ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT 0
#define ERTS_MAX_NO_OF_SCHEDULERS 1024
+#ifdef ERTS_DIRTY_SCHEDULERS
+#define ERTS_MAX_NO_OF_DIRTY_CPU_SCHEDULERS ERTS_MAX_NO_OF_SCHEDULERS
+#define ERTS_MAX_NO_OF_DIRTY_IO_SCHEDULERS ERTS_MAX_NO_OF_SCHEDULERS
+#endif
#define ERTS_DEFAULT_MAX_PROCESSES (1 << 18)
@@ -103,6 +107,11 @@ extern Export exp_send, exp_receive, exp_timeout;
extern int erts_sched_compact_load;
extern int erts_sched_balance_util;
extern Uint erts_no_schedulers;
+#ifdef ERTS_DIRTY_SCHEDULERS
+extern Uint erts_no_dirty_cpu_schedulers;
+extern Uint erts_no_dirty_cpu_schedulers_online;
+extern Uint erts_no_dirty_io_schedulers;
+#endif
extern Uint erts_no_run_queues;
extern int erts_sched_thread_suggested_stack_size;
#define ERTS_SCHED_THREAD_MIN_STACK_SIZE 4 /* Kilo words */
@@ -275,6 +284,13 @@ typedef enum {
typedef struct ErtsSchedulerSleepInfo_ ErtsSchedulerSleepInfo;
+#ifdef ERTS_DIRTY_SCHEDULERS
+typedef struct {
+ erts_smp_spinlock_t lock;
+ ErtsSchedulerSleepInfo *list;
+} ErtsSchedulerSleepList;
+#endif
+
struct ErtsSchedulerSleepInfo_ {
#ifdef ERTS_SMP
ErtsSchedulerSleepInfo *next;
@@ -387,6 +403,12 @@ struct ErtsRunQueue_ {
erts_smp_mtx_t mtx;
erts_smp_cnd_t cnd;
+#ifdef ERTS_DIRTY_SCHEDULERS
+#ifdef ERTS_SMP
+ ErtsSchedulerSleepList sleepers;
+#endif
+#endif
+
ErtsSchedulerData *scheduler;
int waiting; /* < 0 in sys schedule; > 0 on cnd variable */
int woken;
@@ -547,7 +569,10 @@ struct ErtsSchedulerData_ {
#endif
ErtsSchedulerSleepInfo *ssi;
Process *current_process;
- Uint no; /* Scheduler number */
+ Uint no; /* Scheduler number for normal schedulers */
+#ifdef ERTS_DIRTY_SCHEDULERS
+ Uint dirty_no; /* Scheduler number for dirty schedulers */
+#endif
Port *current_port;
ErtsRunQueue *run_queue;
int virtual_reds;
@@ -574,6 +599,10 @@ typedef union {
} ErtsAlignedSchedulerData;
extern ErtsAlignedSchedulerData *erts_aligned_scheduler_data;
+#ifdef ERTS_DIRTY_SCHEDULERS
+extern ErtsAlignedSchedulerData *erts_aligned_dirty_cpu_scheduler_data;
+extern ErtsAlignedSchedulerData *erts_aligned_dirty_io_scheduler_data;
+#endif
#ifndef ERTS_SMP
extern ErtsSchedulerData *erts_scheduler_data;
@@ -685,8 +714,13 @@ erts_smp_reset_max_len(ErtsRunQueue *rq, ErtsRunQueueInfo *rqi)
#define ERTS_PSD_DIST_ENTRY 3
#define ERTS_PSD_CALL_TIME_BP 4
#define ERTS_PSD_DELAYED_GC_TASK_QS 5
+#ifdef ERTS_DIRTY_SCHEDULERS
+#define ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT 6
+#define ERTS_PSD_SIZE 7
+#else
#define ERTS_PSD_SIZE 6
+#endif
typedef struct {
void *data[ERTS_PSD_SIZE];
@@ -713,6 +747,11 @@ typedef struct {
#define ERTS_PSD_DELAYED_GC_TASK_QS_GET_LOCKS ERTS_PROC_LOCK_MAIN
#define ERTS_PSD_DELAYED_GC_TASK_QS_SET_LOCKS ERTS_PROC_LOCK_MAIN
+#ifdef ERTS_DIRTY_SCHEDULERS
+#define ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT_GET_LOCKS ERTS_PROC_LOCK_MAIN
+#define ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT_SET_LOCKS ERTS_PROC_LOCK_MAIN
+#endif
+
typedef struct {
ErtsProcLocks get_locks;
ErtsProcLocks set_locks;
@@ -1026,6 +1065,12 @@ void erts_check_for_holes(Process* p);
#define ERTS_PSFLG_RUNNING_SYS ERTS_PSFLG_BIT(15)
#define ERTS_PSFLG_PROXY ERTS_PSFLG_BIT(16)
#define ERTS_PSFLG_DELAYED_SYS ERTS_PSFLG_BIT(17)
+#ifdef ERTS_DIRTY_SCHEDULERS
+#define ERTS_PSFLG_DIRTY_CPU_PROC ERTS_PSFLG_BIT(18)
+#define ERTS_PSFLG_DIRTY_IO_PROC ERTS_PSFLG_BIT(19)
+#define ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q ERTS_PSFLG_BIT(20)
+#define ERTS_PSFLG_DIRTY_IO_PROC_IN_Q ERTS_PSFLG_BIT(21)
+#endif
#define ERTS_PSFLGS_IN_PRQ_MASK (ERTS_PSFLG_IN_PRQ_MAX \
| ERTS_PSFLG_IN_PRQ_HIGH \
@@ -1231,12 +1276,46 @@ extern struct erts_system_profile_flags_t erts_system_profile_flags;
(p)->flags &= ~F_TIMO; \
} while (0)
+#if defined(ERTS_DIRTY_SCHEDULERS) && defined(ERTS_SMP)
+#define ERTS_NUM_DIRTY_RUNQS 2
+#else
+#define ERTS_NUM_DIRTY_RUNQS 0
+#endif
+
#define ERTS_RUNQ_IX(IX) \
(ASSERT(0 <= (IX) && (IX) < erts_no_run_queues), \
&erts_aligned_run_queues[(IX)].runq)
+#ifdef ERTS_DIRTY_SCHEDULERS
+#define ERTS_RUNQ_IX_IS_DIRTY(IX) \
+ (-(ERTS_NUM_DIRTY_RUNQS) <= (IX) && (IX) < 0)
+#define ERTS_DIRTY_RUNQ_IX(IX) \
+ (ASSERT(ERTS_RUNQ_IX_IS_DIRTY(IX)), \
+ &erts_aligned_run_queues[(IX)].runq)
+#define ERTS_DIRTY_CPU_RUNQ (&erts_aligned_run_queues[-1].runq)
+#define ERTS_DIRTY_IO_RUNQ (&erts_aligned_run_queues[-2].runq)
+#else
+#define ERTS_RUNQ_IX_IS_DIRTY(IX) 0
+#endif
#define ERTS_SCHEDULER_IX(IX) \
(ASSERT(0 <= (IX) && (IX) < erts_no_schedulers), \
&erts_aligned_scheduler_data[(IX)].esd)
+#ifdef ERTS_DIRTY_SCHEDULERS
+#define ERTS_DIRTY_CPU_SCHEDULER_IX(IX) \
+ (ASSERT(0 <= (IX) && (IX) < erts_no_dirty_cpu_schedulers), \
+ &erts_aligned_dirty_cpu_scheduler_data[(IX)].esd)
+#define ERTS_DIRTY_IO_SCHEDULER_IX(IX) \
+ (ASSERT(0 <= (IX) && (IX) < erts_no_dirty_io_schedulers), \
+ &erts_aligned_dirty_io_scheduler_data[(IX)].esd)
+#ifdef ERTS_SMP
+#define ERTS_SCHEDULER_IS_DIRTY(ESDP) \
+ ((ESDP)->dirty_no != 0)
+#else
+#define ERTS_SCHEDULER_IS_DIRTY(ESDP) 0
+#endif
+#else
+#define ERTS_RUNQ_IX_IS_DIRTY(IX) 0
+#define ERTS_SCHEDULER_IS_DIRTY(ESDP) 0
+#endif
void erts_pre_init_process(void);
void erts_late_init_process(void);
@@ -1439,9 +1518,11 @@ int erts_dbg_check_halloc_lock(Process *p);
void erts_dbg_multi_scheduling_return_trap(Process *, Eterm);
#endif
int erts_get_max_no_executing_schedulers(void);
-#ifdef ERTS_SMP
+#if defined(ERTS_SMP) || defined(ERTS_DIRTY_SCHEDULERS)
ErtsSchedSuspendResult
-erts_schedulers_state(Uint *, Uint *, Uint *, int);
+erts_schedulers_state(Uint *, Uint *, Uint *, Uint *, Uint *, Uint *, int);
+#endif
+#ifdef ERTS_SMP
ErtsSchedSuspendResult
erts_set_schedulers_online(Process *p,
ErtsProcLocks plocks,
@@ -1559,7 +1640,7 @@ do { \
ErtsSchedulerData *esdp__ = ((P) \
? ERTS_PROC_GET_SCHDATA((Process *) (P)) \
: erts_get_scheduler_data()); \
- if (esdp__) \
+ if (esdp__ && !ERTS_SCHEDULER_IS_DIRTY(esdp__)) \
esdp__->verify_unused_temp_alloc( \
esdp__->verify_unused_temp_alloc_data); \
} while (0)
@@ -1694,6 +1775,13 @@ erts_psd_set(Process *p, ErtsProcLocks plocks, int ix, void *data)
#define ERTS_PROC_SET_DELAYED_GC_TASK_QS(P, L, PBT) \
((ErtsProcSysTaskQs *) erts_psd_set((P), (L), ERTS_PSD_DELAYED_GC_TASK_QS, (void *) (PBT)))
+#ifdef ERTS_DIRTY_SCHEDULERS
+#define ERTS_PROC_GET_DIRTY_SCHED_TRAP_EXPORT(P) \
+ ((Export *) erts_psd_get((P), ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT))
+#define ERTS_PROC_SET_DIRTY_SCHED_TRAP_EXPORT(P, L, DSTE) \
+ ((Export *) erts_psd_set((P), (L), ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT, (void *) (DSTE)))
+#endif
+
ERTS_GLB_INLINE Eterm erts_proc_get_error_handler(Process *p);
ERTS_GLB_INLINE Eterm erts_proc_set_error_handler(Process *p,
@@ -1887,7 +1975,12 @@ Uint erts_get_scheduler_id(void)
{
#ifdef ERTS_SMP
ErtsSchedulerData *esdp = erts_get_scheduler_data();
- return esdp ? esdp->no : (Uint) 0;
+#ifdef ERTS_DIRTY_SCHEDULERS
+ if (esdp && ERTS_SCHEDULER_IS_DIRTY(esdp))
+ return 0;
+ else
+#endif
+ return esdp ? esdp->no : (Uint) 0;
#else
return erts_get_scheduler_data() ? (Uint) 1 : (Uint) 0;
#endif
diff --git a/erts/emulator/test/nif_SUITE.erl b/erts/emulator/test/nif_SUITE.erl
index 9a70e8646a..affb66289b 100644
--- a/erts/emulator/test/nif_SUITE.erl
+++ b/erts/emulator/test/nif_SUITE.erl
@@ -36,7 +36,7 @@
threading/1, send/1, send2/1, send3/1, send_threaded/1, neg/1,
is_checks/1,
get_length/1, make_atom/1, make_string/1, reverse_list_test/1,
- otp_9668/1, consume_timeslice/1
+ otp_9668/1, consume_timeslice/1, dirty_nif/1
]).
-export([many_args_100/100]).
@@ -63,7 +63,7 @@ all() ->
resource_takeover, threading, send, send2, send3,
send_threaded, neg, is_checks, get_length, make_atom,
make_string,reverse_list_test,
- otp_9668, consume_timeslice
+ otp_9668, consume_timeslice, dirty_nif
].
groups() ->
@@ -1343,6 +1343,20 @@ consume_timeslice(Config) when is_list(Config) ->
ok.
+dirty_nif(Config) when is_list(Config) ->
+ try erlang:system_info(dirty_cpu_schedulers) of
+ N when is_integer(N) ->
+ ensure_lib_loaded(Config),
+ Val1 = 42,
+ Val2 = "Erlang",
+ Val3 = list_to_binary([Val2, 0]),
+ {Val1, Val2, Val3} = call_dirty_nif(Val1, Val2, Val3),
+ ok
+ catch
+ error:badarg ->
+ {skipped,"No dirty scheduler support"}
+ end.
+
next_msg(Pid) ->
receive
M -> M
@@ -1472,6 +1486,7 @@ echo_int(_) -> ?nif_stub.
type_sizes() -> ?nif_stub.
otp_9668_nif(_) -> ?nif_stub.
consume_timeslice_nif(_,_) -> ?nif_stub.
+call_dirty_nif(_,_,_) -> ?nif_stub.
nif_stub_error(Line) ->
exit({nif_not_loaded,module,?MODULE,line,Line}).
diff --git a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c
index 0c4a9f7e5c..6f902e186d 100644
--- a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c
+++ b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c
@@ -1494,6 +1494,48 @@ static ERL_NIF_TERM consume_timeslice_nif(ErlNifEnv* env, int argc, const ERL_NI
}
}
+#ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT
+static ERL_NIF_TERM dirty_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+ int n;
+ char s[10];
+ ErlNifBinary b;
+ ERL_NIF_TERM result;
+ if (enif_have_dirty_schedulers()) {
+ assert(enif_is_on_dirty_scheduler(env));
+ }
+ assert(argc == 3);
+ enif_get_int(env, argv[0], &n);
+ enif_get_string(env, argv[1], s, sizeof s, ERL_NIF_LATIN1);
+ enif_inspect_binary(env, argv[2], &b);
+ result = enif_make_tuple3(env,
+ enif_make_int(env, n),
+ enif_make_string(env, s, ERL_NIF_LATIN1),
+ enif_make_binary(env, &b));
+ return enif_schedule_dirty_nif_finalizer(env, result, enif_dirty_nif_finalizer);
+}
+
+static ERL_NIF_TERM call_dirty_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+ int n;
+ char s[10];
+ ErlNifBinary b;
+ assert(!enif_is_on_dirty_scheduler(env));
+ if (argc != 3)
+ return enif_make_badarg(env);
+ if (enif_have_dirty_schedulers()) {
+ if (enif_get_int(env, argv[0], &n) &&
+ enif_get_string(env, argv[1], s, sizeof s, ERL_NIF_LATIN1) &&
+ enif_inspect_binary(env, argv[2], &b))
+ return enif_schedule_dirty_nif(env, ERL_NIF_DIRTY_JOB_CPU_BOUND, dirty_nif, argc, argv);
+ else
+ return enif_make_badarg(env);
+ } else {
+ return dirty_nif(env, argc, argv);
+ }
+}
+#endif
+
static ErlNifFunc nif_funcs[] =
{
{"lib_version", 0, lib_version},
@@ -1543,7 +1585,10 @@ static ErlNifFunc nif_funcs[] =
{"echo_int", 1, echo_int},
{"type_sizes", 0, type_sizes},
{"otp_9668_nif", 1, otp_9668_nif},
- {"consume_timeslice_nif", 2, consume_timeslice_nif}
+ {"consume_timeslice_nif", 2, consume_timeslice_nif},
+#ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT
+ {"call_dirty_nif", 3, call_dirty_nif},
+#endif
};
ERL_NIF_INIT(nif_SUITE,nif_funcs,load,reload,upgrade,unload)