/*
* %CopyrightBegin%
*
* Copyright Ericsson AB 2000-2016. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* %CopyrightEnd%
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "sys.h"
#include "erl_sys_driver.h"
#include "global.h"
#include "erl_threads.h"
#include "erl_thr_queue.h"
#include "erl_async.h"
#include "dtrace-wrapper.h"
#include "lttng-wrapper.h"
#define ERTS_MAX_ASYNC_READY_CALLS_IN_SEQ 20
#define ERTS_ASYNC_PRINT_JOB 0
typedef struct _erl_async {
DE_Handle* hndl; /* The DE_Handle is needed when port is gone */
Eterm port;
long async_id;
void* async_data;
ErlDrvPDL pdl;
void (*async_invoke)(void*);
void (*async_free)(void*);
Uint sched_id;
union {
ErtsThrQPrepEnQ_t *prep_enq;
ErtsThrQFinDeQ_t fin_deq;
} q;
} ErtsAsync;
/*
* We can do without the enqueue mutex since it isn't needed for
* thread safety. Its only purpose is to put async threads to sleep
* during a blast of ready async jobs. This in order to reduce
* contention on the enqueue end of the async ready queues. During
* such a blast without the enqueue mutex much cpu time is consumed
* by the async threads without them doing much progress which in turn
* slow down progress of scheduler threads.
*/
#define ERTS_USE_ASYNC_READY_ENQ_MTX 1
#if ERTS_USE_ASYNC_READY_ENQ_MTX
typedef struct {
erts_mtx_t enq_mtx;
} ErtsAsyncReadyQXData;
#endif
typedef struct {
#if ERTS_USE_ASYNC_READY_ENQ_MTX
union {
ErtsAsyncReadyQXData data;
char align__[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(
sizeof(ErtsAsyncReadyQXData))];
} x;
#endif
ErtsThrQ_t thr_q;
ErtsThrQFinDeQ_t fin_deq;
} ErtsAsyncReadyQ;
typedef union {
ErtsAsyncReadyQ arq;
char align__[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsAsyncReadyQ))];
} ErtsAlgndAsyncReadyQ;
typedef struct {
ErtsThrQ_t thr_q;
erts_tid_t thr_id;
} ErtsAsyncQ;
typedef union {
ErtsAsyncQ aq;
char align__[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsAsyncQ))];
} ErtsAlgndAsyncQ;
typedef struct {
int no_initialized;
erts_mtx_t mtx;
erts_cnd_t cnd;
erts_atomic_t id;
} ErtsAsyncInit;
typedef struct {
union {
ErtsAsyncInit data;
char align__[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsAsyncInit))];
} init;
ErtsAlgndAsyncQ *queue;
ErtsAlgndAsyncReadyQ *ready_queue;
} ErtsAsyncData;
#if defined(USE_VM_PROBES)
/*
* Some compilers, e.g. GCC 4.2.1 and -O3, will optimize away DTrace
* calls if they're the last thing in the function. :-(
* Many thanks to Trond Norbye, via:
* https://github.com/memcached/memcached/commit/6298b3978687530bc9d219b6ac707a1b681b2a46
*/
static unsigned gcc_optimizer_hack = 0;
#endif
int erts_async_max_threads; /* Initialized by erl_init.c */
int erts_async_thread_suggested_stack_size; /* Initialized by erl_init.c */
static ErtsAsyncData *async;
static void *async_main(void *);
static ERTS_INLINE ErtsAsyncQ *
async_q(int i)
{
return &async->queue[i].aq;
}
static ERTS_INLINE ErtsAsyncReadyQ *
async_ready_q(Uint sched_id)
{
return &async->ready_queue[((int)sched_id)-1].arq;
}
void
erts_init_async(void)
{
async = NULL;
if (erts_async_max_threads > 0) {
ErtsThrQInit_t qinit = ERTS_THR_Q_INIT_DEFAULT;
erts_thr_opts_t thr_opts = ERTS_THR_OPTS_DEFAULT_INITER;
char *ptr, thr_name[16];
size_t tot_size = 0;
int i;
tot_size += ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsAsyncData));
tot_size += sizeof(ErtsAlgndAsyncQ)*erts_async_max_threads;
tot_size += sizeof(ErtsAlgndAsyncReadyQ)*erts_no_schedulers;
ptr = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_ASYNC_DATA,
tot_size);
async = (ErtsAsyncData *) ptr;
ptr += ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsAsyncData));
async->init.data.no_initialized = 0;
erts_mtx_init(&async->init.data.mtx, "async_init_mtx", NIL,
ERTS_LOCK_FLAGS_CATEGORY_SCHEDULER);
erts_cnd_init(&async->init.data.cnd);
erts_atomic_init_nob(&async->init.data.id, 0);
async->queue = (ErtsAlgndAsyncQ *) ptr;
ptr += sizeof(ErtsAlgndAsyncQ)*erts_async_max_threads;
qinit.live.queue = ERTS_THR_Q_LIVE_LONG;
qinit.live.objects = ERTS_THR_Q_LIVE_SHORT;
qinit.notify = erts_notify_check_async_ready_queue;
async->ready_queue = (ErtsAlgndAsyncReadyQ *) ptr;
ptr += sizeof(ErtsAlgndAsyncReadyQ)*erts_no_schedulers;
for (i = 1; i <= erts_no_schedulers; i++) {
ErtsAsyncReadyQ *arq = async_ready_q(i);
#if ERTS_USE_ASYNC_READY_ENQ_MTX
erts_mtx_init(&arq->x.data.enq_mtx, "async_enq_mtx", make_small(i),
ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_SCHEDULER);
#endif
erts_thr_q_finalize_dequeue_state_init(&arq->fin_deq);
qinit.arg = (void *) (SWord) i;
erts_thr_q_initialize(&arq->thr_q, &qinit);
}
/* Create async threads... */
thr_opts.detached = 0;
thr_opts.suggested_stack_size
= erts_async_thread_suggested_stack_size;
thr_opts.name = thr_name;
for (i = 0; i < erts_async_max_threads; i++) {
ErtsAsyncQ *aq = async_q(i);
erts_snprintf(thr_opts.name, 16, "async_%d", i+1);
erts_thr_create(&aq->thr_id, async_main, (void*) aq, &thr_opts);
}
/* Wait for async threads to initialize... */
erts_mtx_lock(&async->init.data.mtx);
while (async->init.data.no_initialized != erts_async_max_threads)
erts_cnd_wait(&async->init.data.cnd, &async->init.data.mtx);
erts_mtx_unlock(&async->init.data.mtx);
erts_mtx_destroy(&async->init.data.mtx);
erts_cnd_destroy(&async->init.data.cnd);
}
}
void *
erts_get_async_ready_queue(Uint sched_id)
{
return (void *) async ? async_ready_q(sched_id) : NULL;
}
static ERTS_INLINE void async_add(ErtsAsync *a, ErtsAsyncQ* q)
{
#ifdef USE_VM_PROBES
int len;
#endif
if (is_internal_port(a->port)) {
ErtsAsyncReadyQ *arq = async_ready_q(a->sched_id);
a->q.prep_enq = erts_thr_q_prepare_enqueue(&arq->thr_q);
/* make sure the driver will stay around */
if (a->hndl)
erts_ddll_reference_referenced_driver(a->hndl);
}
#if ERTS_ASYNC_PRINT_JOB
erts_fprintf(stderr, "-> %ld\n", a->async_id);
#endif
erts_thr_q_enqueue(&q->thr_q, a);
#ifdef USE_LTTNG_VM_TRACEPOINTS
if (LTTNG_ENABLED(aio_pool_put)) {
lttng_decl_portbuf(port_str);
lttng_portid_to_str(a->port, port_str);
LTTNG2(aio_pool_put, port_str, -1);
}
#endif
#ifdef USE_VM_PROBES
if (DTRACE_ENABLED(aio_pool_add)) {
DTRACE_CHARBUF(port_str, 16);
erts_snprintf(port_str, sizeof(DTRACE_CHARBUF_NAME(port_str)),
"%T", a->port);
/* DTRACE TODO: Get the queue length from erts_thr_q_enqueue() ? */
len = -1;
DTRACE2(aio_pool_add, port_str, len);
}
gcc_optimizer_hack++;
#endif
}
static ERTS_INLINE ErtsAsync *async_get(ErtsThrQ_t *q,
erts_tse_t *tse,
ErtsThrQPrepEnQ_t **prep_enq)
{
int saved_fin_deq = 0;
ErtsThrQFinDeQ_t fin_deq;
#ifdef USE_VM_PROBES
int len;
#endif
while (1) {
ErtsAsync *a = (ErtsAsync *) erts_thr_q_dequeue(q);
if (a) {
*prep_enq = a->q.prep_enq;
erts_thr_q_get_finalize_dequeue_data(q, &a->q.fin_deq);
if (saved_fin_deq)
erts_thr_q_append_finalize_dequeue_data(&a->q.fin_deq, &fin_deq);
#ifdef USE_LTTNG_VM_TRACEPOINTS
if (LTTNG_ENABLED(aio_pool_get)) {
lttng_decl_portbuf(port_str);
int length = erts_thr_q_length_dirty(q);
lttng_portid_to_str(a->port, port_str);
LTTNG2(aio_pool_get, port_str, length);
}
#endif
#ifdef USE_VM_PROBES
if (DTRACE_ENABLED(aio_pool_get)) {
DTRACE_CHARBUF(port_str, 16);
erts_snprintf(port_str, sizeof(DTRACE_CHARBUF_NAME(port_str)),
"%T", a->port);
/* DTRACE TODO: Get the length from erts_thr_q_dequeue() ? */
len = -1;
DTRACE2(aio_pool_get, port_str, len);
}
#endif
return a;
}
if (ERTS_THR_Q_DIRTY != erts_thr_q_clean(q)) {
ErtsThrQFinDeQ_t tmp_fin_deq;
erts_tse_reset(tse);
chk_fin_deq:
if (erts_thr_q_get_finalize_dequeue_data(q, &tmp_fin_deq)) {
if (!saved_fin_deq) {
erts_thr_q_finalize_dequeue_state_init(&fin_deq);
saved_fin_deq = 1;
}
erts_thr_q_append_finalize_dequeue_data(&fin_deq,
&tmp_fin_deq);
}
switch (erts_thr_q_inspect(q, 1)) {
case ERTS_THR_Q_DIRTY:
break;
case ERTS_THR_Q_NEED_THR_PRGR:
{
ErtsThrPrgrVal prgr = erts_thr_q_need_thr_progress(q);
erts_thr_progress_wakeup(NULL, prgr);
/*
* We do no dequeue finalizing in hope that a new async
* job will arrive before we are woken due to thread
* progress...
*/
erts_tse_wait(tse);
break;
}
case ERTS_THR_Q_CLEAN:
if (saved_fin_deq) {
if (erts_thr_q_finalize_dequeue(&fin_deq))
goto chk_fin_deq;
else
saved_fin_deq = 0;
}
erts_tse_wait(tse);
break;
default:
ASSERT(0);
break;
}
}
}
}
static ERTS_INLINE void call_async_ready(ErtsAsync *a)
{
Port *p = erts_id2port_sflgs(a->port,
NULL,
0,
ERTS_PORT_SFLGS_INVALID_DRIVER_LOOKUP);
if (!p) {
if (a->async_free) {
ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_PORT);
a->async_free(a->async_data);
ERTS_MSACC_POP_STATE();
}
}
else {
if (async_ready(p, a->async_data)) {
if (a->async_free) {
ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_PORT);
a->async_free(a->async_data);
ERTS_MSACC_POP_STATE();
}
}
erts_port_release(p);
}
if (a->pdl)
driver_pdl_dec_refc(a->pdl);
if (a->hndl)
erts_ddll_dereference_driver(a->hndl);
}
static ERTS_INLINE void async_reply(ErtsAsync *a, ErtsThrQPrepEnQ_t *prep_enq)
{
ErtsAsyncReadyQ *arq;
#if ERTS_ASYNC_PRINT_JOB
erts_fprintf(stderr, "=>> %ld\n", a->async_id);
#endif
arq = async_ready_q(a->sched_id);
#if ERTS_USE_ASYNC_READY_ENQ_MTX
erts_mtx_lock(&arq->x.data.enq_mtx);
#endif
erts_thr_q_enqueue_prepared(&arq->thr_q, (void *) a, prep_enq);
#if ERTS_USE_ASYNC_READY_ENQ_MTX
erts_mtx_unlock(&arq->x.data.enq_mtx);
#endif
}
static void
async_wakeup(void *vtse)
{
erts_tse_set((erts_tse_t *) vtse);
}
static erts_tse_t *async_thread_init(ErtsAsyncQ *aq)
{
ErtsThrQInit_t qinit = ERTS_THR_Q_INIT_DEFAULT;
erts_tse_t *tse = erts_tse_fetch();
ERTS_DECLARE_DUMMY(Uint no);
ErtsThrPrgrCallbacks callbacks;
callbacks.arg = (void *) tse;
callbacks.wakeup = async_wakeup;
callbacks.prepare_wait = NULL;
callbacks.wait = NULL;
erts_thr_progress_register_unmanaged_thread(&callbacks);
qinit.live.queue = ERTS_THR_Q_LIVE_LONG;
qinit.live.objects = ERTS_THR_Q_LIVE_SHORT;
qinit.arg = (void *) tse;
qinit.notify = async_wakeup;
qinit.auto_finalize_dequeue = 0;
erts_thr_q_initialize(&aq->thr_q, &qinit);
/* Inform main thread that we are done initializing... */
erts_mtx_lock(&async->init.data.mtx);
no = async->init.data.no_initialized++;
erts_cnd_signal(&async->init.data.cnd);
erts_mtx_unlock(&async->init.data.mtx);
erts_msacc_init_thread("async", no, 0);
return tse;
}
static void *async_main(void* arg)
{
ErtsAsyncQ *aq = (ErtsAsyncQ *) arg;
erts_tse_t *tse = async_thread_init(aq);
ERTS_MSACC_DECLARE_CACHE();
while (1) {
ErtsThrQPrepEnQ_t *prep_enq;
ErtsAsync *a = async_get(&aq->thr_q, tse, &prep_enq);
if (is_nil(a->port))
break; /* Time to die */
ERTS_MSACC_UPDATE_CACHE();
#if ERTS_ASYNC_PRINT_JOB
erts_fprintf(stderr, "<- %ld\n", a->async_id);
#endif
ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_PORT);
a->async_invoke(a->async_data);
ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER);
async_reply(a, prep_enq);
}
return NULL;
}
void
erts_exit_flush_async(void)
{
int i;
ErtsAsync a;
a.port = NIL;
/*
* Terminate threads in order to flush queues. We do not
* bother to clean everything up since we are about to
* terminate the runtime system and a cleanup would only
* delay the termination.
*/
for (i = 0; i < erts_async_max_threads; i++)
async_add(&a, async_q(i));
for (i = 0; i < erts_async_max_threads; i++)
erts_thr_join(async->queue[i].aq.thr_id, NULL);
}
int erts_check_async_ready(void *varq)
{
ErtsAsyncReadyQ *arq = (ErtsAsyncReadyQ *) varq;
int res = 1;
int i;
for (i = 0; i < ERTS_MAX_ASYNC_READY_CALLS_IN_SEQ; i++) {
ErtsAsync *a = (ErtsAsync *) erts_thr_q_dequeue(&arq->thr_q);
if (!a) {
res = 0;
break;
}
#if ERTS_ASYNC_PRINT_JOB
erts_fprintf(stderr, "<<= %ld\n", a->async_id);
#endif
erts_thr_q_append_finalize_dequeue_data(&arq->fin_deq, &a->q.fin_deq);
call_async_ready(a);
erts_free(ERTS_ALC_T_ASYNC, (void *) a);
}
erts_thr_q_finalize_dequeue(&arq->fin_deq);
return res;
}
int erts_async_ready_clean(void *varq, void *val)
{
ErtsAsyncReadyQ *arq = (ErtsAsyncReadyQ *) varq;
ErtsThrQCleanState_t cstate;
cstate = erts_thr_q_clean(&arq->thr_q);
if (erts_thr_q_finalize_dequeue(&arq->fin_deq))
return ERTS_ASYNC_READY_DIRTY;
switch (cstate) {
case ERTS_THR_Q_DIRTY:
return ERTS_ASYNC_READY_DIRTY;
case ERTS_THR_Q_NEED_THR_PRGR:
*((ErtsThrPrgrVal *) val)
= erts_thr_q_need_thr_progress(&arq->thr_q);
return ERTS_ASYNC_READY_NEED_THR_PRGR;
case ERTS_THR_Q_CLEAN:
break;
}
return ERTS_ASYNC_READY_CLEAN;
}
/*
** Generate a fair async key prom an ErlDrvPort
** The port data gives a fair distribution grom port pointer
** to unsigned integer - to be used in key for driver_async below.
*/
unsigned int driver_async_port_key(ErlDrvPort port)
{
ErlDrvTermData td = driver_mk_port(port);
if (td == (ErlDrvTermData) NIL) {
return 0;
}
return (unsigned int) (UWord) internal_port_data(td);
}
/*
** Schedule async_invoke on a worker thread
** NOTE will be syncrounous when threads are unsupported
** return values:
** 0 completed
** -1 error
** N handle value
** arguments:
** ix driver index
** key pointer to secedule queue (NULL means round robin)
** async_invoke function to run in thread
** async_data data to pass to invoke function
** async_free function for relase async_data in case of failure
*/
long driver_async(ErlDrvPort ix, unsigned int* key,
void (*async_invoke)(void*), void* async_data,
void (*async_free)(void*))
{
ErtsAsync* a;
Port* prt;
long id;
unsigned int qix;
Uint sched_id;
ERTS_MSACC_PUSH_STATE();
sched_id = erts_get_scheduler_id();
if (!sched_id)
sched_id = 1;
prt = erts_drvport2port(ix);
if (prt == ERTS_INVALID_ERL_DRV_PORT)
return -1;
ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(prt));
a = (ErtsAsync*) erts_alloc(ERTS_ALC_T_ASYNC, sizeof(ErtsAsync));
a->sched_id = sched_id;
a->hndl = (DE_Handle*)prt->drv_ptr->handle;
a->port = prt->common.id;
a->pdl = NULL;
a->async_data = async_data;
a->async_invoke = async_invoke;
a->async_free = async_free;
if (!async)
id = 0;
else {
do {
id = erts_atomic_inc_read_nob(&async->init.data.id);
} while (id == 0);
if (id < 0)
id *= -1;
ASSERT(id > 0);
}
a->async_id = id;
if (key == NULL) {
qix = (erts_async_max_threads > 0)
? (id % erts_async_max_threads) : 0;
}
else {
qix = (erts_async_max_threads > 0) ?
(*key % erts_async_max_threads) : 0;
*key = qix;
}
if (erts_async_max_threads > 0) {
if (prt->port_data_lock) {
driver_pdl_inc_refc(prt->port_data_lock);
a->pdl = prt->port_data_lock;
}
async_add(a, async_q(qix));
return id;
}
ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_PORT);
(*a->async_invoke)(a->async_data);
ERTS_MSACC_POP_STATE();
if (async_ready(prt, a->async_data)) {
if (a->async_free != NULL) {
ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_PORT);
(*a->async_free)(a->async_data);
ERTS_MSACC_POP_STATE();
}
}
erts_free(ERTS_ALC_T_ASYNC, (void *) a);
return id;
}