From b17db5ae892a8b9c48f8f94f7219c60fe122f629 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Thu, 21 Sep 2017 19:21:18 +0200 Subject: erts: Fix bug in DistEntry refc dance to handle "lookup without refc++" correctly which was introduced in 4dcb2ae7810a507b701a30072b2f514cab7ebbdb. When decrementing refc to zero (in try_delete or prepare_try_delete) we must always wait thread progress to make sure no thread has done lookup without refc++ and is just about to do refc++ and thereby revive the DistEntry. That is, we wait for a potential other thread to either do refc++ or drop its pointer to the DistEntry. And if that other thread does refc++ (in erts_ref_dist_entry) it must also do the extra refc++ for the scheduled pending delete. --- erts/emulator/beam/erl_node_tables.c | 103 ++++++++++++++++++++++------------- 1 file changed, 64 insertions(+), 39 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/erl_node_tables.c b/erts/emulator/beam/erl_node_tables.c index 0f3dfa797c..1ab096ab69 100644 --- a/erts/emulator/beam/erl_node_tables.c +++ b/erts/emulator/beam/erl_node_tables.c @@ -101,7 +101,9 @@ void erts_ref_dist_entry(DistEntry *dep) { ASSERT(dep); - de_refc_inc(dep, 1); + if (de_refc_inc_read(dep, 1) == 1) { + de_refc_inc(dep, 2); /* Pending delete */ + } } void @@ -396,44 +398,77 @@ erts_make_dhandle(Process *c_p, DistEntry *dep) return erts_mk_magic_ref(&hp, &c_p->off_heap, bin); } -static void try_delete_dist_entry(void *vbin); +static void start_timer_delete_dist_entry(void *vdep); +static void prepare_try_delete_dist_entry(void *vdep); +static void try_delete_dist_entry(DistEntry*); + +static void schedule_delete_dist_entry(DistEntry* dep) +{ + /* + * Here we need thread progress to wait for other threads, that may have + * done lookup without refc++, to do either refc++ or drop their refs. + * + * Note that timeouts do not guarantee thread progress. + */ + erts_schedule_thr_prgr_later_op(start_timer_delete_dist_entry, + dep, &dep->later_op); +} static void -prepare_try_delete_dist_entry(void *vbin) +start_timer_delete_dist_entry(void *vdep) { - Binary *bin = (Binary *) vbin; - DistEntry *dep = ErtsBin2DistEntry(bin); - Uint size; + if (node_tab_delete_delay == 0) { + prepare_try_delete_dist_entry(vdep); + } + else { + ASSERT(node_tab_delete_delay > 0); + erts_start_timer_callback(node_tab_delete_delay, + prepare_try_delete_dist_entry, + vdep); + } +} + +static void +prepare_try_delete_dist_entry(void *vdep) +{ + DistEntry *dep = vdep; erts_aint_t refc; + /* + * Time has passed since we decremented refc to zero and DistEntry may + * have been revived. Do a fast check without table lock first. + */ refc = de_refc_read(dep, 0); - if (refc > 0) - return; - - size = ERTS_MAGIC_BIN_SIZE(sizeof(DistEntry)); - erts_schedule_thr_prgr_later_cleanup_op(try_delete_dist_entry, - vbin, &dep->later_op, size); + if (refc == 0) { + try_delete_dist_entry(dep); + } + else { + /* + * Someone has done lookup and done refc++ for us. + */ + refc = de_refc_dec_read(dep, 0); + if (refc == 0) + schedule_delete_dist_entry(dep); + } } -static void try_delete_dist_entry(void *vbin) +static void try_delete_dist_entry(DistEntry* dep) { - Binary *bin = (Binary *) vbin; - DistEntry *dep = ErtsBin2DistEntry(bin); erts_aint_t refc; erts_rwmtx_rwlock(&erts_dist_table_rwmtx); /* * Another thread might have looked up this dist entry after * we decided to delete it (refc became zero). If so, the other - * thread incremented refc twice. Once for the new reference - * and once for this thread. + * thread incremented refc one extra step for this thread. * - * If refc reach -1, no one has used the entry since we - * set up the timer. Delete the entry. + * If refc reach -1, no one has done lookup and no one can do lookup + * as we have table lock. Delete the entry. * - * If refc reach 0, the entry is currently not in use - * but has been used since we set up the timer. Set up a - * new timer. + * If refc reach 0, someone raced us and either + * (1) did lookup with own refc++ and already released it again + * (2) did lookup without own refc++ + * Schedule new delete operation. * * If refc > 0, the entry is in use. Keep the entry. */ @@ -443,12 +478,7 @@ static void try_delete_dist_entry(void *vbin) erts_rwmtx_rwunlock(&erts_dist_table_rwmtx); if (refc == 0) { - if (node_tab_delete_delay == 0) - prepare_try_delete_dist_entry(vbin); - else if (node_tab_delete_delay > 0) - erts_start_timer_callback(node_tab_delete_delay, - prepare_try_delete_dist_entry, - vbin); + schedule_delete_dist_entry(dep); } } @@ -462,12 +492,7 @@ int erts_dist_entry_destructor(Binary *bin) if (refc == -1) return 1; /* Allow deallocation of structure... */ - if (node_tab_delete_delay == 0) - prepare_try_delete_dist_entry((void *) bin); - else if (node_tab_delete_delay > 0) - erts_start_timer_callback(node_tab_delete_delay, - prepare_try_delete_dist_entry, - (void *) bin); + schedule_delete_dist_entry(dep); return 0; } @@ -1463,9 +1488,9 @@ insert_delayed_delete_node(void *state, } static void -insert_thr_prgr_delete_dist_entry(void *arg, ErtsThrPrgrVal thr_prgr, void *vbin) +insert_thr_prgr_delete_dist_entry(void *arg, ErtsThrPrgrVal thr_prgr, void *vdep) { - DistEntry *dep = ErtsBin2DistEntry(vbin); + DistEntry *dep = vdep; Eterm heap[3]; insert_dist_entry(dep, SYSTEM_REF, @@ -1476,9 +1501,9 @@ insert_thr_prgr_delete_dist_entry(void *arg, ErtsThrPrgrVal thr_prgr, void *vbin static void insert_delayed_delete_dist_entry(void *state, ErtsMonotonicTime timeout_pos, - void *vbin) + void *vdep) { - DistEntry *dep = ErtsBin2DistEntry(vbin); + DistEntry *dep = vdep; Eterm heap[3]; insert_dist_entry(dep, SYSTEM_REF, @@ -1520,7 +1545,7 @@ setup_reference_table(void) erts_debug_callback_timer_foreach(prepare_try_delete_dist_entry, insert_delayed_delete_dist_entry, NULL); - erts_debug_later_op_foreach(try_delete_dist_entry, + erts_debug_later_op_foreach(start_timer_delete_dist_entry, insert_thr_prgr_delete_dist_entry, NULL); -- cgit v1.2.3