diff options
author | Sverker Eriksson <sverker@erlang.org> | 2013-08-02 15:04:14 +0200 |
---|---|---|
committer | Sverker Eriksson <sverker@erlang.org> | 2013-08-07 16:07:42 +0200 |
commit | f499b39ddc5025074c5d22f272f1dd55df79f009 (patch) | |
tree | 8abc375d0ca4b957c1302bada04d383364e5bdd3 | |
parent | 8cece79b77952c991e62ae595bcf71cde016a052 (diff) | |
download | otp-f499b39ddc5025074c5d22f272f1dd55df79f009.tar.gz otp-f499b39ddc5025074c5d22f272f1dd55df79f009.tar.bz2 otp-f499b39ddc5025074c5d22f272f1dd55df79f009.zip |
erts: Fix race in ptab that can cause PID mix-ups
Since: R16B01
Symptom: A spawned process may get the same PID as an existing
process. The new process will "steal" the PID and make the old process
unreachable through the PID.
The problem also applies to port identities but has only been seen
for processes.
Conditions: SMP emulator with at least two scheduler threads. Rapid
spawning and termination of a large number of processes. A small
number of free slots in the process table will also increase the risk
for this bug.
Workaround: Use command line options "+P legacy" and "+Q legacy"
Cause: The race happens if a process terminates and gets stalled while
releasing its process table slot. The stall has to be so long (due to OS
preemptive scheduling most probably) for other schedluer threads to
consume all other free slots for newly spawn processes.
Fix: Write invalid-markers in the free-pid-table and do atomic exhange
operations in retry-loops to make sure each thread gets a unique PID.
-rw-r--r-- | erts/emulator/beam/erl_ptab.c | 139 | ||||
-rw-r--r-- | erts/emulator/beam/erl_ptab.h | 2 |
2 files changed, 89 insertions, 52 deletions
diff --git a/erts/emulator/beam/erl_ptab.c b/erts/emulator/beam/erl_ptab.c index d69619dd44..fa5482b841 100644 --- a/erts/emulator/beam/erl_ptab.c +++ b/erts/emulator/beam/erl_ptab.c @@ -433,7 +433,7 @@ erts_ptab_mem_size(ErtsPTab *ptab) { UWord size = ptab->r.o.max*sizeof(erts_smp_atomic_t); if (ptab->r.o.free_id_data) - size += ptab->r.o.max*sizeof(Uint32); + size += ptab->r.o.max*sizeof(erts_smp_atomic32_t); return size; } @@ -474,7 +474,7 @@ erts_ptab_init_table(ErtsPTab *ptab, tab_sz = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(size*sizeof(erts_smp_atomic_t)); alloc_sz = tab_sz; if (!legacy) - alloc_sz += ERTS_ALC_CACHE_LINE_ALIGN_SIZE(size*sizeof(Uint32)); + alloc_sz += ERTS_ALC_CACHE_LINE_ALIGN_SIZE(size*sizeof(erts_smp_atomic32_t)); ptab->r.o.tab = erts_alloc_permanent_cache_aligned(atype, alloc_sz); tab_end = ((char *) ptab->r.o.tab) + tab_sz; tab_entry = ptab->r.o.tab; @@ -497,6 +497,10 @@ erts_ptab_init_table(ErtsPTab *ptab, ASSERT(ptab->r.o.pix_cl_shift + ptab->r.o.pix_cli_shift == bits); + ptab->r.o.invalid_element = invalid_element; + ptab->r.o.invalid_data = erts_ptab_id2data(ptab, invalid_element->id); + ptab->r.o.release_element = release_element; + if (legacy) { ptab->r.o.free_id_data = NULL; ptab->r.o.dix_cl_mask = 0; @@ -506,11 +510,11 @@ erts_ptab_init_table(ErtsPTab *ptab, } else { - tab_sz = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(size*sizeof(Uint32)); - ptab->r.o.free_id_data = (Uint32 *) tab_end; + tab_sz = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(size*sizeof(erts_smp_atomic32_t)); + ptab->r.o.free_id_data = (erts_smp_atomic32_t *) tab_end; tab_cache_lines = tab_sz/ERTS_CACHE_LINE_SIZE; - ix_per_cache_line = (ERTS_CACHE_LINE_SIZE/sizeof(Uint32)); + ix_per_cache_line = (ERTS_CACHE_LINE_SIZE/sizeof(erts_smp_atomic32_t)); ptab->r.o.dix_cl_mask = tab_cache_lines-1; ptab->r.o.dix_cl_shift = erts_fit_in_bits_int32(ix_per_cache_line-1); @@ -525,7 +529,9 @@ erts_ptab_init_table(ErtsPTab *ptab, ix = 0; for (cl = 0; cl < tab_cache_lines; cl++) { for (cli = 0; cli < ix_per_cache_line; cli++) { - ptab->r.o.free_id_data[ix] = cli*tab_cache_lines+cl; + erts_smp_atomic32_init_nob(&ptab->r.o.free_id_data[ix], + cli*tab_cache_lines+cl); + ASSERT(erts_smp_atomic32_read_nob(&ptab->r.o.free_id_data[ix]) != ptab->r.o.invalid_data); ix++; } } @@ -534,9 +540,6 @@ erts_ptab_init_table(ErtsPTab *ptab, erts_smp_atomic32_init_nob(&ptab->vola.tile.fid_ix, -1); } - ptab->r.o.invalid_element = invalid_element; - ptab->r.o.invalid_data = erts_ptab_id2data(ptab, invalid_element->id); - ptab->r.o.release_element = release_element; erts_smp_interval_init(&ptab->list.data.interval); ptab->list.data.deleted.start = NULL; @@ -606,11 +609,13 @@ erts_ptab_new_element(ErtsPTab *ptab, = erts_smp_current_interval_nob(erts_ptab_interval(ptab)); if (ptab->r.o.free_id_data) { + do { + ix = (Uint32) erts_smp_atomic32_inc_read_acqb(&ptab->vola.tile.aid_ix); + ix = ix_to_free_id_data_ix(ptab, ix); - ix = (Uint32) erts_smp_atomic32_inc_read_acqb(&ptab->vola.tile.aid_ix); - ix = ix_to_free_id_data_ix(ptab, ix); - - data = ptab->r.o.free_id_data[ix]; + data = erts_smp_atomic32_xchg_nob(&ptab->r.o.free_id_data[ix], + (erts_aint32_t)ptab->r.o.invalid_data); + }while ((Eterm)data == ptab->r.o.invalid_data); init_ptab_el(init_arg, (Eterm) data); @@ -763,7 +768,7 @@ erts_ptab_delete_element(ErtsPTab *ptab, erts_smp_atomic_set_relb(&ptab->r.o.tab[pix], ERTS_AINT_NULL); if (ptab->r.o.free_id_data) { - + Uint32 prev_data; /* Next data for this slot... */ data = (Uint32) erts_ptab_id2data(ptab, ptab_el->id); data += ptab->r.o.max; @@ -772,14 +777,17 @@ erts_ptab_delete_element(ErtsPTab *ptab, data += ptab->r.o.max; data &= ~(~((Uint32) 0) << ERTS_PTAB_ID_DATA_SIZE); } - ASSERT(data != ptab->r.o.invalid_data); ASSERT(pix == erts_ptab_data2pix(ptab, data)); - ix = (Uint32) erts_smp_atomic32_inc_read_relb(&ptab->vola.tile.fid_ix); - ix = ix_to_free_id_data_ix(ptab, ix); - - ptab->r.o.free_id_data[ix] = data; + do { + ix = (Uint32) erts_smp_atomic32_inc_read_relb(&ptab->vola.tile.fid_ix); + ix = ix_to_free_id_data_ix(ptab, ix); + + prev_data = erts_smp_atomic32_cmpxchg_nob(&ptab->r.o.free_id_data[ix], + data, + ptab->r.o.invalid_data); + }while ((Eterm)prev_data != ptab->r.o.invalid_data); } ASSERT(erts_smp_atomic32_read_nob(&ptab->vola.tile.count) > 0); @@ -1392,6 +1400,31 @@ erts_ptab_init(void) * Debug stuff */ +static void assert_ptab_consistency(ErtsPTab *ptab) +{ +#ifdef DEBUG + if (ptab->r.o.free_id_data) { + Uint32 ix, pix, data; + int free_pids = 0; + int null_slots = 0; + + for (ix=0; ix < ptab->r.o.max; ix++) { + if (erts_smp_atomic32_read_nob(&ptab->r.o.free_id_data[ix]) != ptab->r.o.invalid_data) { + ++free_pids; + data = erts_smp_atomic32_read_nob(&ptab->r.o.free_id_data[ix]); + pix = erts_ptab_data2pix(ptab, (Eterm) data); + ASSERT(erts_ptab_pix2intptr_nob(ptab, pix) == ERTS_AINT_NULL); + } + if (erts_smp_atomic_read_nob(&ptab->r.o.tab[ix]) == ERTS_AINT_NULL) { + ++null_slots; + } + } + ASSERT(free_pids == null_slots); + ASSERT(free_pids == ptab->r.o.max - erts_smp_atomic32_read_nob(&ptab->vola.tile.count)); + } +#endif +} + Sint erts_ptab_test_next_id(ErtsPTab *ptab, int set, Uint next) { @@ -1402,46 +1435,49 @@ erts_ptab_test_next_id(ErtsPTab *ptab, int set, Uint next) erts_ptab_rwlock(ptab); + assert_ptab_consistency(ptab); + if (ptab->r.o.free_id_data) { - Uint32 aid_ix, dix; + Uint32 id_ix, dix; if (set) { - Uint32 max_ix, ser, num, start; + Uint32 i, max_ix, num, stop_id_ix; max_ix = ptab->r.o.max - 1; - ser = next & ~max_ix; - start = num = next & max_ix; - - aid_ix = (Uint32) erts_smp_atomic32_read_nob(&ptab->vola.tile.aid_ix) + 1; - - do { - Uint32 pix = erts_ptab_data2pix(ptab, num); + num = next; + id_ix = (Uint32) erts_smp_atomic32_read_nob(&ptab->vola.tile.aid_ix); + + for (i=0; i <= max_ix; ++i) { + Uint32 pix; + ++num; + num &= ~(~((Uint32) 0) << ERTS_PTAB_ID_DATA_SIZE); + if (num == ptab->r.o.invalid_data) { + num += ptab->r.o.max; + num &= ~(~((Uint32) 0) << ERTS_PTAB_ID_DATA_SIZE); + } + pix = erts_ptab_data2pix(ptab, num); if (ERTS_AINT_NULL == erts_ptab_pix2intptr_nob(ptab, pix)) { - dix = ix_to_free_id_data_ix(ptab, aid_ix); - ptab->r.o.free_id_data[dix] = ser + num; - ASSERT(pix == erts_ptab_data2pix(ptab, ser+num)); - if (aid_ix == max_ix) - aid_ix = 0; - else - aid_ix++; + ++id_ix; + dix = ix_to_free_id_data_ix(ptab, id_ix); + erts_smp_atomic32_set_nob(&ptab->r.o.free_id_data[dix], num); + ASSERT(pix == erts_ptab_data2pix(ptab, num)); } - if (num == max_ix) - num = 0; - else - num++; - } while (num != start); + } + erts_smp_atomic32_set_nob(&ptab->vola.tile.fid_ix, id_ix); -#ifdef DEBUG - if (aid_ix == 0) - aid_ix = max_ix; - else - aid_ix--; - ASSERT((aid_ix & max_ix) == (((Uint32) erts_smp_atomic32_read_nob(&ptab->vola.tile.fid_ix)) & max_ix)); -#endif + /* Write invalid_data in rest of free_id_data[]: */ + stop_id_ix = (1 + erts_smp_atomic32_read_nob(&ptab->vola.tile.aid_ix)) & max_ix; + while (1) { + id_ix = (id_ix+1) & max_ix; + if (id_ix == stop_id_ix) + break; + dix = ix_to_free_id_data_ix(ptab, id_ix); + erts_smp_atomic32_set_nob(&ptab->r.o.free_id_data[dix], + ptab->r.o.invalid_data); + } } - - aid_ix = (Uint32) erts_smp_atomic32_read_nob(&ptab->vola.tile.aid_ix) + 1; - dix = ix_to_free_id_data_ix(ptab, aid_ix); - res = (Sint) ptab->r.o.free_id_data[dix]; + id_ix = (Uint32) erts_smp_atomic32_read_nob(&ptab->vola.tile.aid_ix) + 1; + dix = ix_to_free_id_data_ix(ptab, id_ix); + res = (Sint) erts_smp_atomic32_read_nob(&ptab->r.o.free_id_data[dix]); } else { /* Deprecated legacy algorithm... */ @@ -1485,6 +1521,7 @@ erts_ptab_test_next_id(ErtsPTab *ptab, int set, Uint next) } } + assert_ptab_consistency(ptab); erts_ptab_rwunlock(ptab); return res; diff --git a/erts/emulator/beam/erl_ptab.h b/erts/emulator/beam/erl_ptab.h index c2d8bd9cad..e3e05f14af 100644 --- a/erts/emulator/beam/erl_ptab.h +++ b/erts/emulator/beam/erl_ptab.h @@ -100,7 +100,7 @@ typedef struct { typedef struct { erts_smp_atomic_t *tab; - Uint32 *free_id_data; + erts_smp_atomic32_t *free_id_data; Uint32 max; Uint32 pix_mask; Uint32 pix_cl_mask; |