From 1573603177f5ba641390c67b0515defb2786ecfb Mon Sep 17 00:00:00 2001
From: Sverker Eriksson <sverker@erlang.org>
Date: Tue, 6 Sep 2016 17:42:30 +0200
Subject: erts: Tweak ets grow/shrink to keep up at contention

---
 erts/emulator/beam/erl_db_hash.c | 287 +++++++++++++++++++++------------------
 erts/emulator/beam/erl_db_hash.h |   8 +-
 2 files changed, 161 insertions(+), 134 deletions(-)

(limited to 'erts')

diff --git a/erts/emulator/beam/erl_db_hash.c b/erts/emulator/beam/erl_db_hash.c
index 3f7e14d15d..698e4c301b 100644
--- a/erts/emulator/beam/erl_db_hash.c
+++ b/erts/emulator/beam/erl_db_hash.c
@@ -194,6 +194,7 @@ static ERTS_INLINE int add_fixed_deletion(DbTableHash* tb, int ix,
 #ifdef ERTS_SMP
 #  define DB_HASH_LOCK_MASK (DB_HASH_LOCK_CNT-1)
 #  define GET_LOCK(tb,hval) (&(tb)->locks->lck_vec[(hval) & DB_HASH_LOCK_MASK].lck)
+#  define GET_LOCK_MAYBE(tb,hval) ((tb)->common.is_thread_safe ? NULL : GET_LOCK(tb,hval))
 
 /* Fine grained read lock */
 static ERTS_INLINE erts_smp_rwmtx_t* RLOCK_HASH(DbTableHash* tb, HashValue hval)
@@ -330,7 +331,9 @@ struct segment {
 
 /* An extended segment table */
 struct ext_segtab {
+#ifdef ERTS_SMP
     ErtsThrPrgrLaterOp lop;
+#endif
     struct segment** prev_segtab;  /* Used when table is shrinking */
     int prev_nsegs;                /* Size of prev_segtab */
     int nsegs;                     /* Size of this segtab */
@@ -355,14 +358,14 @@ static ERTS_INLINE void SET_SEGTAB(DbTableHash* tb,
 ** Forward decl's (static functions)
 */
 static struct ext_segtab* alloc_ext_segtab(DbTableHash* tb, unsigned seg_ix);
-static int alloc_seg(DbTableHash *tb);
+static void alloc_seg(DbTableHash *tb);
 static int free_seg(DbTableHash *tb, int free_records);
 static HashDbTerm* next(DbTableHash *tb, Uint *iptr, erts_smp_rwmtx_t** lck_ptr,
 			HashDbTerm *list);
 static HashDbTerm* search_list(DbTableHash* tb, Eterm key, 
 			       HashValue hval, HashDbTerm *list);
-static void shrink(DbTableHash* tb, int nactive);
-static void grow(DbTableHash* tb, int nactive);
+static void shrink(DbTableHash* tb, int nitems);
+static void grow(DbTableHash* tb, int nitems);
 static Eterm build_term_list(Process* p, HashDbTerm* ptr1, HashDbTerm* ptr2,
 			   Uint sz, DbTableHash*);
 static int analyze_pattern(DbTableHash *tb, Eterm pattern, 
@@ -435,9 +438,10 @@ db_finalize_dbterm_hash(int cret, DbUpdateHandle* handle);
 static ERTS_INLINE void try_shrink(DbTableHash* tb)
 {
     int nactive = NACTIVE(tb);
-    if (nactive > FIRST_SEGSZ && NITEMS(tb) < SHRINK_LIMIT(nactive)
+    int nitems = NITEMS(tb);
+    if (nactive > FIRST_SEGSZ && nitems < SHRINK_LIMIT(nactive)
 	&& !IS_FIXED(tb)) {
-	shrink(tb, nactive);
+	shrink(tb, nitems);
     }
 }	
 
@@ -839,7 +843,7 @@ Lnew:
     {
 	int nactive = NACTIVE(tb);       
 	if (nitems > GROW_LIMIT(nactive) && !IS_FIXED(tb)) {
-	    grow(tb, nactive);
+	    grow(tb, nitems);
 	}
     }
     CHECK_TABLES();
@@ -2417,7 +2421,7 @@ static struct ext_segtab* alloc_ext_segtab(DbTableHash* tb, unsigned seg_ix)
 
 /* Extend table with one new segment
 */
-static int alloc_seg(DbTableHash *tb)
+static void alloc_seg(DbTableHash *tb)
 {    
     int seg_ix = SLOT_IX_TO_SEG_IX(tb->nslots);
     struct segment** segtab;
@@ -2435,7 +2439,6 @@ static int alloc_seg(DbTableHash *tb)
                                                      SIZEOF_SEGMENT(EXT_SEGSZ));
     sys_memset(segtab[seg_ix], 0, SIZEOF_SEGMENT(EXT_SEGSZ));
     tb->nslots += EXT_SEGSZ;
-    return 1;
 }
 
 #ifdef ERTS_SMP
@@ -2506,9 +2509,6 @@ static int free_seg(DbTableHash *tb, int free_records)
                 erts_db_free(ERTS_ALC_T_DB_SEG, (DbTable*)tb, est,
                              SIZEOF_EXT_SEGTAB(est->nsegs));
         }
-        else {
-
-        }
     }
     seg_sz = (seg_ix == 0) ? FIRST_SEGSZ : EXT_SEGSZ;
     erts_db_free(ERTS_ALC_T_DB_SEG, (DbTable *)tb, segp, SIZEOF_SEGMENT(seg_sz));
@@ -2584,85 +2584,93 @@ done_resizing(DbTableHash* tb)
 #endif
 }
 
-/* Grow table with one new bucket.
+/* Grow table with one or more new buckets.
 ** Allocate new segment if needed.
 */
-static void grow(DbTableHash* tb, int nactive)
+static void grow(DbTableHash* tb, int nitems)
 {
     HashDbTerm** pnext;
     HashDbTerm** to_pnext;
     HashDbTerm* p;
     erts_smp_rwmtx_t* lck;
-    int from_ix;
+    int nactive;
+    int from_ix, to_ix;
     int szm;
+    int loop_limit = 5;
 
-    if (!begin_resizing(tb))
-	return; /* already in progress */
-    if (NACTIVE(tb) != nactive) {
-	goto abort; /* already done (race) */
-    }
+    do {
+        if (!begin_resizing(tb))
+            return; /* already in progress */
+        nactive = NACTIVE(tb);
+        if (nitems <= GROW_LIMIT(nactive)) {
+            goto abort; /* already done (race) */
+        }
 
-    /* Ensure that the slot nactive exists */
-    if (nactive == tb->nslots) {
-	/* Time to get a new segment */    
-        ASSERT(((nactive-FIRST_SEGSZ) & EXT_SEGSZ_MASK) == 0);
-	if (!alloc_seg(tb)) goto abort;	    
-    }
-    ASSERT(nactive < tb->nslots);
+        /* Ensure that the slot nactive exists */
+        if (nactive == tb->nslots) {
+            /* Time to get a new segment */
+            ASSERT(((nactive-FIRST_SEGSZ) & EXT_SEGSZ_MASK) == 0);
+            alloc_seg(tb);
+        }
+        ASSERT(nactive < tb->nslots);
 
-    szm = erts_smp_atomic_read_nob(&tb->szm);
-    if (nactive <= szm) {
-	from_ix = nactive & (szm >> 1);
-    } else {
-	ASSERT(nactive == szm+1);
-	from_ix = 0;
-	szm = (szm<<1) | 1;
-    }
+        szm = erts_smp_atomic_read_nob(&tb->szm);
+        if (nactive <= szm) {
+            from_ix = nactive & (szm >> 1);
+        } else {
+            ASSERT(nactive == szm+1);
+            from_ix = 0;
+            szm = (szm<<1) | 1;
+        }
+        to_ix = nactive;
+
+        lck = WLOCK_HASH(tb, from_ix);
+        ERTS_SMP_ASSERT(lck == GET_LOCK_MAYBE(tb,to_ix));
+        /* Now a final double check (with the from_ix lock held)
+         * that we did not get raced by a table fixer.
+         */
+        if (IS_FIXED(tb)) {
+            WUNLOCK_HASH(lck);
+            goto abort;
+        }
+        erts_smp_atomic_set_nob(&tb->nactive, ++nactive);
+        if (from_ix == 0) {
+            if (DB_USING_FINE_LOCKING(tb))
+                erts_smp_atomic_set_relb(&tb->szm, szm);
+            else
+                erts_smp_atomic_set_nob(&tb->szm, szm);
+        }
+        done_resizing(tb);
 
-    lck = WLOCK_HASH(tb, from_ix);
-    /* Now a final double check (with the from_ix lock held)
-     * that we did not get raced by a table fixer.
-     */
-    if (IS_FIXED(tb)) {
-	WUNLOCK_HASH(lck);
-	goto abort;
-    }
-    erts_smp_atomic_inc_nob(&tb->nactive);
-    if (from_ix == 0) {
-	if (DB_USING_FINE_LOCKING(tb))
-	    erts_smp_atomic_set_relb(&tb->szm, szm);
-	else
-	    erts_smp_atomic_set_nob(&tb->szm, szm);
-    }
-    done_resizing(tb);
+        /* Finally, let's split the bucket. We try to do it in a smart way
+           to keep link order and avoid unnecessary updates of next-pointers */
+        pnext = &BUCKET(tb, from_ix);
+        p = *pnext;
+        to_pnext = &BUCKET(tb, to_ix);
+        while (p != NULL) {
+            if (p->hvalue == INVALID_HASH) { /* rare but possible with fine locking */
+                *pnext = p->next;
+                free_term(tb, p);
+                p = *pnext;
+            }
+            else {
+                int ix = p->hvalue & szm;
+                if (ix != from_ix) {
+                    ASSERT(ix == (from_ix ^ ((szm+1)>>1)));
+                    *to_pnext = p;
+                    /* Swap "from" and "to": */
+                    from_ix = ix;
+                    to_pnext = pnext;
+                }
+                pnext = &p->next;
+                p = *pnext;
+            }
+        }
+        *to_pnext = NULL;
+        WUNLOCK_HASH(lck);
 
-    /* Finally, let's split the bucket. We try to do it in a smart way
-       to keep link order and avoid unnecessary updates of next-pointers */
-    pnext = &BUCKET(tb, from_ix);
-    p = *pnext;
-    to_pnext = &BUCKET(tb, nactive);
-    while (p != NULL) {
-	if (p->hvalue == INVALID_HASH) { /* rare but possible with fine locking */
-	    *pnext = p->next;
-	    free_term(tb, p);
-	    p = *pnext;
-	}
-	else {
-	    int ix = p->hvalue & szm;
-	    if (ix != from_ix) {
-		ASSERT(ix == (from_ix ^ ((szm+1)>>1)));
-		*to_pnext = p;
-		/* Swap "from" and "to": */
-		from_ix = ix;
-		to_pnext = pnext;
-	    }
-	    pnext = &p->next;
-	    p = *pnext;		
-	}
-    }
-    *to_pnext = NULL;
+    }while (--loop_limit && nitems > GROW_LIMIT(nactive));
 
-    WUNLOCK_HASH(lck);
     return;
    
 abort:
@@ -2673,56 +2681,75 @@ abort:
 /* Shrink table by joining top bucket.
 ** Remove top segment if it gets empty.
 */
-static void shrink(DbTableHash* tb, int nactive)
-{     
-    if (!begin_resizing(tb))
-	return; /* already in progress */
-    if (NACTIVE(tb) == nactive) {
-	erts_smp_rwmtx_t* lck;
-	int src_ix = nactive - 1;
-	int low_szm = erts_smp_atomic_read_nob(&tb->szm) >> 1;
-	int dst_ix = src_ix & low_szm;
-
-	ASSERT(dst_ix < src_ix);
-	ASSERT(nactive > FIRST_SEGSZ);
-	lck = WLOCK_HASH(tb, dst_ix);
-	/* Double check for racing table fixers */
-	if (!IS_FIXED(tb)) {
-	    HashDbTerm** src_bp = &BUCKET(tb, src_ix);
-	    HashDbTerm** dst_bp = &BUCKET(tb, dst_ix);
-	    HashDbTerm** bp = src_bp;
-
-	    /* Q: Why join lists by appending "dst" at the end of "src"?
-	       A: Must step through "src" anyway to purge pseudo deleted. */
-	    while(*bp != NULL) {
-		if ((*bp)->hvalue == INVALID_HASH) {
-		    HashDbTerm* deleted = *bp;
-		    *bp = deleted->next;
-		    free_term(tb, deleted);
-		} else {
-		    bp = &(*bp)->next;
-		}
-	    }
-	    *bp = *dst_bp;
-	    *dst_bp = *src_bp;
-	    *src_bp = NULL;
-	    
-	    erts_smp_atomic_set_nob(&tb->nactive, src_ix);
-	    if (dst_ix == 0) {
-		erts_smp_atomic_set_relb(&tb->szm, low_szm);
-	    }
-	    WUNLOCK_HASH(lck);
-	    
-	    if (tb->nslots - src_ix >= EXT_SEGSZ) {
-		free_seg(tb, 0);
-	    }
-	}
-	else {
-	    WUNLOCK_HASH(lck);
-	}
+static void shrink(DbTableHash* tb, int nitems)
+{
+    HashDbTerm** src_bp;
+    HashDbTerm** dst_bp;
+    HashDbTerm** bp;
+    erts_smp_rwmtx_t* lck;
+    int src_ix, dst_ix, low_szm;
+    int nactive;
+    int loop_limit = 5;
 
-    }
-    /*else already done */
+    do {
+        if (!begin_resizing(tb))
+            return; /* already in progress */
+        nactive = NACTIVE(tb);
+        if (!(nactive > FIRST_SEGSZ && nitems < SHRINK_LIMIT(nactive))) {
+            goto abort; /* already done (race) */
+        }
+        src_ix = nactive - 1;
+        low_szm = erts_smp_atomic_read_nob(&tb->szm) >> 1;
+        dst_ix = src_ix & low_szm;
+
+        ASSERT(dst_ix < src_ix);
+        ASSERT(nactive > FIRST_SEGSZ);
+        lck = WLOCK_HASH(tb, dst_ix);
+        ERTS_SMP_ASSERT(lck == GET_LOCK_MAYBE(tb,src_ix));
+        /* Double check for racing table fixers */
+        if (IS_FIXED(tb)) {
+            WUNLOCK_HASH(lck);
+            goto abort;
+        }
+
+        src_bp = &BUCKET(tb, src_ix);
+        dst_bp = &BUCKET(tb, dst_ix);
+        bp = src_bp;
+
+        /*
+         * We join lists by appending "dst" at the end of "src"
+         * as we must step through "src" anyway to purge pseudo deleted.
+         */
+        while(*bp != NULL) {
+            if ((*bp)->hvalue == INVALID_HASH) {
+                HashDbTerm* deleted = *bp;
+                *bp = deleted->next;
+                free_term(tb, deleted);
+            } else {
+                bp = &(*bp)->next;
+            }
+        }
+        *bp = *dst_bp;
+        *dst_bp = *src_bp;
+        *src_bp = NULL;
+
+        nactive = src_ix;
+        erts_smp_atomic_set_nob(&tb->nactive, nactive);
+        if (dst_ix == 0) {
+            erts_smp_atomic_set_relb(&tb->szm, low_szm);
+        }
+        WUNLOCK_HASH(lck);
+
+        if (tb->nslots - src_ix >= EXT_SEGSZ) {
+            free_seg(tb, 0);
+        }
+        done_resizing(tb);
+
+    } while (--loop_limit
+             && nactive > FIRST_SEGSZ && nitems < SHRINK_LIMIT(nactive));
+    return;
+
+abort:
     done_resizing(tb);
 }
 
@@ -2892,7 +2919,7 @@ db_finalize_dbterm_hash(int cret, DbUpdateHandle* handle)
             nactive = NACTIVE(tb);
 
             if (nitems > GROW_LIMIT(nactive) && !IS_FIXED(tb)) {
-                grow(tb, nactive);
+                grow(tb, nitems);
             }
         } else {
             WUNLOCK_HASH(lck);
@@ -2997,7 +3024,6 @@ Eterm erts_ets_hash_get_memstate(Process* p, DbTableHash* tb)
 Eterm erts_ets_hash_restore_memstate(DbTableHash* tb, Eterm memstate)
 {
     int seg_cnt, target;
-    int nactive;
 
     if (!is_small(memstate))
         return make_small(__LINE__);
@@ -3009,7 +3035,6 @@ Eterm erts_ets_hash_restore_memstate(DbTableHash* tb, Eterm memstate)
         while (!begin_resizing(tb))
             /*spin*/;
         seg_cnt = SLOT_IX_TO_SEG_IX(tb->nslots);
-        nactive = NACTIVE(tb);
         done_resizing(tb);
 
         if (target == seg_cnt)
@@ -3017,9 +3042,9 @@ Eterm erts_ets_hash_restore_memstate(DbTableHash* tb, Eterm memstate)
         if (IS_FIXED(tb))
             return make_small(__LINE__);
         if (target < seg_cnt)
-            shrink(tb, nactive);
+            shrink(tb, 0);
         else
-            grow(tb, nactive);
+            grow(tb, INT_MAX);
     }
 }
 
@@ -3031,7 +3056,7 @@ void db_check_table_hash(DbTable *tbl)
     HashDbTerm* list;
     int j;
     
-    for (j = 0; j < tb->nactive; j++) {
+    for (j = 0; j < NACTIVE(tb); j++) {
 	if ((list = BUCKET(tb,j)) != 0) {
 	    while (list != 0) {
 		if (!is_tuple(make_tuple(list->dbterm.tpl))) {
diff --git a/erts/emulator/beam/erl_db_hash.h b/erts/emulator/beam/erl_db_hash.h
index 11bd6aa32a..6d25c73549 100644
--- a/erts/emulator/beam/erl_db_hash.h
+++ b/erts/emulator/beam/erl_db_hash.h
@@ -50,17 +50,19 @@ typedef struct db_table_hash_fine_locks {
 typedef struct db_table_hash {
     DbTableCommon common;
 
+    /* SMP: szm and nactive are write-protected by is_resizing or table write lock */
+    erts_smp_atomic_t szm;     /* current size mask. */
+    erts_smp_atomic_t nactive; /* Number of "active" slots */
+
     erts_smp_atomic_t segtab;  /* The segment table (struct segment**) */
     struct segment* first_segtab[1];
-    erts_smp_atomic_t szm;     /* current size mask. */
-    
+
     /* SMP: nslots and nsegs are protected by is_resizing or table write lock */
     int nslots;       /* Total number of slots */
     int nsegs;        /* Size of segment table */
 
     /* List of slots where elements have been deleted while table was fixed */
     erts_smp_atomic_t fixdel;  /* (FixedDeletion*) */	
-    erts_smp_atomic_t nactive; /* Number of "active" slots */
 #ifdef ERTS_SMP
     erts_smp_atomic_t is_resizing; /* grow/shrink in progress */
     DbTableHashFineLocks* locks;
-- 
cgit v1.2.3