Allow erlang:finish_loading/1 to load more than one module

The BIFs prepare_loading/2 and finish_loading/1 have been designed to allow fast, parallel loading of many modules. Because of the complications with on_load functions, the initial implementation of finish_loading/1 only allowed a single element in the list of prepared modules.

finish_loading/1 does not suspend other processes, but it must wait for all schedulers to pass a write barrier ("thread progress"). The time it takes for all schedulers to pass the write barrier is highly variable, depending on what kind of code they are executing. Therefore, allowing finish_loading/1 to finish the loading of more than one module before passing the write barrier could potentially be much faster than calling finish_loading/1 multiple times.

The test case many/1, run on my computer, shows that with "heavy load", finishing the loading of 100 modules in parallel is almost 50 times faster than loading them sequentially. With "light load", the gain is still almost 10 times.

Here follows an actual sample of the output from the test case on my computer (a 2012 iMac):

    Light load
    ==========
    Sequential: 22361 µs
    Parallel:   2586 µs
    Ratio:      9

    Heavy load
    ==========
    Sequential: 254512 µs
    Parallel:   5246 µs
    Ratio:      49
author: Björn Gustavsson <[email protected]> 2015-11-30 15:35:47 +0100
committer: Björn Gustavsson <[email protected]> 2016-02-25 14:50:48 +0100
commit: 8f4c278b69fe4d613a0b865a2edac43231cad913 (patch)
tree: 3e55347065faba3a2ae5950935da336fc8ffe7ab /erts/emulator/beam/beam_ranges.c
parent: e1be12434b06fb2594af5cdafc5efc5b9182d8b6 (diff)
download: otp-8f4c278b69fe4d613a0b865a2edac43231cad913.tar.gz
otp-8f4c278b69fe4d613a0b865a2edac43231cad913.tar.bz2
otp-8f4c278b69fe4d613a0b865a2edac43231cad913.zip
1 file changed, 68 insertions, 84 deletions
diff --git a/erts/emulator/beam/beam_ranges.c b/erts/emulator/beam/beam_ranges.c
index 5a2b66727a..54c337ee72 100644
--- a/erts/emulator/beam/beam_ranges.c
+++ b/erts/emulator/beam/beam_ranges.c
@@ -53,6 +53,7 @@ struct ranges {
 };
 static struct ranges r[ERTS_NUM_CODE_IX];
 static erts_smp_atomic_t mem_used;
+static Range* write_ptr;
 
 #ifdef HARD_DEBUG
 static void check_consistency(struct ranges* p)
@@ -72,6 +73,17 @@ static void check_consistency(struct ranges* p)
 #  define CHECK(r)
 #endif /* HARD_DEBUG */
 
+static int
+rangecompare(Range* a, Range* b)
+{
+    if (a->start < b->start) {
+	return -1;
+    } else if (a->start == b->start) {
+	return 0;
+    } else {
+	return 1;
+    }
+}
 
 void
 erts_init_ranges(void)
@@ -88,45 +100,70 @@ erts_init_ranges(void)
 }
 
 void
-erts_start_staging_ranges(void)
+erts_start_staging_ranges(int num_new)
 {
+    ErtsCodeIndex src = erts_active_code_ix();
     ErtsCodeIndex dst = erts_staging_code_ix();
+    Sint need;
 
     if (r[dst].modules) {
 	erts_smp_atomic_add_nob(&mem_used, -r[dst].allocated);
 	erts_free(ERTS_ALC_T_MODULE_REFS, r[dst].modules);
-	r[dst].modules = NULL;
     }
+
+    need = r[dst].allocated = r[src].n + num_new;
+    erts_smp_atomic_add_nob(&mem_used, need);
+    write_ptr = erts_alloc(ERTS_ALC_T_MODULE_REFS,
+			   need * sizeof(Range));
+    r[dst].modules = write_ptr;
 }
 
 void
 erts_end_staging_ranges(int commit)
 {
-    ErtsCodeIndex dst = erts_staging_code_ix();
-
-    if (commit && r[dst].modules == NULL) {
+    if (commit) {
 	Sint i;
-	Sint n;
-
-	/* No modules added, just clone src and remove purged code. */
 	ErtsCodeIndex src = erts_active_code_ix();
+	ErtsCodeIndex dst = erts_staging_code_ix();
+	Range* mp;
+	Sint num_inserted;
 
-	erts_smp_atomic_add_nob(&mem_used, r[src].n);
-	r[dst].modules = erts_alloc(ERTS_ALC_T_MODULE_REFS,
-				    r[src].n * sizeof(Range));
-	r[dst].allocated = r[src].n;
-	n = 0;
+	mp = r[dst].modules;
+	num_inserted = write_ptr - mp;
 	for (i = 0; i < r[src].n; i++) {
 	    Range* rp = r[src].modules+i;
 	    if (rp->start < RANGE_END(rp)) {
 		/* Only insert a module that has not been purged. */
-		r[dst].modules[n] = *rp;
-		n++;
+		write_ptr->start = rp->start;
+		erts_smp_atomic_init_nob(&write_ptr->end,
+					 (erts_aint_t)(RANGE_END(rp)));
+		write_ptr++;
+	    }
+	}
+
+	/*
+	 * There are num_inserted new range entries (unsorted) at the
+	 * beginning of the modules array, followed by the old entries
+	 * (sorted). We must now sort the entire array.
+	 */
+
+	r[dst].n = write_ptr - mp;
+	if (num_inserted > 1) {
+	    qsort(mp, r[dst].n, sizeof(Range),
+		  (int (*)(const void *, const void *)) rangecompare);
+	} else if (num_inserted == 1) {
+	    /* Sift the new range into place. This is faster than qsort(). */
+	    Range t = mp[0];
+	    for (i = 0; i < r[dst].n-1 && t.start > mp[i+1].start; i++) {
+		mp[i] = mp[i+1];
 	    }
+	    mp[i] = t;
 	}
-	r[dst].n = n;
+	r[dst].modules = mp;
+	CHECK(&r[dst]);
 	erts_smp_atomic_set_nob(&r[dst].mid,
-				(erts_aint_t) (r[dst].modules + n / 2));
+				(erts_aint_t) (r[dst].modules +
+					       r[dst].n / 2));
     }
 }
 
@@ -135,82 +172,29 @@ erts_update_ranges(BeamInstr* code, Uint size)
 {
     ErtsCodeIndex dst = erts_staging_code_ix();
     ErtsCodeIndex src = erts_active_code_ix();
-    Sint i;
-    Sint n;
-    Sint need;
 
     if (src == dst) {
 	ASSERT(!erts_initialized);
 
 	/*
-	 * During start-up of system, the indices are the same.
-	 * Handle this by faking a source area.
+	 * During start-up of system, the indices are the same
+	 * and erts_start_staging_ranges() has not been called.
 	 */
-	src = (src+1) % ERTS_NUM_CODE_IX;
-	if (r[src].modules) {
-	    erts_smp_atomic_add_nob(&mem_used, -r[src].allocated);
-	    erts_free(ERTS_ALC_T_MODULE_REFS, r[src].modules);
+	if (r[dst].modules == NULL) {
+	    Sint need = 128;
+	    erts_smp_atomic_add_nob(&mem_used, need);
+	    r[dst].modules = erts_alloc(ERTS_ALC_T_MODULE_REFS,
+					need * sizeof(Range));
+	    r[dst].allocated = need;
+	    write_ptr = r[dst].modules;
 	}
-	r[src] = r[dst];
-	r[dst].modules = 0;
     }
 
-    CHECK(&r[src]);
-
-    ASSERT(r[dst].modules == NULL);
-    need = r[dst].allocated = r[src].n + 1;
-    erts_smp_atomic_add_nob(&mem_used, need);
-    r[dst].modules = (Range *) erts_alloc(ERTS_ALC_T_MODULE_REFS,
-					  need * sizeof(Range));
-    n = 0;
-    for (i = 0; i < r[src].n; i++) {
-	Range* rp = r[src].modules+i;
-	if (code < rp->start) {
-	    r[dst].modules[n].start = code;
-	    erts_smp_atomic_init_nob(&r[dst].modules[n].end,
-				     (erts_aint_t)(((byte *)code) + size));
-	    ASSERT(!n || RANGE_END(&r[dst].modules[n-1]) < code);
-	    n++;
-	    break;
-	}
-	if (rp->start < RANGE_END(rp)) {
-	    /* Only insert a module that has not been purged. */
-	    r[dst].modules[n].start = rp->start;
-	    erts_smp_atomic_init_nob(&r[dst].modules[n].end,
-				     (erts_aint_t)(RANGE_END(rp)));
-	    ASSERT(!n || RANGE_END(&r[dst].modules[n-1]) < rp->start);
-	    n++;
-	}
-    }
-
-    while (i < r[src].n) {
-	Range* rp = r[src].modules+i;
-	if (rp->start < RANGE_END(rp)) {
-	    /* Only insert a module that has not been purged. */
-	    r[dst].modules[n].start = rp->start;
-	    erts_smp_atomic_init_nob(&r[dst].modules[n].end,
-				     (erts_aint_t)(RANGE_END(rp)));
-	    ASSERT(!n || RANGE_END(&r[dst].modules[n-1]) < rp->start);
-	    n++;
-	}
-	i++;
-    }
-
-    if (n == 0 || code > r[dst].modules[n-1].start) {
-	r[dst].modules[n].start = code;
-	erts_smp_atomic_init_nob(&r[dst].modules[n].end,
-				 (erts_aint_t)(((byte *)code) + size));
-	ASSERT(!n || RANGE_END(&r[dst].modules[n-1]) < code);
-	n++;
-    }
-
-    ASSERT(n <= r[src].n+1);
-    r[dst].n = n;
-    erts_smp_atomic_set_nob(&r[dst].mid,
-			    (erts_aint_t) (r[dst].modules + n / 2));
-
-    CHECK(&r[dst]);
-    CHECK(&r[src]);
+    ASSERT(r[dst].modules);
+    write_ptr->start = code;
+    erts_smp_atomic_init_nob(&(write_ptr->end),
+			     (erts_aint_t)(((byte *)code) + size));
+    write_ptr++;
 }
 
 void
author	Björn Gustavsson <[email protected]>	2015-11-30 15:35:47 +0100
committer	Björn Gustavsson <[email protected]>	2016-02-25 14:50:48 +0100
commit	8f4c278b69fe4d613a0b865a2edac43231cad913 (patch)
tree	3e55347065faba3a2ae5950935da336fc8ffe7ab /erts/emulator/beam/beam_ranges.c
parent	e1be12434b06fb2594af5cdafc5efc5b9182d8b6 (diff)
download	otp-8f4c278b69fe4d613a0b865a2edac43231cad913.tar.gz otp-8f4c278b69fe4d613a0b865a2edac43231cad913.tar.bz2 otp-8f4c278b69fe4d613a0b865a2edac43231cad913.zip