about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author John Högberg <[email protected]> 2019-08-26 10:26:06 +0200
committer John Högberg <[email protected]> 2019-08-26 10:38:06 +0200
commit 9ee1b789e2a085a8cf59c249b46130a8c8801d0e (patch)
tree 6a93fd439b1ee1d9d3d89f6eb035e98d6adb71d0
parent db00e5033b964471b378f872e0f2f64e82da83be (diff)
download otp-9ee1b789e2a085a8cf59c249b46130a8c8801d0e.tar.gz
otp-9ee1b789e2a085a8cf59c249b46130a8c8801d0e.tar.bz2
otp-9ee1b789e2a085a8cf59c249b46130a8c8801d0e.zip
erts: Simplify deallocate_return
-rw-r--r-- erts/emulator/beam/instrs.tab 39
1 files changed, 18 insertions, 21 deletions
diff --git a/erts/emulator/beam/instrs.tab b/erts/emulator/beam/instrs.tab
index 156de67716..38b1e5909b 100644
--- a/erts/emulator/beam/instrs.tab
+++ b/erts/emulator/beam/instrs.tab
@@ -66,11 +66,25 @@ deallocate(Deallocate) {
E = ADD_BYTE_OFFSET(E, $Deallocate);
}
+//
+// Micro-benchmarks showed that the deallocate_return instruction
+// became slower when the continuation pointer was moved from
+// the process struct to the stack. The reason seems to be read
+// dependencies, i.e. that the CPU cannot figure out beforehand
+// from which position on the stack the continuation pointer
+// should be fetched.
+//
+// Initializing num_bytes with a constant value seems to restore
+// the lost speed, so we've specialized the instruction for the
+// most common values.
+//
+
deallocate_return0 := dealloc_ret.n0.execute;
deallocate_return1 := dealloc_ret.n1.execute;
deallocate_return2 := dealloc_ret.n2.execute;
deallocate_return3 := dealloc_ret.n3.execute;
deallocate_return4 := dealloc_ret.n4.execute;
+deallocate_return := dealloc_ret.var.execute;
dealloc_ret.head() {
Uint num_bytes;
@@ -96,36 +110,19 @@ dealloc_ret.n4() {
num_bytes = (4+1) * sizeof(Eterm);
}
+dealloc_ret.var(Deallocate) {
+ num_bytes = $Deallocate;
+}
+
dealloc_ret.execute() {
//| -no_next
- /*
- * Micro-benchmarks showed that the deallocate_return instruction
- * became slower when the continuation pointer was moved from
- * the process struct to the stack. The reason seems to be read
- * dependencies, i.e. that the CPU cannot figure out beforehand
- * from which position on the stack the continuation pointer
- * should be fetched.
- *
- * Making sure that num_bytes is always initialized with a
- * constant value seems to restore the lost speed.
- */
-
E = ADD_BYTE_OFFSET(E, num_bytes);
$RETURN();
CHECK_TERM(x(0));
DispatchReturn;
}
-deallocate_return(Deallocate) {
- //| -no_next
- Uint bytes_to_pop = $Deallocate;
- E = ADD_BYTE_OFFSET(E, bytes_to_pop);
- $RETURN();
- CHECK_TERM(x(0));
- DispatchReturn;
-}
-
move_deallocate_return(Src, Deallocate) {
//| -no_next