diff options
author | John Högberg <[email protected]> | 2019-08-27 11:31:25 +0200 |
---|---|---|
committer | John Högberg <[email protected]> | 2019-08-27 11:31:25 +0200 |
commit | 689c3a5bed74bc80ae999e06983810f9543a8cd7 (patch) | |
tree | 514d7505ec16491f11f74e220bb1ac46f5a00b36 | |
parent | 5abb822ee402f008b3fdd2d863ffac93bbe6bc09 (diff) | |
parent | 9ee1b789e2a085a8cf59c249b46130a8c8801d0e (diff) | |
download | otp-689c3a5bed74bc80ae999e06983810f9543a8cd7.tar.gz otp-689c3a5bed74bc80ae999e06983810f9543a8cd7.tar.bz2 otp-689c3a5bed74bc80ae999e06983810f9543a8cd7.zip |
Merge branch 'john/erts/cp-management-fixup'
* john/erts/cp-management-fixup:
erts: Simplify deallocate_return
-rw-r--r-- | erts/emulator/beam/instrs.tab | 39 |
1 file changed, 18 insertions, 21 deletions
diff --git a/erts/emulator/beam/instrs.tab b/erts/emulator/beam/instrs.tab
index 156de67716..38b1e5909b 100644
--- a/erts/emulator/beam/instrs.tab
+++ b/erts/emulator/beam/instrs.tab
@@ -66,11 +66,25 @@ deallocate(Deallocate) {
     E = ADD_BYTE_OFFSET(E, $Deallocate);
 }
 
+//
+// Micro-benchmarks showed that the deallocate_return instruction
+// became slower when the continuation pointer was moved from
+// the process struct to the stack. The reason seems to be read
+// dependencies, i.e. that the CPU cannot figure out beforehand
+// from which position on the stack the continuation pointer
+// should be fetched.
+//
+// Initializing num_bytes with a constant value seems to restore
+// the lost speed, so we've specialized the instruction for the
+// most common values.
+//
+
 deallocate_return0 := dealloc_ret.n0.execute;
 deallocate_return1 := dealloc_ret.n1.execute;
 deallocate_return2 := dealloc_ret.n2.execute;
 deallocate_return3 := dealloc_ret.n3.execute;
 deallocate_return4 := dealloc_ret.n4.execute;
+deallocate_return := dealloc_ret.var.execute;
 
 dealloc_ret.head() {
     Uint num_bytes;
@@ -96,36 +110,19 @@ dealloc_ret.n4() {
     num_bytes = (4+1) * sizeof(Eterm);
 }
 
+dealloc_ret.var(Deallocate) {
+    num_bytes = $Deallocate;
+}
+
 dealloc_ret.execute() {
     //| -no_next
 
-    /*
-     * Micro-benchmarks showed that the deallocate_return instruction
-     * became slower when the continuation pointer was moved from
-     * the process struct to the stack. The reason seems to be read
-     * dependencies, i.e. that the CPU cannot figure out beforehand
-     * from which position on the stack the continuation pointer
-     * should be fetched.
-     *
-     * Making sure that num_bytes is always initialized with a
-     * constant value seems to restore the lost speed.
-     */
-
     E = ADD_BYTE_OFFSET(E, num_bytes);
     $RETURN();
     CHECK_TERM(x(0));
     DispatchReturn;
 }
 
-deallocate_return(Deallocate) {
-    //| -no_next
-    Uint bytes_to_pop = $Deallocate;
-    E = ADD_BYTE_OFFSET(E, bytes_to_pop);
-    $RETURN();
-    CHECK_TERM(x(0));
-    DispatchReturn;
-}
-
 move_deallocate_return(Src, Deallocate) {
     //| -no_next
 