diff options
author | John Högberg <[email protected]> | 2019-08-26 10:26:06 +0200 |
---|---|---|
committer | John Högberg <[email protected]> | 2019-08-26 10:38:06 +0200 |
commit | 9ee1b789e2a085a8cf59c249b46130a8c8801d0e (patch) | |
tree | 6a93fd439b1ee1d9d3d89f6eb035e98d6adb71d0 | |
parent | db00e5033b964471b378f872e0f2f64e82da83be (diff) | |
download | otp-9ee1b789e2a085a8cf59c249b46130a8c8801d0e.tar.gz otp-9ee1b789e2a085a8cf59c249b46130a8c8801d0e.tar.bz2 otp-9ee1b789e2a085a8cf59c249b46130a8c8801d0e.zip |
erts: Simplify deallocate_return
-rw-r--r-- | erts/emulator/beam/instrs.tab | 39 |
1 files changed, 18 insertions, 21 deletions
```diff
diff --git a/erts/emulator/beam/instrs.tab b/erts/emulator/beam/instrs.tab
index 156de67716..38b1e5909b 100644
--- a/erts/emulator/beam/instrs.tab
+++ b/erts/emulator/beam/instrs.tab
@@ -66,11 +66,25 @@ deallocate(Deallocate) {
     E = ADD_BYTE_OFFSET(E, $Deallocate);
 }
 
+//
+// Micro-benchmarks showed that the deallocate_return instruction
+// became slower when the continuation pointer was moved from
+// the process struct to the stack. The reason seems to be read
+// dependencies, i.e. that the CPU cannot figure out beforehand
+// from which position on the stack the continuation pointer
+// should be fetched.
+//
+// Initializing num_bytes with a constant value seems to restore
+// the lost speed, so we've specialized the instruction for the
+// most common values.
+//
+
 deallocate_return0 := dealloc_ret.n0.execute;
 deallocate_return1 := dealloc_ret.n1.execute;
 deallocate_return2 := dealloc_ret.n2.execute;
 deallocate_return3 := dealloc_ret.n3.execute;
 deallocate_return4 := dealloc_ret.n4.execute;
+deallocate_return := dealloc_ret.var.execute;
 
 dealloc_ret.head() {
     Uint num_bytes;
@@ -96,36 +110,19 @@ dealloc_ret.n4() {
     num_bytes = (4+1) * sizeof(Eterm);
 }
 
+dealloc_ret.var(Deallocate) {
+    num_bytes = $Deallocate;
+}
+
 dealloc_ret.execute() {
     //| -no_next
 
-    /*
-     * Micro-benchmarks showed that the deallocate_return instruction
-     * became slower when the continuation pointer was moved from
-     * the process struct to the stack. The reason seems to be read
-     * dependencies, i.e. that the CPU cannot figure out beforehand
-     * from which position on the stack the continuation pointer
-     * should be fetched.
-     *
-     * Making sure that num_bytes is always initialized with a
-     * constant value seems to restore the lost speed.
-     */
-
     E = ADD_BYTE_OFFSET(E, num_bytes);
     $RETURN();
     CHECK_TERM(x(0));
     DispatchReturn;
 }
 
-deallocate_return(Deallocate) {
-    //| -no_next
-    Uint bytes_to_pop = $Deallocate;
-    E = ADD_BYTE_OFFSET(E, bytes_to_pop);
-    $RETURN();
-    CHECK_TERM(x(0));
-    DispatchReturn;
-}
-
 move_deallocate_return(Src, Deallocate) {
     //| -no_next
 
```