From b4030b60b58a681b2dea5453fbc36f7f4cc41bd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 7 Jul 2015 15:21:30 +0200 Subject: Teach the compiler the 'da' and 'dz' options Add the 'da' option to create a listing after the beam_a pass. Seeing how the code looks after beam_a, but before the blocks have been established, is sometimes useful. For symmetry, add the 'dz' option, even though it is just a synonym for 'S'. --- lib/compiler/src/compile.erl | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/compiler/src/compile.erl') diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl index e0a29fe9b1..cf79fdc9f9 100644 --- a/lib/compiler/src/compile.erl +++ b/lib/compiler/src/compile.erl @@ -671,6 +671,7 @@ asm_passes() -> %% Assembly level optimisations. [{delay, [{pass,beam_a}, + {iff,da,{listing,"a"}}, {unless,no_postopt, [{pass,beam_block}, {iff,dblk,{listing,"block"}}, @@ -703,6 +704,7 @@ asm_passes() -> {iff,no_postopt,[{pass,beam_clean}]}, {pass,beam_z}, + {iff,dz,{listing,"z"}}, {iff,dopt,{listing,"optimize"}}, {iff,'S',{listing,"S"}}, {iff,'to_asm',{done,"S"}}]}, -- cgit v1.2.3 From c288ab87fd6cafe22ce46be551baa2e815b495b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 7 Jul 2015 10:45:38 +0200 Subject: Delay get_tuple_element instructions until they are needed When matching tuples, the pattern matching compiler would generate code that would fetch all elements of the tuple that will ultimately be used, *before* testing that (for example) the first element is the correct record tag. 
For example: is_tuple Fail {x,0} test_arity Fail {x,0} 3 get_tuple_element {x,0} 0 {x,1} get_tuple_element {x,0} 1 {x,2} get_tuple_element {x,0} 2 {x,3} is_eq_exact Fail {x,1} some_tag If {x,2} and {x,3} are not used at label Fail, we can re-arrange the code like this: is_tuple Fail {x,0} test_arity Fail {x,0} 3 get_tuple_element {x,0} 0 {x,1} is_eq_exact Fail {x,1} some_tag get_tuple_element {x,0} 1 {x,2} get_tuple_element {x,0} 2 {x,3} Doing that may be beneficial in two ways. If the branch is taken, we have eliminated the execution of two unnecessary instructions. Even if the branch is never or rarely taken, there is the possibility for more optimizations following the is_eq_exact instruction. For example, imagine that the code looks like this: get_tuple_element {x,0} 1 {x,2} get_tuple_element {x,0} 2 {x,3} move {x,2} {y,0} move {x,3} {y,1} Assuming that {x,2} and {x,3} have no further uses in the code that follows, that can be rewritten to: get_tuple_element {x,0} 1 {y,0} get_tuple_element {x,0} 2 {y,1} When should we perform this optimization? At the very latest, it must be done before opt_blocks/1 in beam_block which does the elimination of unnecessary moves. Actually, we want to do the optimization before the blocks have been established, since moving instructions out of one block into another is cumbersome. Therefore, we will do the optimization in a new pass that is run before beam_block. A new pass will make debugging easier, and beam_block already has a fair number of sub passes. 
--- lib/compiler/src/compile.erl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib/compiler/src/compile.erl') diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl index cf79fdc9f9..605f5b8fd5 100644 --- a/lib/compiler/src/compile.erl +++ b/lib/compiler/src/compile.erl @@ -673,7 +673,9 @@ asm_passes() -> [{pass,beam_a}, {iff,da,{listing,"a"}}, {unless,no_postopt, - [{pass,beam_block}, + [{unless,no_reorder,{pass,beam_reorder}}, + {iff,dre,{listing,"reorder"}}, + {pass,beam_block}, {iff,dblk,{listing,"block"}}, {unless,no_except,{pass,beam_except}}, {iff,dexcept,{listing,"except"}}, -- cgit v1.2.3