aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2018-10-29 16:31:00 +0100
committerBjörn Gustavsson <[email protected]>2018-11-21 10:09:10 +0100
commite79e5f194577d6c5b289415ec84338f70d2486db (patch)
tree3677f1347f8db479329a28130eefb3809235399f
parentb4595a670b159c802489d94d8537a48567967927 (diff)
downloadotp-e79e5f194577d6c5b289415ec84338f70d2486db.tar.gz
otp-e79e5f194577d6c5b289415ec84338f70d2486db.tar.bz2
otp-e79e5f194577d6c5b289415ec84338f70d2486db.zip
Sort move instructions on the Y register
Sort sequences of `move` instructions on the Y register. When moving from X registers to Y registers, having the instructions sorted on Y registers give the loader more opportunities to use `move_window{3,4,5}` instructions. For examples, the following five instructions: move_xy x(2) y(0) move_xy x(1) y(1) move_xy x(0) y(2) move_xy x(5) y(3) move_xy x(4) y(4) can be replaced with: move_window5_xxxxxy x(2) x(1) x(0) x(5) x(4) y(0) When the Y registers are not ordered so that `move_window5` can be used, the loader would typically combine the first three moves to a `move3_xyxyxy` instruction and the last two moves to a `move2_par_xyxy` instruction. When moving from Y registers to X registers, sorting on the Y registers could potentially be more cache-friendly. It could also be worthwhile investigating a new `move_window` instruction in the BEAM interpreter that could move values from contiguous Y registers to X registers. Note that `scripts/diffable` can generate diffable dissambly files for the loaded BEAM code: $ scripts/diffable --dis 0 $ scripts/diffable --dis 1 $ diff -u 0 1
-rw-r--r--lib/compiler/src/beam_block.erl38
1 files changed, 36 insertions, 2 deletions
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 9d8d5b2b0c..707974b2c1 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -22,7 +22,7 @@
-module(beam_block).
-export([module/2]).
--import(lists, [reverse/1,splitwith/2]).
+-import(lists, [keysort/2,reverse/1,splitwith/2]).
-spec module(beam_utils:module_code(), [compile:option()]) ->
{'ok',beam_utils:module_code()}.
@@ -53,7 +53,8 @@ blockify([I|Is0]=IsAll, Acc) ->
case collect(I) of
error -> blockify(Is0, [I|Acc]);
Instr when is_tuple(Instr) ->
- {Block,Is} = collect_block(IsAll),
+ {Block0,Is} = collect_block(IsAll),
+ Block = sort_moves(Block0),
blockify(Is, [{block,Block}|Acc])
end;
blockify([], Acc) -> reverse(Acc).
@@ -117,3 +118,36 @@ embed_lines([{block,B1},{line,_}=Line|T], Acc) ->
embed_lines([I|Is], Acc) ->
embed_lines(Is, [I|Acc]);
embed_lines([], Acc) -> Acc.
+
+%% sort_moves([Instruction]) -> [Instruction].
+%% Sort move instructions on the Y register to give the loader
+%% more opportunities for combining instructions.
+
+sort_moves([{set,[{x,_}],[{y,_}],move}=I|Is0]) ->
+ {Moves,Is} = sort_moves_1(Is0, x, y, [I]),
+ Moves ++ sort_moves(Is);
+sort_moves([{set,[{y,_}],[{x,_}],move}=I|Is0]) ->
+ {Moves,Is} = sort_moves_1(Is0, y, x, [I]),
+ Moves ++ sort_moves(Is);
+sort_moves([I|Is]) ->
+ [I|sort_moves(Is)];
+sort_moves([]) -> [].
+
+sort_moves_1([{set,[{x,0}],[_],move}=I|Is], _DTag, _STag, Acc) ->
+ %% The loader sometimes combines a move to x0 with the
+ %% instruction that follows, producing, for example, a move_call
+ %% instruction. Therefore, we don't want include this move
+ %% instruction in the sorting.
+ {sort_on_yreg(Acc)++[I],Is};
+sort_moves_1([{set,[{DTag,_}],[{STag,_}],move}=I|Is], DTag, STag, Acc) ->
+ sort_moves_1(Is, DTag, STag, [I|Acc]);
+sort_moves_1(Is, _DTag, _STag, Acc) ->
+ {sort_on_yreg(Acc),Is}.
+
+sort_on_yreg([{set,[Dst],[Src],move}|_]=Moves) ->
+ case {Dst,Src} of
+ {{y,_},{x,_}} ->
+ keysort(2, Moves);
+ {{x,_},{y,_}} ->
+ keysort(3, Moves)
+ end.