diff options
author | Björn Gustavsson <[email protected]> | 2015-02-18 17:43:18 +0100 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2015-02-20 09:56:41 +0100 |
commit | f342b0322f0c594f18e9b5dffd0c5c751804b47f (patch) | |
tree | 161d47325490ccc448e9ccbb27fcdd6ae991b01e /lib/compiler | |
parent | 36a515e52d89a6a5f87c271bdea794394ca35d27 (diff) | |
download | otp-f342b0322f0c594f18e9b5dffd0c5c751804b47f.tar.gz otp-f342b0322f0c594f18e9b5dffd0c5c751804b47f.tar.bz2 otp-f342b0322f0c594f18e9b5dffd0c5c751804b47f.zip |
beam_jump: Eliminate pathologically slow compilation
José Valim noticed that code such as:
match(1) -> 1;
match(2) -> 2;
match(3) -> 3;
...
match(1000) -> 1000.
would compile very slowly. The culprit is opt/3 in beam_jump.
What happens is that opt/3 will rewrite this code:
select_val ...
label 1
jump 1000
label 2
jump 1000
...
label 999
jump 1000
label 1000
return
very slowly to this code:
select_val ...
label 1
label 2
...
label 999
label 1000
return
The reason for the slowness is that when opt/3 sees this
sequence:
label 1
jump 1000
...
it will remove the label (storing it in a dictionary),
and pick up the previously processed instruction from
the accumulator:
select_val ...
jump 1000
label 2
jump 1000
...
That is done in order to process all labels before the
jump and also to get rid of the jump instruction if the
previous instruction is an "unreachable after" instruction,
that is, one that control never falls through (such as the
select_val instruction here). In this case, re-processing
the sequence will remove the now unreachable jump instruction:
select_val ...
label 2
jump 1000
...
The problem is that re-processing the select_val instruction is
expensive. The instruction has a list of 1000 labels, all of which
will be added (again) to the set of referenced labels. The
select_val instruction will be re-processed again and again
until all labels and jumps have been gobbled up.
In the original version of beam_jump, opt/3 was not called
repeatedly until a fixpoint was found, but was expected to do
all its optimizations in one pass. The fixpoint iteration was
added later.
Since we now have the fixpoint iteration, there is no need
to do everything in a single pass. When we encounter a jump, we will
collect all previously seen labels and put them into the dictionary,
and then we will move on.
As a further optimization, we will look for sequences like this:
jump X
label ...
jump X
and replace them with:
label ...
jump X
In the example above, that will avoid 1000 updates of the dictionary.
With this optimization applied, compiling the example
above, scaled up to 10000 clauses, went from roughly
55 s to 0.1 s.
Reported-by: José Valim
Diffstat (limited to 'lib/compiler')
-rw-r--r-- | lib/compiler/src/beam_jump.erl | 34 |
1 files changed, 25 insertions, 9 deletions
diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl index 4e699c4fbf..52fe0e90ce 100644 --- a/lib/compiler/src/beam_jump.erl +++ b/lib/compiler/src/beam_jump.erl @@ -295,12 +295,6 @@ opt([{test,_,{f,_}=Lbl,_,_,_}=I|Is], Acc, St) -> opt(Is, [I|Acc], label_used(Lbl, St)); opt([{select,_,_R,Fail,Vls}=I|Is], Acc, St) -> skip_unreachable(Is, [I|Acc], label_used([Fail|Vls], St)); -opt([{label,L}=I|Is], Acc, #st{entry=L}=St) -> - %% NEVER move the entry label. - opt(Is, [I|Acc], St); -opt([{label,L1},{jump,{f,L2}}=I|Is], [Prev|Acc], St0) -> - St = St0#st{mlbl=dict:append(L2, L1, St0#st.mlbl)}, - opt([Prev,I|Is], Acc, label_used({f,L2}, St)); opt([{label,Lbl}=I|Is], Acc, #st{mlbl=Mlbl}=St0) -> case dict:find(Lbl, Mlbl) of {ok,Lbls} -> @@ -310,9 +304,20 @@ opt([{label,Lbl}=I|Is], Acc, #st{mlbl=Mlbl}=St0) -> insert_labels([Lbl|Lbls], Is, Acc, St); error -> opt(Is, [I|Acc], St0) end; -opt([{jump,{f,Lbl}},{label,Lbl}=I|Is], Acc, St) -> - opt([I|Is], Acc, St); -opt([{jump,Lbl}=I|Is], Acc, St) -> +opt([{jump,{f,_}=X}|[{label,_},{jump,X}|_]=Is], Acc, St) -> + opt(Is, Acc, St); +opt([{jump,{f,Lbl}}|[{label,Lbl}|_]=Is], Acc, St) -> + opt(Is, Acc, St); +opt([{jump,{f,L}=Lbl}=I|Is], Acc0, #st{mlbl=Mlbl0}=St0) -> + %% All labels before this jump instruction should now be + %% moved to the location of the jump's target. + {Lbls,Acc} = collect_labels(Acc0, St0), + St = case Lbls of + [] -> St0; + [_|_] -> + Mlbl = dict:append_list(L, Lbls, Mlbl0), + St0#st{mlbl=Mlbl} + end, skip_unreachable(Is, [I|Acc], label_used(Lbl, St)); %% Optimization: quickly handle some common instructions that don't %% have any failure labels and where is_unreachable_after(I) =:= false. @@ -349,6 +354,17 @@ insert_fc_labels([L|Ls], Mlbl, Acc0) -> end; insert_fc_labels([], _, Acc) -> Acc. +collect_labels(Is, #st{entry=Entry}) -> + collect_labels_1(Is, Entry, []). + +collect_labels_1([{label,Entry}|_]=Is, Entry, Acc) -> + %% Never move the entry label. 
+ {Acc,Is}; +collect_labels_1([{label,L}|Is], Entry, Acc) -> + collect_labels_1(Is, Entry, [L|Acc]); +collect_labels_1(Is, _Entry, Acc) -> + {Acc,Is}. + %% label_defined(Is, Label) -> true | false. %% Test whether the label Label is defined at the start of the instruction %% sequence, possibly preceeded by other label definitions. |