diff options
Diffstat (limited to 'lib/hipe/rtl/hipe_rtl_mk_switch.erl')
-rw-r--r-- | lib/hipe/rtl/hipe_rtl_mk_switch.erl | 985 |
1 files changed, 985 insertions, 0 deletions
diff --git a/lib/hipe/rtl/hipe_rtl_mk_switch.erl b/lib/hipe/rtl/hipe_rtl_mk_switch.erl new file mode 100644 index 0000000000..e5175217d6 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_mk_switch.erl @@ -0,0 +1,985 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2001 by Erik Johansson. All Rights Reserved +%% ==================================================================== +%% Filename : hipe_rtl_mk_switch.erl +%% Module : hipe_rtl_mk_switch +%% Purpose : Implements switching on Erlang values. +%% Notes : Only fixnums are supported well, +%% atoms work with table search, +%% the inline search of atoms might have some bugs. +%% Should be extended to handle bignums and floats. +%% +%% History : * 2001-02-28 Erik Johansson ([email protected]): +%% Created. +%% * 2001-04-01 Erik Trulsson ([email protected]): +%% Stefan Lindstr�m ([email protected]): +%% Added clustering and inlined binary search trees. +%% * 2001-07-30 EJ ([email protected]): +%% Fixed some bugs and started cleanup. +%% ==================================================================== +%% Exports : +%% gen_switch_val(I, VarMap, ConstTab, Options) +%% gen_switch_tuple(I, Map, ConstTab, Options) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_rtl_mk_switch). + +-export([gen_switch_val/4, gen_switch_tuple/4]). + +%%------------------------------------------------------------------------- + +-include("../main/hipe.hrl"). + +%%------------------------------------------------------------------------- + +-define(MINFORJUMPTABLE,9). + % Minimum number of integers needed to use something else than an inline search. +-define(MINFORINTSEARCHTREE,65). % Must be at least 3 + % Minimum number of integer elements needed to use a non-inline binary search. + +-define(MININLINEATOMSEARCH,8). + % Minimum number of atoms needed to use an inline binary search instead + % of a fast linear search. + +-define(MINFORATOMSEARCHTREE,20). % Must be at least 3 + % Minimum number of atoms needed to use a non-inline binary search instead + % of a linear search. + +-define(MAXINLINEATOMSEARCH,64). % Must be at least 3 + % The cutoff point between inlined and non-inlined binary search for atoms + +-define(WORDSIZE, hipe_rtl_arch:word_size()). +-define(MINDENSITY, 0.5). + % Minimum density required to use a jumptable instead of a binary search. + +%% The reason why MINFORINTSEARCHTREE and MINFORATOMSEARCHTREE must be +%% at least 3 is that the function tab/5 will enter an infinite loop +%% and hang when faced with a switch of size 1 or 2. + + +%% Options used by this module: +%% +%% [no_]use_indexing +%% Determines if any indexing be should be done at all. Turned on +%% by default at optimization level o2 and higher. +%% +%% [no_]use_clusters +%% Controls whether we attempt to divide sparse integer switches +%% into smaller dense clusters for which jumptables are practical. +%% Turned off by default since it can increase compilation time +%% considerably and most programs will gain little benefit from it. +%% +%% [no_]use_inline_atom_search +%% Controls whether we use an inline binary search for small number +%% of atoms. Turned off by default since this is currently only +%% supported on SPARC (and not on x86) and probably needs a bit +%% more testing before it can be turned on by default. + +gen_switch_val(I, VarMap, ConstTab, Options) -> + case proplists:get_bool(use_indexing, Options) of + false -> gen_slow_switch_val(I, VarMap, ConstTab, Options); + true -> gen_fast_switch_val(I, VarMap, ConstTab, Options) + end. + +gen_fast_switch_val(I, VarMap, ConstTab, Options) -> + {Arg, VarMap0} = + hipe_rtl_varmap:icode_var2rtl_var(hipe_icode:switch_val_term(I), VarMap), + IcodeFail = hipe_icode:switch_val_fail_label(I), + {Fail, VarMap1} = hipe_rtl_varmap:icode_label2rtl_label(IcodeFail, VarMap0), + %% Important that the list of cases is sorted when handling integers. + UnsortedCases = hipe_icode:switch_val_cases(I), + Cases = lists:sort(UnsortedCases), + + check_duplicates(Cases), + %% This check is currently not really necessary. The checking + %% happens at an earlier phase of the compilation. + {Types, InitCode} = split_types(Cases, Arg), + handle_types(Types, InitCode, VarMap1, ConstTab, Arg, {I, Fail, Options}). + +handle_types([{Type,Lbl,Cases}|Types], Code, VarMap, ConstTab, Arg, Info) -> + {Code1,VarMap1,ConstTab1} = gen_fast_switch_on(Type, Cases, + VarMap, + ConstTab, Arg, Info), + handle_types(Types, [Code,Lbl,Code1], VarMap1, ConstTab1, Arg, Info); +handle_types([], Code, VarMap, ConstTab, _, _) -> + {Code, VarMap, ConstTab}. + + +gen_fast_switch_on(integer, Cases, VarMap, ConstTab, Arg, {I, Fail, Options}) -> + {First,_} = hd(Cases), + Min = hipe_icode:const_value(First), + if length(Cases) < ?MINFORJUMPTABLE -> + gen_small_switch_val(Arg,Cases,Fail,VarMap,ConstTab,Options); + true -> + case proplists:get_bool(use_clusters, Options) of + false -> + M = list_to_tuple(Cases), + D = density(M, 1, tuple_size(M)), + if + D >= ?MINDENSITY -> + gen_jump_table(Arg,Fail,hipe_icode:switch_val_fail_label(I),VarMap,ConstTab,Cases,Min); + true -> + gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options) + end; + true -> + MC = minclusters(Cases), + Cl = cluster_split(Cases,MC), + CM = cluster_merge(Cl), + find_cluster(CM,VarMap,ConstTab,Options,Arg,Fail,hipe_icode:switch_val_fail_label(I)) + end + end; +gen_fast_switch_on(atom, Cases, VarMap, ConstTab, Arg, {_I, Fail, Options}) -> + case proplists:get_bool(use_inline_atom_search, Options) of + true -> + if + length(Cases) < ?MININLINEATOMSEARCH -> + gen_linear_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options); + length(Cases) > ?MAXINLINEATOMSEARCH -> + gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options); + true -> + gen_atom_switch_val(Arg,Cases,Fail,VarMap,ConstTab,Options) + end; + false -> + if length(Cases) < ?MINFORATOMSEARCHTREE -> + gen_linear_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options); + true -> + gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options) + end + end; +gen_fast_switch_on(_, _, VarMap, ConstTab, _, {I,_Fail,Options}) -> + %% We can only handle smart indexing of integers and atoms + %% TODO: Consider bignum + gen_slow_switch_val(I, VarMap, ConstTab, Options). + + +%% Split different types into separate switches. +split_types([Case|Cases], Arg) -> + Type1 = casetype(Case), + Types = split(Cases,Type1,[Case],[]), + switch_on_types(Types,[], [], Arg); +split_types([],_) -> + %% Cant happen. + ?EXIT({empty_caselist}). + +switch_on_types([{Type,Cases}], AccCode, AccCases, _Arg) -> + Lbl = hipe_rtl:mk_new_label(), + I = hipe_rtl:mk_goto(hipe_rtl:label_name(Lbl)), + {[{Type,Lbl,lists:reverse(Cases)} | AccCases], lists:reverse([I|AccCode])}; +switch_on_types([{other,Cases} | Rest], AccCode, AccCases, Arg) -> + %% Make sure the general case is handled last. + switch_on_types(Rest ++ [{other,Cases}], AccCode, AccCases, Arg); +switch_on_types([{Type,Cases} | Rest], AccCode, AccCases, Arg) -> + TLab = hipe_rtl:mk_new_label(), + FLab = hipe_rtl:mk_new_label(), + TestCode = + case Type of + integer -> + hipe_tagscheme:test_fixnum(Arg, hipe_rtl:label_name(TLab), + hipe_rtl:label_name(FLab), 0.5); + atom -> + hipe_tagscheme:test_atom(Arg, hipe_rtl:label_name(TLab), + hipe_rtl:label_name(FLab), 0.5); + bignum -> + hipe_tagscheme:test_bignum(Arg, hipe_rtl:label_name(TLab), + hipe_rtl:label_name(FLab), 0.5); + _ -> ?EXIT({ooops, type_not_handled, Type}) + end, + switch_on_types(Rest, [[TestCode,FLab] | AccCode], + [{Type,TLab,lists:reverse(Cases)} | AccCases], Arg). + +split([Case|Cases], Type, Current, Rest) -> + case casetype(Case) of + Type -> + split(Cases, Type, [Case|Current],Rest); + Other -> + split(Cases, Other, [Case], [{Type,Current}|Rest]) + end; +split([], Type, Current, Rest) -> + [{Type, Current} | Rest]. + +%% Determine what type an entry in the caselist has + +casetype({Const,_}) -> + casetype(hipe_icode:const_value(Const)); +casetype(A) -> + if + is_integer(A) -> + case hipe_tagscheme:is_fixnum(A) of + true -> integer; + false -> bignum + end; + is_float(A) -> float; + is_atom(A) -> atom; + true -> other + end. + +%% check that no duplicate values occur in the case list and also +%% check that all case values have the same type. +check_duplicates([]) -> true; +check_duplicates([_]) -> true; +check_duplicates([{Const1,_},{Const2,L2}|T]) -> + C1 = hipe_icode:const_value(Const1), + C2 = hipe_icode:const_value(Const2), + %% T1 = casetype(C1), + %% T2 = casetype(C2), + if C1 =/= C2 -> %% , T1 =:= T2 -> + check_duplicates([{Const2,L2}|T]); + true -> + ?EXIT({bad_values_in_switchval,C1}) + end. + +%% +%% Determine the optimal way to divide Cases into clusters such that each +%% cluster is dense. +%% +%% See: +%% Producing Good Code for the Case Statement, Robert L. Bernstein +%% Software - Practice and Experience vol 15, 1985, no 10, pp 1021--1024 +%% And +%% Correction to "Producing Good Code for the Case Statement" +%% Sampath Kannan and Todd A. Proebsting, +%% Software - Practice and Experience vol 24, 1994, no 2, p 233 +%% +%% (The latter is where the algorithm comes from.) + +%% This function will return a tuple with the first element being 0 +%% The rest of the elements being integers. A value of M at index N +%% (where the first element is considered to have index 0) means that +%% the first N cases can be divided into M (but no fewer) clusters where +%% each cluster is dense. + +minclusters(Cases) when is_list(Cases) -> + minclusters(list_to_tuple(Cases)); +minclusters(Cases) when is_tuple(Cases) -> + N = tuple_size(Cases), + MinClusters = list_to_tuple([0|n_list(N,inf)]), + i_loop(1,N,MinClusters,Cases). + +%% Create a list with N elements initialized to Init +n_list(0,_) -> []; +n_list(N,Init) -> [Init | n_list(N-1,Init)]. + +%% Do the dirty work of minclusters +i_loop(I,N,MinClusters,_Cases) when I > N -> + MinClusters; +i_loop(I,N,MinClusters,Cases) when I =< N -> + M = j_loop(0, I-1, MinClusters, Cases), + i_loop(I+1, N, M, Cases). + +%% More dirty work +j_loop(J,I1,MinClusters,_Cases) when J > I1 -> + MinClusters; +j_loop(J,I1,MinClusters,Cases) when J =< I1 -> + D = density(Cases,J+1,I1+1), + A0 = element(J+1,MinClusters), + A = if + is_number(A0) -> + A0+1; + true -> + A0 + end, + B = element(I1+2,MinClusters), + M = if + D >= ?MINDENSITY, A<B -> + setelement(I1+2,MinClusters,A); + true -> + MinClusters + end, + j_loop(J+1,I1,M,Cases). + + +%% Determine the density of a (subset of a) case list +%% A is a tuple with the cases in order from smallest to largest +%% I is the index of the first element and J of the last + +density(A,I,J) -> + {AI,_} = element(I,A), + {AJ,_} = element(J,A), + (J-I+1)/(hipe_icode:const_value(AJ)-hipe_icode:const_value(AI)+1). + + +%% Split a case list into dense clusters +%% Returns a list of lists of cases. +%% +%% Cases is the case list and Clust is a list describing the optimal +%% clustering as returned by minclusters +%% +%% If the value in the last place in minclusters is M then we can +%% split the case list into M clusters. We then search for the last +%% (== right-most) occurance of the value M-1 in minclusters. That +%% indicates the largest number of cases that can be split into M-1 +%% clusters. This means that the cases in between constitute one +%% cluster. Then we recurse on the remainder of the cases. +%% +%% The various calls to lists:reverse are just to ensure that the +%% cases remain in the correct, sorted order. + +cluster_split(Cases, Clust) -> + A = tl(tuple_to_list(Clust)), + Max = element(tuple_size(Clust), Clust), + L1 = lists:reverse(Cases), + L2 = lists:reverse(A), + cluster_split(Max, [], [], L1, L2). + +cluster_split(0, [], Res, Cases, _Clust) -> + L = lists:reverse(Cases), + {H,_} = hd(L), + {T,_} = hd(Cases), + [{dense,hipe_icode:const_value(H),hipe_icode:const_value(T),L}|Res]; +cluster_split(N, [], Res, Cases, [N|_] = Clust) -> + cluster_split(N-1, [], Res, Cases, Clust); +cluster_split(N,Sofar,Res,Cases,[N|Clust]) -> + {H,_} = hd(Sofar), + {T,_} = lists:last(Sofar), + cluster_split(N-1,[],[{dense,hipe_icode:const_value(H),hipe_icode:const_value(T),Sofar}|Res],Cases,[N|Clust]); +cluster_split(N,Sofar,Res,[C|Cases],[_|Clust]) -> + cluster_split(N,[C|Sofar],Res,Cases,Clust). + +%% +%% Merge adjacent small clusters into larger sparse clusters +%% +cluster_merge([C]) -> [C]; +cluster_merge([{dense,Min,Max,C}|T]) when length(C) >= ?MINFORJUMPTABLE -> + C2 = cluster_merge(T), + [{dense,Min,Max,C}|C2]; +cluster_merge([{sparse,Min,_,C},{sparse,_,Max,D}|T]) -> + R = {sparse,Min,Max,C ++ D}, + cluster_merge([R|T]); +cluster_merge([{sparse,Min,_,C},{dense,_,Max,D}|T]) when length(D) < ?MINFORJUMPTABLE -> + R = {sparse,Min,Max,C ++ D}, + cluster_merge([R|T]); +cluster_merge([{dense,Min,_,C},{dense,_,Max,D}|T]) when length(C) < ?MINFORJUMPTABLE, length(D) < ?MINFORJUMPTABLE -> + R = {sparse,Min,Max,C ++ D}, + cluster_merge([R|T]); +cluster_merge([{dense,Min,_,D},{sparse,_,Max,C}|T]) when length(D) < ?MINFORJUMPTABLE -> + R = {sparse,Min,Max,C ++ D}, + cluster_merge([R|T]); +cluster_merge([A,{dense,Min,Max,C}|T]) when length(C) >= ?MINFORJUMPTABLE -> + R = cluster_merge([{dense,Min,Max,C}|T]), + [A|R]. + + +%% Generate code to search for the correct cluster + +find_cluster([{sparse,_Min,_Max,C}],VarMap,ConstTab,Options,Arg,Fail,_IcodeFail) -> + case length(C) < ?MINFORINTSEARCHTREE of + true -> + gen_small_switch_val(Arg,C,Fail,VarMap,ConstTab,Options); + _ -> + gen_search_switch_val(Arg,C,Fail,VarMap,ConstTab,Options) + end; +find_cluster([{dense,Min,_Max,C}],VarMap,ConstTab,Options,Arg,Fail,IcodeFail) -> + case length(C) < ?MINFORJUMPTABLE of + true -> + gen_small_switch_val(Arg,C,Fail,VarMap,ConstTab,Options); + _ -> + gen_jump_table(Arg,Fail,IcodeFail,VarMap,ConstTab,C,Min) + end; +find_cluster([{Density,Min,Max,C}|T],VarMap,ConstTab,Options,Arg,Fail,IcodeFail) -> + ClustLab = hipe_rtl:mk_new_label(), + NextLab = hipe_rtl:mk_new_label(), + {ClustCode,V1,C1} = find_cluster([{Density,Min,Max,C}],VarMap,ConstTab,Options,Arg,Fail,IcodeFail), + + {Rest,V2,C2} = find_cluster(T,V1,C1,Options,Arg,Fail,IcodeFail), + + {[ + hipe_rtl:mk_branch(Arg, gt, hipe_rtl:mk_imm(hipe_tagscheme:mk_fixnum(Max)), + hipe_rtl:label_name(NextLab), + hipe_rtl:label_name(ClustLab), 0.50), + ClustLab + ] ++ + ClustCode ++ + [NextLab] ++ + Rest, + V2,C2}. + +%% Generate efficient code for a linear search through the case list. +%% Only works for atoms and integer. +gen_linear_switch_val(Arg,Cases,Fail,VarMap,ConstTab,_Options) -> + {Values,_Labels} = split_cases(Cases), + {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap), + Code = fast_linear_search(Arg,Values,LabMap,Fail), + {Code,VarMap1,ConstTab}. + +fast_linear_search(_Arg,[],[],Fail) -> + [hipe_rtl:mk_goto(hipe_rtl:label_name(Fail))]; +fast_linear_search(Arg,[Case|Cases],[Label|Labels],Fail) -> + Reg = hipe_rtl:mk_new_reg_gcsafe(), + NextLab = hipe_rtl:mk_new_label(), + C2 = fast_linear_search(Arg,Cases,Labels,Fail), + C1 = + if + is_integer(Case) -> + TVal = hipe_tagscheme:mk_fixnum(Case), + [ + hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(TVal)), + hipe_rtl:mk_branch(Arg,eq,Reg, + Label, + hipe_rtl:label_name(NextLab), 0.5), + NextLab + ]; + is_atom(Case) -> + [ + hipe_rtl:mk_load_atom(Reg,Case), + hipe_rtl:mk_branch(Arg,eq,Reg, + Label, + hipe_rtl:label_name(NextLab), 0.5), + NextLab + ]; + true -> % This should never happen ! + ?EXIT({internal_error_in_switch_val,Case}) + end, + [C1,C2]. + + +%% Generate code to search through a small cluster of integers using +%% binary search +gen_small_switch_val(Arg,Cases,Fail,VarMap,ConstTab,_Options) -> + {Values,_Labels} = split_cases(Cases), + {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap), + Keys = [hipe_tagscheme:mk_fixnum(X) % Add tags to the values + || X <- Values], + Code = inline_search(Keys, LabMap, Arg, Fail), + {Code, VarMap1, ConstTab}. + + +%% Generate code to search through a small cluster of atoms +gen_atom_switch_val(Arg,Cases,Fail,VarMap,ConstTab,_Options) -> + {Values, _Labels} = split_cases(Cases), + {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap), + LMap = [{label,L} || L <- LabMap], + {NewConstTab,Id} = hipe_consttab:insert_sorted_block(ConstTab, Values), + {NewConstTab2,LabId} = + hipe_consttab:insert_sorted_block(NewConstTab, word, LMap, Values), + Code = inline_atom_search(0, length(Cases)-1, Id, LabId, Arg, Fail, LabMap), + {Code, VarMap1, NewConstTab2}. + + +%% calculate the middle position of a list (+ 1 because of 1-indexing of lists) +get_middle(List) -> + N = length(List), + N div 2 + 1. + +%% get element [N1, N2] from a list +get_cases(_, 0, 0) -> + []; +get_cases([H|T], 0, N) -> + [H | get_cases(T, 0, N - 1)]; +get_cases([_|T], N1, N2) -> + get_cases(T, N1 - 1, N2 - 1). + + +%% inline_search/4 creates RTL code for a inlined binary search. +%% It requires two sorted tables - one with the keys to search +%% through and one with the corresponding labels to jump to. +%% +%% Input: +%% KeyList - A list of keys to search through. +%% LableList - A list of labels to jump to. +%% KeyReg - A register containing the key to search for. +%% Default - A label to jump to if the key is not found. +%% + +inline_search([], _LabelList, _KeyReg, _Default) -> []; +inline_search(KeyList, LabelList, KeyReg, Default) -> + %% Create some registers and labels that we need. + Reg = hipe_rtl:mk_new_reg_gcsafe(), + Lab1 = hipe_rtl:mk_new_label(), + Lab2 = hipe_rtl:mk_new_label(), + Lab3 = hipe_rtl:mk_new_label(), + + Length = length(KeyList), + + if + Length >= 3 -> + %% Get middle element and keys/labels before that and after + Middle_pos = get_middle(KeyList), + Middle_key = lists:nth(Middle_pos, KeyList), + Keys_beginning = get_cases(KeyList, 0, Middle_pos - 1), + Labels_beginning = get_cases(LabelList, 0, Middle_pos - 1), + Keys_ending = get_cases(KeyList, Middle_pos, Length), + Labels_ending = get_cases(LabelList, Middle_pos, Length), + + %% Create the code. + + %% Get the label and build it up properly + Middle_label = lists:nth(Middle_pos, LabelList), + + A = [hipe_rtl:mk_move(Reg, hipe_rtl:mk_imm(Middle_key)), + hipe_rtl:mk_branch(KeyReg, lt, Reg, + hipe_rtl:label_name(Lab2), + hipe_rtl:label_name(Lab1), 0.5), + Lab1, + hipe_rtl:mk_branch(KeyReg, gt, Reg, + hipe_rtl:label_name(Lab3), + Middle_label , 0.5), + Lab2], + %% build search tree for keys less than the middle element + B = inline_search(Keys_beginning, Labels_beginning, KeyReg, Default), + %% ...and for keys bigger than the middle element + D = inline_search(Keys_ending, Labels_ending, KeyReg, Default), + + %% append the code and return it + A ++ B ++ [Lab3] ++ D; + + Length =:= 2 -> + %% get the first and second elements and theirs labels + Key_first = hd(KeyList), + First_label = hd(LabelList), + + %% Key_second = hipe_tagscheme:mk_fixnum(lists:nth(2, KeyList)), + Key_second = lists:nth(2, KeyList), + Second_label = lists:nth(2, LabelList), + + NewLab = hipe_rtl:mk_new_label(), + + %% compare them + A = [hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(Key_first)), + hipe_rtl:mk_branch(KeyReg, eq, Reg, + First_label, + hipe_rtl:label_name(NewLab) , 0.5), + NewLab], + + B = [hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(Key_second)), + hipe_rtl:mk_branch(KeyReg, eq, Reg, + Second_label, + hipe_rtl:label_name(Default) , 0.5)], + A ++ B; + + Length =:= 1 -> + Key = hd(KeyList), + Label = hd(LabelList), + + [hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(Key)), + hipe_rtl:mk_branch(KeyReg, eq, Reg, + Label, + hipe_rtl:label_name(Default) , 0.5)] + end. + + +inline_atom_search(Start, End, Block, LBlock, KeyReg, Default, Labels) -> + Reg = hipe_rtl:mk_new_reg_gcsafe(), + + Length = (End - Start) + 1, + + if + Length >= 3 -> + Lab1 = hipe_rtl:mk_new_label(), + Lab2 = hipe_rtl:mk_new_label(), + Lab3 = hipe_rtl:mk_new_label(), + Lab4 = hipe_rtl:mk_new_label(), + + Mid = ((End-Start) div 2)+Start, + End1 = Mid-1, + Start1 = Mid+1, + A = [ + hipe_rtl:mk_load_word_index(Reg,Block,Mid), + hipe_rtl:mk_branch(KeyReg, lt, Reg, + hipe_rtl:label_name(Lab2), + hipe_rtl:label_name(Lab1), 0.5), + Lab1, + hipe_rtl:mk_branch(KeyReg, gt, Reg, + hipe_rtl:label_name(Lab3), + hipe_rtl:label_name(Lab4), 0.5), + Lab4, + hipe_rtl:mk_goto_index(LBlock, Mid, Labels), + Lab2 + ], + B = [inline_atom_search(Start,End1,Block,LBlock,KeyReg,Default,Labels)], + C = [inline_atom_search(Start1,End,Block,LBlock,KeyReg,Default,Labels)], + A ++ B ++ [Lab3] ++ C; + + Length =:= 2 -> + L1 = hipe_rtl:mk_new_label(), + L2 = hipe_rtl:mk_new_label(), + L3 = hipe_rtl:mk_new_label(), + [ + hipe_rtl:mk_load_word_index(Reg,Block,Start), + hipe_rtl:mk_branch(KeyReg,eq,Reg, + hipe_rtl:label_name(L1), + hipe_rtl:label_name(L2), 0.5), + L1, + hipe_rtl:mk_goto_index(LBlock,Start,Labels), + + L2, + hipe_rtl:mk_load_word_index(Reg,Block,End), + hipe_rtl:mk_branch(KeyReg,eq,Reg, + hipe_rtl:label_name(L3), + hipe_rtl:label_name(Default), 0.5), + L3, + hipe_rtl:mk_goto_index(LBlock, End, Labels) + ]; + + Length =:= 1 -> + NewLab = hipe_rtl:mk_new_label(), + [ + hipe_rtl:mk_load_word_index(Reg,Block,Start), + hipe_rtl:mk_branch(KeyReg, eq, Reg, + hipe_rtl:label_name(NewLab), + hipe_rtl:label_name(Default), 0.9), + NewLab, + hipe_rtl:mk_goto_index(LBlock, Start, Labels) + ] + end. + + +%% Create a jumptable +gen_jump_table(Arg,Fail,IcodeFail,VarMap,ConstTab,Cases,Min) -> + %% Map is a rtl mapping of Dense + {Max,DenseTbl} = dense_interval(Cases,Min,IcodeFail), + {Map,VarMap2} = lbls_from_cases(DenseTbl,VarMap), + + %% Make some labels and registers that we need. + BelowLab = hipe_rtl:mk_new_label(), + UntaggedR = hipe_rtl:mk_new_reg_gcsafe(), + StartR = hipe_rtl:mk_new_reg_gcsafe(), + + %% Generate the code to do the switch... + {[ + %% Untag the index. + hipe_tagscheme:untag_fixnum(UntaggedR, Arg)| + %% Check that the index is within Min and Max. + case Min of + 0 -> %% First element is 0 this is simple. + [hipe_rtl:mk_branch(UntaggedR, gtu, hipe_rtl:mk_imm(Max), + hipe_rtl:label_name(Fail), + hipe_rtl:label_name(BelowLab), 0.01), + BelowLab, + %% StartR contains the index into the jumptable + hipe_rtl:mk_switch(UntaggedR, Map)]; + _ -> %% First element is not 0 + [hipe_rtl:mk_alu(StartR, UntaggedR, sub, + hipe_rtl:mk_imm(Min)), + hipe_rtl:mk_branch(StartR, gtu, hipe_rtl:mk_imm(Max-Min), + hipe_rtl:label_name(Fail), + hipe_rtl:label_name(BelowLab), 0.01), + BelowLab, + %% StartR contains the index into the jumptable + hipe_rtl:mk_switch(StartR, Map)] + end], + VarMap2, + ConstTab}. + + +%% Generate the jumptable for Cases while filling in unused positions +%% with the fail label + +dense_interval(Cases, Min, IcodeFail) -> + dense_interval(Cases, Min, IcodeFail, 0, 0). +dense_interval([Pair = {Const,_}|Rest], Pos, Fail, Range, NoEntries) -> + Val = hipe_icode:const_value(Const), + if + Pos < Val -> + {Max, Res} = + dense_interval([Pair|Rest], Pos+1, Fail, Range+1, NoEntries), + {Max,[{hipe_icode:mk_const(Pos), Fail}|Res]}; + true -> + {Max, Res} = dense_interval(Rest, Pos+1, Fail, Range+1, NoEntries+1), + {Max, [Pair | Res]} + end; +dense_interval([], Max, _, _, _) -> + {Max-1, []}. + + +%%------------------------------------------------------------------------- +%% switch_val without jumptable +%% + +gen_slow_switch_val(I, VarMap, ConstTab, Options) -> + Is = rewrite_switch_val(I), + ?IF_DEBUG_LEVEL(3,?msg("Switch: ~w\n", [Is]), no_debug), + hipe_icode2rtl:translate_instrs(Is, VarMap, ConstTab, Options). + +rewrite_switch_val(I) -> + Var = hipe_icode:switch_val_term(I), + Fail = hipe_icode:switch_val_fail_label(I), + Cases = hipe_icode:switch_val_cases(I), + rewrite_switch_val_cases(Cases, Fail, Var). + +rewrite_switch_val_cases([{C,L}|Cases], Fail, Arg) -> + Tmp = hipe_icode:mk_new_var(), + NextLab = hipe_icode:mk_new_label(), + [hipe_icode:mk_move(Tmp, C), + hipe_icode:mk_if(op_exact_eqeq_2, [Arg, Tmp], L, + hipe_icode:label_name(NextLab)), + NextLab | + rewrite_switch_val_cases(Cases, Fail, Arg)]; +rewrite_switch_val_cases([], Fail, _Arg) -> + [hipe_icode:mk_goto(Fail)]. + + +%%------------------------------------------------------------------------- +%% switch_val with binary search jumptable +%% + +gen_search_switch_val(Arg, Cases, Default, VarMap, ConstTab, _Options) -> + ValTableR = hipe_rtl:mk_new_reg_gcsafe(), + + {Values,_Labels} = split_cases(Cases), + {NewConstTab,Id} = hipe_consttab:insert_sorted_block(ConstTab, Values), + {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap), + + Code = + [hipe_rtl:mk_load_address(ValTableR, Id, constant)| + tab(Values,LabMap,Arg,ValTableR,Default)], + {Code, VarMap1, NewConstTab}. + + +%%------------------------------------------------------------------------- +%% +%% tab/5 creates RTL code for a binary search. +%% It requires two sorted tables one with the keys to search +%% through and one with the corresponding labels to jump to. +%% +%% The implementation is derived from John Bentlys +%% Programming Pearls. +%% +%% Input: +%% KeyList - A list of keys to search through. +%% (Just used to calculate the number of elements.) +%% LableList - A list of labels to jump to. +%% KeyReg - A register containing the key to search for. +%% TablePntrReg - A register containing a pointer to the +%% tables with keys +%% Default - A lable to jump to if the key is not found. +%% +%% Example: +%% KeyTbl: < a, b, d, f, h, i, z > +%% Lbls: < 5, 3, 2, 4, 1, 7, 6 > +%% Default: 8 +%% KeyReg: v37 +%% TablePntrReg: r41 +%% +%% should give code like: +%% r41 <- KeyTbl +%% r42 <- 0 +%% r43 <- [r41+16] +%% if (r43 gt v37) then L17 (0.50) else L16 +%% L16: +%% r42 <- 16 +%% goto L17 +%% L17: +%% r46 <- r42 add 16 +%% r45 <- [r41+r46] +%% if (r45 gt v37) then L21 (0.50) else L20 +%% L20: +%% r42 <- r46 +%% goto L21 +%% L21: +%% r48 <- r42 add 8 +%% r47 <- [r41+r48] +%% if (r47 gt v37) then L23 (0.50) else L22 +%% L22: +%% r42 <- r48 +%% goto L23 +%% L23: +%% r50 <- r42 add 4 +%% r49 <- [r41+r50] +%% if (r49 gt v37) then L25 (0.50) else L24 +%% L24: +%% r42 <- r42 add 4 +%% goto L25 +%% L25: +%% if (r42 gt 28) then L6 (0.50) else L18 +%% L18: +%% r44 <- [r41+r42] +%% if (r44 eq v37) then L19 (0.90) else L8 +%% L19: +%% r42 <- r42 sra 2 +%% switch (r42) <L5, L3, L2, L4, L1, +%% L7, L6> + +%% +%% The search is done like a rolled out binary search, +%% but instead of starting in the middle we start at +%% the power of two closest above the middle. +%% +%% We let IndexReg point to the lower bound of our +%% search, and then we speculatively look at a +%% position at IndexReg + I where I is a power of 2. +%% +%% Example: Looking for 'h' in +%% KeyTbl: < a, b, d, f, h, i, z > +%% +%% We start with IndexReg=0 and I=4 +%% < a, b, d, f, h, i, z > +%% ^ ^ +%% IndexReg + I +%% +%% 'f' < 'h' so we add I to IndexReg and divide I with 2 +%% IndexReg=4 and I=2 +%% < a, b, d, f, h, i, z > +%% ^ ^ +%% IndexReg + I +%% +%% 'i' > 'h' so we keep IndexReg and divide I with 2 +%% IndexReg=4 and I=1 +%% < a, b, d, f, h, i, z > +%% ^ ^ +%% IndexReg+ I +%% Now we have found 'h' so we add I to IndexReg -> 5 +%% And we can load switch to the label at position 5 in +%% the label table. +%% +%% Now since the wordsize is 4 all numbers above are +%% Multiples of 4. + +tab(KeyList, LabelList, KeyReg, TablePntrReg, Default) -> + %% Calculate the size of the table: + %% the number of keys * wordsize + LastOffset = (length(KeyList)-1)*?WORDSIZE, + + %% Calculate the power of two closest to the size of the table. + Pow2 = 1 bsl trunc(math:log(LastOffset) / math:log(2)), + + %% Create some registers and lables that we need + IndexReg = hipe_rtl:mk_new_reg_gcsafe(), + Temp = hipe_rtl:mk_new_reg_gcsafe(), + Temp2 = hipe_rtl:mk_new_reg_gcsafe(), + Lab1 = hipe_rtl:mk_new_label(), + Lab2 = hipe_rtl:mk_new_label(), + Lab3 = hipe_rtl:mk_new_label(), + Lab4 = hipe_rtl:mk_new_label(), + + %% Calculate the position to start looking at + Init = (LastOffset)-Pow2, + + %% Create the code + [ + hipe_rtl:mk_move(IndexReg,hipe_rtl:mk_imm(0)), + hipe_rtl:mk_load(Temp,TablePntrReg,hipe_rtl:mk_imm(Init)), + hipe_rtl:mk_branch(Temp, geu, KeyReg, + hipe_rtl:label_name(Lab2), + hipe_rtl:label_name(Lab1), 0.5), + Lab1, + hipe_rtl:mk_alu(IndexReg, IndexReg, add, hipe_rtl:mk_imm(Init+?WORDSIZE)), + hipe_rtl:mk_goto(hipe_rtl:label_name(Lab2)), + Lab2] ++ + + step(Pow2 div 2, TablePntrReg, IndexReg, KeyReg) ++ + + [hipe_rtl:mk_branch(IndexReg, gt, hipe_rtl:mk_imm(LastOffset), + hipe_rtl:label_name(Default), + hipe_rtl:label_name(Lab3), 0.5), + Lab3, + hipe_rtl:mk_load(Temp2,TablePntrReg,IndexReg), + hipe_rtl:mk_branch(Temp2, eq, KeyReg, + hipe_rtl:label_name(Lab4), + hipe_rtl:label_name(Default), 0.9), + Lab4, + hipe_rtl:mk_alu(IndexReg, IndexReg, sra, + hipe_rtl:mk_imm(hipe_rtl_arch:log2_word_size())), + hipe_rtl:mk_sorted_switch(IndexReg, LabelList, KeyList) + ]. + +step(I,TablePntrReg,IndexReg,KeyReg) -> + Temp = hipe_rtl:mk_new_reg_gcsafe(), + TempIndex = hipe_rtl:mk_new_reg_gcsafe(), + Lab1 = hipe_rtl:mk_new_label(), + Lab2 = hipe_rtl:mk_new_label(), + [hipe_rtl:mk_alu(TempIndex, IndexReg, add, hipe_rtl:mk_imm(I)), + hipe_rtl:mk_load(Temp,TablePntrReg,TempIndex), + hipe_rtl:mk_branch(Temp, gtu, KeyReg, + hipe_rtl:label_name(Lab2), + hipe_rtl:label_name(Lab1) , 0.5), + Lab1] ++ + case ?WORDSIZE of + I -> %% Recursive base case + [hipe_rtl:mk_alu(IndexReg, IndexReg, add, hipe_rtl:mk_imm(I)), + hipe_rtl:mk_goto(hipe_rtl:label_name(Lab2)), + Lab2 + ]; + _ -> %% Recursion case + [hipe_rtl:mk_move(IndexReg, TempIndex), + hipe_rtl:mk_goto(hipe_rtl:label_name(Lab2)), + Lab2 + | step(I div 2, TablePntrReg, IndexReg, KeyReg) + ] + end. + +%%------------------------------------------------------------------------- + +lbls_from_cases([{_,L}|Rest], VarMap) -> + {Map,VarMap1} = lbls_from_cases(Rest, VarMap), + {RtlL, VarMap2} = hipe_rtl_varmap:icode_label2rtl_label(L,VarMap1), + %% {[{label,hipe_rtl:label_name(RtlL)}|Map],VarMap2}; + {[hipe_rtl:label_name(RtlL)|Map],VarMap2}; +lbls_from_cases([], VarMap) -> + {[], VarMap}. + +%%------------------------------------------------------------------------- + +split_cases(L) -> + split_cases(L, [], []). + +split_cases([], Vs, Ls) -> {lists:reverse(Vs),lists:reverse(Ls)}; +split_cases([{V,L}|Rest], Vs, Ls) -> + split_cases(Rest, [hipe_icode:const_value(V)|Vs], [L|Ls]). + +%%------------------------------------------------------------------------- +%% +%% {switch_tuple_arity,X,Fail,N,[{A1,L1},...,{AN,LN}]} +%% +%% if not boxed(X) goto Fail +%% Hdr := *boxed_val(X) +%% switch_int(Hdr,Fail,[{H(A1),L1},...,{H(AN),LN}]) +%% where H(Ai) = make_arityval(Ai) +%% +%%------------------------------------------------------------------------- + +gen_switch_tuple(I, Map, ConstTab, _Options) -> + Var = hipe_icode:switch_tuple_arity_term(I), + {X, Map1} = hipe_rtl_varmap:icode_var2rtl_var(Var, Map), + Fail0 = hipe_icode:switch_tuple_arity_fail_label(I), + {Fail1, Map2} = hipe_rtl_varmap:icode_label2rtl_label(Fail0, Map1), + FailLab = hipe_rtl:label_name(Fail1), + {Cases, Map3} = + lists:foldr(fun({A,L}, {Rest,M}) -> + {L1,M1} = hipe_rtl_varmap:icode_label2rtl_label(L, M), + L2 = hipe_rtl:label_name(L1), + A1 = hipe_icode:const_value(A), + H1 = hipe_tagscheme:mk_arityval(A1), + {[{H1,L2}|Rest], M1} end, + {[], Map2}, + hipe_icode:switch_tuple_arity_cases(I)), + Hdr = hipe_rtl:mk_new_reg_gcsafe(), + IsBoxedLab = hipe_rtl:mk_new_label(), + {[hipe_tagscheme:test_is_boxed(X, hipe_rtl:label_name(IsBoxedLab), + FailLab, 0.9), + IsBoxedLab, + hipe_tagscheme:get_header(Hdr, X) | + gen_switch_int(Hdr, FailLab, Cases)], + Map3, ConstTab}. + +%% +%% RTL-level switch-on-int +%% + +gen_switch_int(X, FailLab, [{C,L}|Rest]) -> + NextLab = hipe_rtl:mk_new_label(), + [hipe_rtl:mk_branch(X, eq, hipe_rtl:mk_imm(C), L, + hipe_rtl:label_name(NextLab), 0.5), + NextLab | + gen_switch_int(X, FailLab, Rest)]; +gen_switch_int(_, FailLab, []) -> + [hipe_rtl:mk_goto(FailLab)]. + |