The R13B03 release.OTP_R13B03

author: Erlang/OTP <[email protected]> 2009-11-20 14:54:40 +0000
committer: Erlang/OTP <[email protected]> 2009-11-20 14:54:40 +0000
commit: 84adefa331c4159d432d22840663c38f155cd4c1 (patch)
tree: bff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/hipe/rtl/hipe_rtl_mk_switch.erl
download: otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz
otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2
otp-84adefa331c4159d432d22840663c38f155cd4c1.zip
1 files changed, 985 insertions, 0 deletions
diff --git a/lib/hipe/rtl/hipe_rtl_mk_switch.erl b/lib/hipe/rtl/hipe_rtl_mk_switch.erl
new file mode 100644
index 0000000000..e5175217d6
--- /dev/null
+++ b/lib/hipe/rtl/hipe_rtl_mk_switch.erl
@@ -0,0 +1,985 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Copyright (c) 2001 by Erik Johansson.  All Rights Reserved 
+%% ====================================================================
+%%  Filename : 	hipe_rtl_mk_switch.erl
+%%  Module   :	hipe_rtl_mk_switch
+%%  Purpose  :  Implements switching on Erlang values.
+%%  Notes    :  Only fixnums are supported well,
+%%              atoms work with table search, 
+%%              the inline search of atoms might have some bugs.
+%%              Should be extended to handle bignums and floats.
+%%
+%%  History  :	* 2001-02-28 Erik Johansson ([email protected]): 
+%%                Created.
+%%              * 2001-04-01 Erik Trulsson ([email protected]):
+%%                           Stefan Lindstr�m ([email protected]):
+%%                Added clustering and inlined binary search trees.
+%%              * 2001-07-30 EJ ([email protected]):
+%%                Fixed some bugs and started cleanup.
+%% ====================================================================
+%%  Exports  :
+%%    gen_switch_val(I, VarMap, ConstTab, Options)
+%%    gen_switch_tuple(I, Map, ConstTab, Options)
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-module(hipe_rtl_mk_switch).
+
+-export([gen_switch_val/4, gen_switch_tuple/4]).
+
+%%-------------------------------------------------------------------------
+
+-include("../main/hipe.hrl").
+
+%%-------------------------------------------------------------------------
+
+-define(MINFORJUMPTABLE,9).
+	% Minimum number of integers needed to use something else than an inline search.
+-define(MINFORINTSEARCHTREE,65).  % Must be at least 3
+	% Minimum number of integer elements needed to use a non-inline binary search.
+
+-define(MININLINEATOMSEARCH,8). 
+	% Minimum number of atoms needed to use an inline binary search instead
+	% of a fast linear search.
+
+-define(MINFORATOMSEARCHTREE,20).  % Must be at least 3
+	% Minimum number of atoms needed to use a non-inline binary search instead
+	% of a linear search.
+	
+-define(MAXINLINEATOMSEARCH,64). % Must be at least 3
+	% The cutoff point between inlined and non-inlined binary search for atoms
+
+-define(WORDSIZE, hipe_rtl_arch:word_size()).
+-define(MINDENSITY, 0.5).
+        % Minimum density required to use a jumptable instead of a binary search.
+
+%% The reason why MINFORINTSEARCHTREE and MINFORATOMSEARCHTREE must be
+%% at least 3 is that the function tab/5 will enter an infinite loop
+%% and hang when faced with a switch of size 1 or 2.
+
+
+%% Options used by this module:
+%%
+%% [no_]use_indexing
+%%    Determines if any indexing be should be done at all. Turned on
+%%    by default at optimization level o2 and higher.
+%%
+%% [no_]use_clusters
+%%    Controls whether we attempt to divide sparse integer switches
+%%    into smaller dense clusters for which jumptables are practical.
+%%    Turned off by default since it can increase compilation time
+%%    considerably and most programs will gain little benefit from it.
+%%
+%% [no_]use_inline_atom_search
+%%    Controls whether we use an inline binary search for small number
+%%    of atoms. Turned off by default since this is currently only
+%%    supported on SPARC (and not on x86) and probably needs a bit
+%%    more testing before it can be turned on by default.
+
+gen_switch_val(I, VarMap, ConstTab, Options) ->
+  case proplists:get_bool(use_indexing, Options) of
+    false -> gen_slow_switch_val(I, VarMap, ConstTab, Options);
+    true -> gen_fast_switch_val(I, VarMap, ConstTab, Options)
+  end.
+
+gen_fast_switch_val(I, VarMap, ConstTab, Options) ->
+  {Arg, VarMap0} = 
+    hipe_rtl_varmap:icode_var2rtl_var(hipe_icode:switch_val_term(I), VarMap),
+  IcodeFail = hipe_icode:switch_val_fail_label(I),
+  {Fail, VarMap1} = hipe_rtl_varmap:icode_label2rtl_label(IcodeFail, VarMap0),
+  %% Important that the list of cases is sorted when handling integers.
+  UnsortedCases = hipe_icode:switch_val_cases(I),
+  Cases = lists:sort(UnsortedCases),
+  
+  check_duplicates(Cases),
+  %% This check is currently not really necessary.  The checking
+  %% happens at an earlier phase of the compilation.
+  {Types, InitCode} = split_types(Cases, Arg),
+  handle_types(Types, InitCode, VarMap1, ConstTab, Arg, {I, Fail, Options}).
+
+handle_types([{Type,Lbl,Cases}|Types], Code, VarMap, ConstTab, Arg, Info) ->
+  {Code1,VarMap1,ConstTab1} = gen_fast_switch_on(Type, Cases, 
+						 VarMap, 
+						 ConstTab, Arg, Info),
+  handle_types(Types, [Code,Lbl,Code1], VarMap1, ConstTab1, Arg, Info);
+handle_types([], Code, VarMap, ConstTab, _, _) ->
+  {Code, VarMap, ConstTab}.
+
+
+gen_fast_switch_on(integer, Cases, VarMap, ConstTab, Arg, {I, Fail, Options})  ->
+  {First,_} = hd(Cases),
+  Min = hipe_icode:const_value(First),
+  if length(Cases) < ?MINFORJUMPTABLE ->
+      gen_small_switch_val(Arg,Cases,Fail,VarMap,ConstTab,Options);
+     true ->
+      case proplists:get_bool(use_clusters, Options) of
+	false ->
+	  M = list_to_tuple(Cases),
+	  D = density(M, 1, tuple_size(M)),
+	  if 
+	    D >= ?MINDENSITY ->
+	      gen_jump_table(Arg,Fail,hipe_icode:switch_val_fail_label(I),VarMap,ConstTab,Cases,Min);
+	    true ->
+	      gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options)
+	  end;
+	true ->
+	  MC = minclusters(Cases),
+	  Cl = cluster_split(Cases,MC),
+	  CM = cluster_merge(Cl),
+	  find_cluster(CM,VarMap,ConstTab,Options,Arg,Fail,hipe_icode:switch_val_fail_label(I))
+      end
+  end;
+gen_fast_switch_on(atom, Cases, VarMap, ConstTab, Arg, {_I, Fail, Options})  ->
+  case proplists:get_bool(use_inline_atom_search, Options) of
+    true ->
+      if
+	length(Cases) < ?MININLINEATOMSEARCH ->
+	  gen_linear_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options);
+	length(Cases) > ?MAXINLINEATOMSEARCH ->
+	  gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options);
+	true ->
+	  gen_atom_switch_val(Arg,Cases,Fail,VarMap,ConstTab,Options)
+      end;
+    false ->
+      if length(Cases) < ?MINFORATOMSEARCHTREE ->
+	  gen_linear_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options);
+	 true ->
+	  gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options)
+      end
+  end;	
+gen_fast_switch_on(_, _, VarMap, ConstTab, _, {I,_Fail,Options})  ->
+  %% We can only handle smart indexing of integers and atoms
+  %% TODO: Consider bignum
+  gen_slow_switch_val(I, VarMap, ConstTab, Options).
+
+
+%% Split different types into separate switches.
+split_types([Case|Cases], Arg) ->
+  Type1 = casetype(Case),
+  Types = split(Cases,Type1,[Case],[]),
+  switch_on_types(Types,[], [], Arg);
+split_types([],_) ->
+  %% Cant happen.
+  ?EXIT({empty_caselist}).
+
+switch_on_types([{Type,Cases}], AccCode, AccCases, _Arg) ->
+  Lbl = hipe_rtl:mk_new_label(),
+  I = hipe_rtl:mk_goto(hipe_rtl:label_name(Lbl)),
+  {[{Type,Lbl,lists:reverse(Cases)} | AccCases], lists:reverse([I|AccCode])};
+switch_on_types([{other,Cases} | Rest], AccCode, AccCases, Arg) ->
+  %% Make sure the general case is handled last.
+  switch_on_types(Rest ++ [{other,Cases}], AccCode, AccCases, Arg);
+switch_on_types([{Type,Cases} | Rest], AccCode, AccCases, Arg) ->
+  TLab = hipe_rtl:mk_new_label(),
+  FLab = hipe_rtl:mk_new_label(),
+  TestCode = 
+    case Type of
+      integer ->
+	hipe_tagscheme:test_fixnum(Arg, hipe_rtl:label_name(TLab), 
+				   hipe_rtl:label_name(FLab), 0.5);
+      atom ->
+	hipe_tagscheme:test_atom(Arg, hipe_rtl:label_name(TLab), 
+				 hipe_rtl:label_name(FLab), 0.5);
+      bignum ->
+	hipe_tagscheme:test_bignum(Arg, hipe_rtl:label_name(TLab), 
+				   hipe_rtl:label_name(FLab), 0.5);
+      _ -> ?EXIT({ooops, type_not_handled, Type})
+    end,
+  switch_on_types(Rest, [[TestCode,FLab] | AccCode],
+		  [{Type,TLab,lists:reverse(Cases)} | AccCases], Arg).
+
+split([Case|Cases], Type, Current, Rest) ->
+  case casetype(Case) of
+    Type ->
+      split(Cases, Type, [Case|Current],Rest);
+    Other ->
+      split(Cases, Other, [Case], [{Type,Current}|Rest])
+  end;
+split([], Type, Current, Rest) ->
+  [{Type, Current} | Rest].
+
+%% Determine what type an entry in the caselist has
+
+casetype({Const,_}) ->
+  casetype(hipe_icode:const_value(Const));
+casetype(A) ->
+  if
+    is_integer(A) -> 
+      case hipe_tagscheme:is_fixnum(A) of
+	true -> integer;
+	false -> bignum
+      end;
+    is_float(A) -> float;
+    is_atom(A) -> atom;
+    true -> other
+  end.
+
+%% check that no duplicate values occur in the case list and also
+%% check that all case values have the same type.
+check_duplicates([]) -> true;
+check_duplicates([_]) -> true;
+check_duplicates([{Const1,_},{Const2,L2}|T]) ->
+  C1 = hipe_icode:const_value(Const1),
+  C2 = hipe_icode:const_value(Const2),
+  %%	T1 = casetype(C1),
+  %%	T2 = casetype(C2),
+  if C1 =/= C2 -> %% , T1 =:= T2 ->
+      check_duplicates([{Const2,L2}|T]);
+     true ->
+      ?EXIT({bad_values_in_switchval,C1})
+  end.
+
+%%
+%% Determine the optimal way to divide Cases into clusters such that each 
+%% cluster is dense.
+%%
+%% See:
+%%  Producing Good Code for the Case Statement, Robert L. Bernstein
+%%  Software - Practice and Experience vol 15, 1985, no 10, pp 1021--1024
+%% And
+%%  Correction to "Producing Good Code for the Case Statement"
+%%  Sampath Kannan and Todd A. Proebsting,
+%%  Software - Practice and Experience vol 24, 1994, no 2, p 233
+%%
+%% (The latter is where the algorithm comes from.)
+
+%% This function will return a tuple with the first element being 0
+%% The rest of the elements being integers. A value of M at index N
+%% (where the first element is considered to have index 0) means that
+%% the first N cases can be divided into M (but no fewer) clusters where
+%% each cluster is dense.
+
+minclusters(Cases) when is_list(Cases) ->
+  minclusters(list_to_tuple(Cases));
+minclusters(Cases) when is_tuple(Cases) ->
+  N = tuple_size(Cases),
+  MinClusters = list_to_tuple([0|n_list(N,inf)]),
+  i_loop(1,N,MinClusters,Cases).
+
+%% Create a list with N elements initialized to Init
+n_list(0,_) -> [];
+n_list(N,Init) -> [Init | n_list(N-1,Init)].
+
+%% Do the dirty work of minclusters
+i_loop(I,N,MinClusters,_Cases) when I > N -> 
+  MinClusters;
+i_loop(I,N,MinClusters,Cases) when I =< N ->
+  M = j_loop(0, I-1, MinClusters, Cases),
+  i_loop(I+1, N, M, Cases).
+
+%% More dirty work	
+j_loop(J,I1,MinClusters,_Cases) when J > I1 ->
+  MinClusters;
+j_loop(J,I1,MinClusters,Cases) when J =< I1 ->
+  D = density(Cases,J+1,I1+1),
+  A0 = element(J+1,MinClusters),
+  A = if
+	is_number(A0) ->
+	  A0+1;
+	true ->
+	  A0
+      end,
+  B = element(I1+2,MinClusters),
+  M = if
+	D >= ?MINDENSITY, A<B ->
+	  setelement(I1+2,MinClusters,A);
+	true ->
+	  MinClusters
+      end,
+  j_loop(J+1,I1,M,Cases).
+
+
+%% Determine the density of a (subset of a) case list
+%% A is a tuple with the cases in order from smallest to largest
+%% I is the index of the first element and J of the last
+
+density(A,I,J) ->
+  {AI,_} = element(I,A),
+  {AJ,_} = element(J,A),
+  (J-I+1)/(hipe_icode:const_value(AJ)-hipe_icode:const_value(AI)+1).
+
+
+%% Split a case list into dense clusters
+%% Returns a list of lists of cases.
+%%
+%% Cases is the case list and Clust is a list describing the optimal
+%% clustering as returned by minclusters
+%%
+%% If the value in the last place in minclusters is M then we can
+%% split the case list into M clusters. We then search for the last
+%% (== right-most) occurance of the value M-1 in minclusters. That
+%% indicates the largest number of cases that can be split into M-1
+%% clusters. This means that the cases in between constitute one
+%% cluster. Then we recurse on the remainder of the cases.
+%%
+%% The various calls to lists:reverse are just to ensure that the
+%% cases remain in the correct, sorted order.
+
+cluster_split(Cases, Clust) ->
+  A = tl(tuple_to_list(Clust)),
+  Max = element(tuple_size(Clust), Clust),
+  L1 = lists:reverse(Cases),
+  L2 = lists:reverse(A),
+  cluster_split(Max, [], [], L1, L2).
+
+cluster_split(0, [], Res, Cases, _Clust) -> 
+  L = lists:reverse(Cases),
+  {H,_} = hd(L),
+  {T,_} = hd(Cases),
+  [{dense,hipe_icode:const_value(H),hipe_icode:const_value(T),L}|Res];
+cluster_split(N, [], Res, Cases, [N|_] = Clust) -> 
+  cluster_split(N-1, [], Res, Cases, Clust);
+cluster_split(N,Sofar,Res,Cases,[N|Clust]) -> 
+  {H,_} = hd(Sofar),
+  {T,_} = lists:last(Sofar),
+  cluster_split(N-1,[],[{dense,hipe_icode:const_value(H),hipe_icode:const_value(T),Sofar}|Res],Cases,[N|Clust]);
+cluster_split(N,Sofar,Res,[C|Cases],[_|Clust]) ->
+  cluster_split(N,[C|Sofar],Res,Cases,Clust).
+
+%%
+%% Merge adjacent small clusters into larger sparse clusters
+%%
+cluster_merge([C]) -> [C];
+cluster_merge([{dense,Min,Max,C}|T]) when length(C) >= ?MINFORJUMPTABLE ->
+  C2 = cluster_merge(T),
+  [{dense,Min,Max,C}|C2];
+cluster_merge([{sparse,Min,_,C},{sparse,_,Max,D}|T]) ->
+  R = {sparse,Min,Max,C ++ D},
+  cluster_merge([R|T]);
+cluster_merge([{sparse,Min,_,C},{dense,_,Max,D}|T]) when length(D) < ?MINFORJUMPTABLE ->
+  R = {sparse,Min,Max,C ++ D},
+  cluster_merge([R|T]);
+cluster_merge([{dense,Min,_,C},{dense,_,Max,D}|T]) when length(C) < ?MINFORJUMPTABLE, length(D) < ?MINFORJUMPTABLE ->
+  R = {sparse,Min,Max,C ++ D},
+  cluster_merge([R|T]);
+cluster_merge([{dense,Min,_,D},{sparse,_,Max,C}|T]) when length(D) < ?MINFORJUMPTABLE ->
+  R = {sparse,Min,Max,C ++ D},
+  cluster_merge([R|T]);
+cluster_merge([A,{dense,Min,Max,C}|T]) when length(C) >= ?MINFORJUMPTABLE ->
+  R = cluster_merge([{dense,Min,Max,C}|T]),
+  [A|R].
+
+
+%% Generate code to search for the correct cluster
+
+find_cluster([{sparse,_Min,_Max,C}],VarMap,ConstTab,Options,Arg,Fail,_IcodeFail) ->
+  case length(C) < ?MINFORINTSEARCHTREE of
+    true ->
+      gen_small_switch_val(Arg,C,Fail,VarMap,ConstTab,Options);
+    _ ->
+      gen_search_switch_val(Arg,C,Fail,VarMap,ConstTab,Options)
+  end;
+find_cluster([{dense,Min,_Max,C}],VarMap,ConstTab,Options,Arg,Fail,IcodeFail) ->	
+  case length(C) < ?MINFORJUMPTABLE of
+    true ->
+      gen_small_switch_val(Arg,C,Fail,VarMap,ConstTab,Options);
+    _ ->
+      gen_jump_table(Arg,Fail,IcodeFail,VarMap,ConstTab,C,Min)
+  end;
+find_cluster([{Density,Min,Max,C}|T],VarMap,ConstTab,Options,Arg,Fail,IcodeFail) ->
+  ClustLab = hipe_rtl:mk_new_label(),
+  NextLab = hipe_rtl:mk_new_label(),
+  {ClustCode,V1,C1} = find_cluster([{Density,Min,Max,C}],VarMap,ConstTab,Options,Arg,Fail,IcodeFail),
+
+  {Rest,V2,C2} = find_cluster(T,V1,C1,Options,Arg,Fail,IcodeFail),
+  
+  {[
+    hipe_rtl:mk_branch(Arg, gt, hipe_rtl:mk_imm(hipe_tagscheme:mk_fixnum(Max)),
+		       hipe_rtl:label_name(NextLab), 
+		       hipe_rtl:label_name(ClustLab), 0.50),	
+    ClustLab
+   ] ++	
+   ClustCode ++
+   [NextLab] ++
+   Rest,
+   V2,C2}.
+
+%% Generate efficient code for a linear search through the case list.
+%% Only works for atoms and integer.
+gen_linear_switch_val(Arg,Cases,Fail,VarMap,ConstTab,_Options) ->
+  {Values,_Labels} = split_cases(Cases),
+  {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap),
+  Code = fast_linear_search(Arg,Values,LabMap,Fail),
+  {Code,VarMap1,ConstTab}.
+
+fast_linear_search(_Arg,[],[],Fail) ->
+  [hipe_rtl:mk_goto(hipe_rtl:label_name(Fail))];
+fast_linear_search(Arg,[Case|Cases],[Label|Labels],Fail) ->
+  Reg = hipe_rtl:mk_new_reg_gcsafe(),
+  NextLab = hipe_rtl:mk_new_label(),
+  C2 = fast_linear_search(Arg,Cases,Labels,Fail),
+  C1 =
+    if
+      is_integer(Case) ->
+	TVal = hipe_tagscheme:mk_fixnum(Case),
+	[
+	 hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(TVal)),
+	 hipe_rtl:mk_branch(Arg,eq,Reg,
+			    Label,
+			    hipe_rtl:label_name(NextLab), 0.5),
+	 NextLab
+	];
+      is_atom(Case) ->
+	[
+	 hipe_rtl:mk_load_atom(Reg,Case),
+	 hipe_rtl:mk_branch(Arg,eq,Reg,
+			    Label,
+			    hipe_rtl:label_name(NextLab), 0.5),
+	 NextLab
+	];
+      true ->   % This should never happen !
+	?EXIT({internal_error_in_switch_val,Case})
+    end,
+  [C1,C2].
+
+
+%% Generate code to search through a small cluster of integers using
+%% binary search
+gen_small_switch_val(Arg,Cases,Fail,VarMap,ConstTab,_Options) -> 
+  {Values,_Labels} = split_cases(Cases),
+  {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap),
+  Keys = [hipe_tagscheme:mk_fixnum(X)    % Add tags to the values
+	  || X <- Values],
+  Code = inline_search(Keys, LabMap, Arg, Fail),
+  {Code, VarMap1, ConstTab}.
+
+
+%% Generate code to search through a small cluster of atoms
+gen_atom_switch_val(Arg,Cases,Fail,VarMap,ConstTab,_Options) -> 
+  {Values, _Labels} = split_cases(Cases),
+  {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap),
+  LMap = [{label,L} || L <- LabMap],
+  {NewConstTab,Id} = hipe_consttab:insert_sorted_block(ConstTab, Values),
+  {NewConstTab2,LabId} =
+    hipe_consttab:insert_sorted_block(NewConstTab, word, LMap, Values),
+  Code = inline_atom_search(0, length(Cases)-1, Id, LabId, Arg, Fail, LabMap),
+  {Code, VarMap1, NewConstTab2}.
+
+
+%% calculate the middle position of a list (+ 1 because of 1-indexing of lists)
+get_middle(List) ->
+  N = length(List),
+  N div 2 + 1.
+
+%% get element [N1, N2] from a list
+get_cases(_, 0, 0) ->
+  [];
+get_cases([H|T], 0, N) ->
+  [H | get_cases(T, 0, N - 1)];
+get_cases([_|T], N1, N2) ->
+  get_cases(T, N1 - 1, N2 - 1).
+
+
+%% inline_search/4 creates RTL code for a inlined binary search.
+%% It requires two sorted tables - one with the keys to search
+%% through and one with the corresponding labels to jump to.
+%%    
+%% Input: 
+%%  KeyList - A list of keys to search through. 
+%%  LableList - A list of labels to jump to.
+%%  KeyReg - A register containing the key to search for.
+%%  Default - A label to jump to if the key is not found.
+%%
+
+inline_search([], _LabelList, _KeyReg, _Default) -> [];
+inline_search(KeyList, LabelList, KeyReg, Default) ->
+  %% Create some registers and labels that we need.
+  Reg = hipe_rtl:mk_new_reg_gcsafe(),
+  Lab1 = hipe_rtl:mk_new_label(),
+  Lab2 = hipe_rtl:mk_new_label(),
+  Lab3 = hipe_rtl:mk_new_label(),
+  
+  Length = length(KeyList),
+  
+  if
+    Length >= 3 ->
+      %% Get middle element and keys/labels before that and after
+      Middle_pos = get_middle(KeyList),
+      Middle_key = lists:nth(Middle_pos, KeyList),
+      Keys_beginning = get_cases(KeyList, 0, Middle_pos - 1),
+      Labels_beginning = get_cases(LabelList, 0, Middle_pos - 1),
+      Keys_ending = get_cases(KeyList, Middle_pos, Length),
+      Labels_ending = get_cases(LabelList, Middle_pos, Length),
+      
+      %% Create the code.
+      
+      %% Get the label and build it up properly
+      Middle_label = lists:nth(Middle_pos, LabelList),
+      
+      A = [hipe_rtl:mk_move(Reg, hipe_rtl:mk_imm(Middle_key)),
+	   hipe_rtl:mk_branch(KeyReg, lt, Reg, 
+			      hipe_rtl:label_name(Lab2), 
+			      hipe_rtl:label_name(Lab1), 0.5),
+	   Lab1,
+	   hipe_rtl:mk_branch(KeyReg, gt, Reg,
+			      hipe_rtl:label_name(Lab3), 
+			      Middle_label , 0.5),
+	   Lab2],
+      %% build search tree for keys less than the middle element
+      B = inline_search(Keys_beginning, Labels_beginning, KeyReg, Default),
+      %% ...and for keys bigger than the middle element
+      D = inline_search(Keys_ending, Labels_ending, KeyReg, Default),
+      
+      %% append the code and return it
+      A ++ B ++ [Lab3] ++ D;
+    
+    Length =:= 2 ->
+      %% get the first and second elements and theirs labels
+      Key_first = hd(KeyList),
+      First_label = hd(LabelList),
+      
+      %% Key_second = hipe_tagscheme:mk_fixnum(lists:nth(2, KeyList)),
+      Key_second = lists:nth(2, KeyList),
+      Second_label = lists:nth(2, LabelList),
+      
+      NewLab = hipe_rtl:mk_new_label(),
+      
+      %% compare them
+      A = [hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(Key_first)),
+	   hipe_rtl:mk_branch(KeyReg, eq, Reg,
+			      First_label, 
+			      hipe_rtl:label_name(NewLab) , 0.5),
+	   NewLab],
+      
+      B = [hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(Key_second)),
+	   hipe_rtl:mk_branch(KeyReg, eq, Reg,
+			      Second_label, 
+			      hipe_rtl:label_name(Default) , 0.5)],
+      A ++ B;
+    
+    Length =:= 1 ->
+      Key = hd(KeyList),
+      Label = hd(LabelList),
+      
+      [hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(Key)),
+       hipe_rtl:mk_branch(KeyReg, eq, Reg,
+			  Label, 
+			  hipe_rtl:label_name(Default) , 0.5)]
+  end.
+
+
+inline_atom_search(Start, End, Block, LBlock, KeyReg, Default, Labels) ->
+  Reg = hipe_rtl:mk_new_reg_gcsafe(),
+  
+  Length = (End - Start) + 1,
+  
+  if
+    Length >= 3 ->
+      Lab1 = hipe_rtl:mk_new_label(),
+      Lab2 = hipe_rtl:mk_new_label(),
+      Lab3 = hipe_rtl:mk_new_label(),
+      Lab4 = hipe_rtl:mk_new_label(),
+      
+      Mid = ((End-Start) div 2)+Start,
+      End1 = Mid-1,
+      Start1 = Mid+1,
+      A = [
+	   hipe_rtl:mk_load_word_index(Reg,Block,Mid),
+	   hipe_rtl:mk_branch(KeyReg, lt, Reg,
+			      hipe_rtl:label_name(Lab2),
+			      hipe_rtl:label_name(Lab1), 0.5),
+	   Lab1,
+	   hipe_rtl:mk_branch(KeyReg, gt, Reg,
+			      hipe_rtl:label_name(Lab3),
+			      hipe_rtl:label_name(Lab4), 0.5),
+	   Lab4,
+	   hipe_rtl:mk_goto_index(LBlock, Mid, Labels),
+	   Lab2
+	  ],
+      B = [inline_atom_search(Start,End1,Block,LBlock,KeyReg,Default,Labels)],
+      C = [inline_atom_search(Start1,End,Block,LBlock,KeyReg,Default,Labels)],
+      A ++ B ++ [Lab3] ++ C;
+    
+    Length =:= 2 ->
+      L1 = hipe_rtl:mk_new_label(),
+      L2 = hipe_rtl:mk_new_label(),
+      L3 = hipe_rtl:mk_new_label(),
+      [
+       hipe_rtl:mk_load_word_index(Reg,Block,Start),
+       hipe_rtl:mk_branch(KeyReg,eq,Reg,
+			  hipe_rtl:label_name(L1),
+			  hipe_rtl:label_name(L2), 0.5),
+       L1,
+       hipe_rtl:mk_goto_index(LBlock,Start,Labels),
+       
+       L2,
+       hipe_rtl:mk_load_word_index(Reg,Block,End),
+       hipe_rtl:mk_branch(KeyReg,eq,Reg,
+			  hipe_rtl:label_name(L3),
+			  hipe_rtl:label_name(Default), 0.5),
+       L3,
+       hipe_rtl:mk_goto_index(LBlock, End, Labels)
+      ];
+    
+    Length =:= 1 ->
+      NewLab = hipe_rtl:mk_new_label(),
+      [
+       hipe_rtl:mk_load_word_index(Reg,Block,Start),
+       hipe_rtl:mk_branch(KeyReg, eq, Reg,
+			  hipe_rtl:label_name(NewLab),
+			  hipe_rtl:label_name(Default), 0.9),
+       NewLab,
+       hipe_rtl:mk_goto_index(LBlock, Start, Labels)
+      ]
+  end.
+
+
+%% Create a jumptable
+gen_jump_table(Arg,Fail,IcodeFail,VarMap,ConstTab,Cases,Min) ->
+  %% Map is a rtl mapping of Dense
+  {Max,DenseTbl} = dense_interval(Cases,Min,IcodeFail),
+  {Map,VarMap2} = lbls_from_cases(DenseTbl,VarMap),
+  
+  %% Make some labels and registers that we need.
+  BelowLab = hipe_rtl:mk_new_label(),
+  UntaggedR = hipe_rtl:mk_new_reg_gcsafe(),
+  StartR = hipe_rtl:mk_new_reg_gcsafe(),
+  
+  %% Generate the code to do the switch...
+  {[
+    %% Untag the index.
+    hipe_tagscheme:untag_fixnum(UntaggedR, Arg)|
+    %% Check that the index is within Min and Max.
+    case Min of
+      0 -> %% First element is 0 this is simple.
+	[hipe_rtl:mk_branch(UntaggedR, gtu, hipe_rtl:mk_imm(Max),
+			    hipe_rtl:label_name(Fail), 
+			    hipe_rtl:label_name(BelowLab), 0.01),
+	 BelowLab,
+	 %% StartR contains the index into the jumptable
+	 hipe_rtl:mk_switch(UntaggedR, Map)];
+      _ -> %% First element is not 0 
+	[hipe_rtl:mk_alu(StartR, UntaggedR, sub, 
+			 hipe_rtl:mk_imm(Min)), 
+	 hipe_rtl:mk_branch(StartR, gtu, hipe_rtl:mk_imm(Max-Min),
+			    hipe_rtl:label_name(Fail), 
+			    hipe_rtl:label_name(BelowLab), 0.01),
+	 BelowLab,
+	 %% StartR contains the index into the jumptable
+	 hipe_rtl:mk_switch(StartR, Map)]
+    end],
+   VarMap2,
+   ConstTab}.
+
+
+%% Generate the jumptable for Cases while filling in unused positions
+%% with the fail label
+
+dense_interval(Cases, Min, IcodeFail) ->
+  dense_interval(Cases, Min, IcodeFail, 0, 0).
+dense_interval([Pair = {Const,_}|Rest], Pos, Fail, Range, NoEntries) ->
+  Val = hipe_icode:const_value(Const),
+  if 
+    Pos < Val ->
+      {Max, Res} = 
+	dense_interval([Pair|Rest], Pos+1, Fail, Range+1, NoEntries),
+      {Max,[{hipe_icode:mk_const(Pos), Fail}|Res]};
+    true ->
+      {Max, Res} = dense_interval(Rest, Pos+1, Fail, Range+1, NoEntries+1),
+      {Max, [Pair | Res]}
+  end;
+dense_interval([], Max, _, _, _) -> 
+  {Max-1, []}.
+
+
+%%-------------------------------------------------------------------------
+%% switch_val without jumptable
+%%
+
+gen_slow_switch_val(I, VarMap, ConstTab, Options) ->
+  Is = rewrite_switch_val(I),
+  ?IF_DEBUG_LEVEL(3,?msg("Switch: ~w\n", [Is]), no_debug),
+  hipe_icode2rtl:translate_instrs(Is, VarMap, ConstTab, Options).
+
+rewrite_switch_val(I) ->
+  Var = hipe_icode:switch_val_term(I),
+  Fail = hipe_icode:switch_val_fail_label(I),
+  Cases = hipe_icode:switch_val_cases(I),
+  rewrite_switch_val_cases(Cases, Fail, Var).
+
+rewrite_switch_val_cases([{C,L}|Cases], Fail, Arg) ->
+  Tmp = hipe_icode:mk_new_var(),
+  NextLab = hipe_icode:mk_new_label(),
+  [hipe_icode:mk_move(Tmp, C),
+   hipe_icode:mk_if(op_exact_eqeq_2, [Arg, Tmp], L,
+		    hipe_icode:label_name(NextLab)),
+   NextLab |
+   rewrite_switch_val_cases(Cases, Fail, Arg)];
+rewrite_switch_val_cases([], Fail, _Arg) ->
+  [hipe_icode:mk_goto(Fail)].
+
+
+%%-------------------------------------------------------------------------
+%% switch_val with binary search jumptable
+%%
+
+gen_search_switch_val(Arg, Cases, Default, VarMap, ConstTab, _Options) ->
+  ValTableR = hipe_rtl:mk_new_reg_gcsafe(),
+
+  {Values,_Labels} = split_cases(Cases),
+  {NewConstTab,Id} = hipe_consttab:insert_sorted_block(ConstTab, Values),
+  {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap),
+  
+  Code = 
+    [hipe_rtl:mk_load_address(ValTableR, Id, constant)|
+     tab(Values,LabMap,Arg,ValTableR,Default)],
+  {Code, VarMap1, NewConstTab}.
+
+
+%%-------------------------------------------------------------------------
+%%
+%% tab/5 creates RTL code for a binary search.
+%% It requires two sorted tables one with the keys to search
+%% through and one with the corresponding labels to jump to.
+%%
+%% The implementation is derived from John Bentlys
+%% Programming Pearls.
+%%
+%% Input: 
+%%  KeyList - A list of keys to search through. 
+%%           (Just used to calculate the number of elements.)
+%%  LableList - A list of labels to jump to.
+%%  KeyReg - A register containing the key to search for.
+%%  TablePntrReg - A register containing a pointer to the
+%%                tables with keys
+%%  Default - A lable to jump to if the key is not found.
+%%
+%% Example:
+%%  KeyTbl: < a, b, d, f, h, i, z >
+%%  Lbls:   < 5, 3, 2, 4, 1, 7, 6 >
+%%  Default: 8
+%%  KeyReg: v37
+%%  TablePntrReg: r41 
+%%
+%%  should give code like:
+%%    r41 <- KeyTbl
+%%    r42 <- 0
+%%    r43 <- [r41+16]
+%%    if (r43 gt v37) then L17 (0.50) else L16
+%% L16:
+%%    r42 <- 16
+%%    goto L17
+%% L17: 
+%%    r46 <- r42 add 16
+%%    r45 <- [r41+r46]
+%%    if (r45 gt v37) then L21 (0.50) else L20
+%% L20: 
+%%    r42 <- r46
+%%    goto L21
+%% L21: 
+%%    r48 <- r42 add 8
+%%    r47 <- [r41+r48]
+%%    if (r47 gt v37) then L23 (0.50) else L22
+%% L22: 
+%%    r42 <- r48
+%%    goto L23
+%% L23: 
+%%    r50 <- r42 add 4
+%%    r49 <- [r41+r50]
+%%    if (r49 gt v37) then L25 (0.50) else L24
+%% L24: 
+%%    r42 <- r42 add 4
+%%    goto L25
+%% L25: 
+%%    if (r42 gt 28) then L6 (0.50) else L18
+%% L18: 
+%%    r44 <- [r41+r42]
+%%    if (r44 eq v37) then L19 (0.90) else L8
+%% L19: 
+%%    r42 <- r42 sra 2
+%%    switch (r42) <L5, L3, L2, L4, L1, 
+%%              L7, L6>
+
+%%   
+%% The search is done like a rolled out binary search,
+%% but instead of starting in the middle we start at 
+%% the power of two closest above the middle.
+%%
+%% We let IndexReg point to the lower bound of our
+%% search, and then we speculatively look at a 
+%% position at IndexReg + I where I is a power of 2.
+%%
+%% Example: Looking for 'h' in 
+%%  KeyTbl: < a, b, d, f, h, i, z >
+%%
+%%  We start with IndexReg=0 and I=4
+%%          < a, b, d, f, h, i, z >
+%%            ^        ^
+%%     IndexReg    +   I
+%%
+%%  'f' < 'h' so we add I to IndexReg and divide I with 2
+%%  IndexReg=4 and I=2
+%%          < a, b, d, f, h, i, z >
+%%                     ^     ^
+%%              IndexReg +  I
+%%
+%%  'i' > 'h' so we keep IndexReg and divide I with 2
+%%  IndexReg=4 and I=1
+%%          < a, b, d, f, h, i, z >
+%%                     ^  ^
+%%              IndexReg+ I
+%%  Now we have found 'h' so we add I to IndexReg -> 5
+%%  And we can load switch to the label at position 5 in
+%%  the label table.
+%%
+%%  Now since the wordsize is 4 all numbers above are 
+%%  Multiples of 4.
+
+tab(KeyList, LabelList, KeyReg, TablePntrReg, Default) ->
+  %% Calculate the size of the table: 
+  %%  the number of keys * wordsize
+  LastOffset = (length(KeyList)-1)*?WORDSIZE,
+
+  %% Calculate the power of two closest to the size of the table.
+  Pow2 = 1 bsl trunc(math:log(LastOffset) / math:log(2)),
+
+  %% Create some registers and lables that we need
+  IndexReg = hipe_rtl:mk_new_reg_gcsafe(),
+  Temp = hipe_rtl:mk_new_reg_gcsafe(),
+  Temp2 = hipe_rtl:mk_new_reg_gcsafe(),
+  Lab1 = hipe_rtl:mk_new_label(),
+  Lab2 = hipe_rtl:mk_new_label(),
+  Lab3 = hipe_rtl:mk_new_label(),
+  Lab4 = hipe_rtl:mk_new_label(),
+  
+  %% Calculate the position to start looking at
+  Init = (LastOffset)-Pow2,
+
+  %% Create the code
+  [
+   hipe_rtl:mk_move(IndexReg,hipe_rtl:mk_imm(0)),
+   hipe_rtl:mk_load(Temp,TablePntrReg,hipe_rtl:mk_imm(Init)),
+   hipe_rtl:mk_branch(Temp, geu, KeyReg,
+		      hipe_rtl:label_name(Lab2), 
+		      hipe_rtl:label_name(Lab1), 0.5),
+   Lab1,
+   hipe_rtl:mk_alu(IndexReg, IndexReg, add, hipe_rtl:mk_imm(Init+?WORDSIZE)),
+   hipe_rtl:mk_goto(hipe_rtl:label_name(Lab2)),
+   Lab2] ++
+
+    step(Pow2 div 2, TablePntrReg, IndexReg, KeyReg) ++
+
+    [hipe_rtl:mk_branch(IndexReg, gt, hipe_rtl:mk_imm(LastOffset),
+		       hipe_rtl:label_name(Default), 
+		       hipe_rtl:label_name(Lab3), 0.5),
+     Lab3,
+     hipe_rtl:mk_load(Temp2,TablePntrReg,IndexReg),
+     hipe_rtl:mk_branch(Temp2, eq, KeyReg,
+			hipe_rtl:label_name(Lab4), 
+			hipe_rtl:label_name(Default), 0.9),
+     Lab4,
+     hipe_rtl:mk_alu(IndexReg, IndexReg, sra,
+                     hipe_rtl:mk_imm(hipe_rtl_arch:log2_word_size())),
+     hipe_rtl:mk_sorted_switch(IndexReg, LabelList, KeyList)
+    ].
+
+step(I,TablePntrReg,IndexReg,KeyReg) ->
+  Temp = hipe_rtl:mk_new_reg_gcsafe(),
+  TempIndex = hipe_rtl:mk_new_reg_gcsafe(),
+  Lab1 = hipe_rtl:mk_new_label(),
+  Lab2 = hipe_rtl:mk_new_label(),
+  [hipe_rtl:mk_alu(TempIndex, IndexReg, add, hipe_rtl:mk_imm(I)),
+   hipe_rtl:mk_load(Temp,TablePntrReg,TempIndex),
+   hipe_rtl:mk_branch(Temp, gtu, KeyReg,
+                      hipe_rtl:label_name(Lab2), 
+                      hipe_rtl:label_name(Lab1) , 0.5),
+   Lab1] ++
+    case ?WORDSIZE of
+      I -> %% Recursive base case
+        [hipe_rtl:mk_alu(IndexReg, IndexReg, add, hipe_rtl:mk_imm(I)),
+         hipe_rtl:mk_goto(hipe_rtl:label_name(Lab2)),
+         Lab2
+        ];
+      _ -> %% Recursion case
+        [hipe_rtl:mk_move(IndexReg, TempIndex),
+         hipe_rtl:mk_goto(hipe_rtl:label_name(Lab2)),
+         Lab2
+         | step(I div 2, TablePntrReg, IndexReg, KeyReg)
+        ]
+    end.
+
+%%-------------------------------------------------------------------------
+
+lbls_from_cases([{_,L}|Rest], VarMap) ->
+  {Map,VarMap1} = lbls_from_cases(Rest, VarMap),
+  {RtlL, VarMap2} = hipe_rtl_varmap:icode_label2rtl_label(L,VarMap1),
+  %%  {[{label,hipe_rtl:label_name(RtlL)}|Map],VarMap2};
+  {[hipe_rtl:label_name(RtlL)|Map],VarMap2};
+lbls_from_cases([], VarMap) ->
+  {[], VarMap}.
+
+%%-------------------------------------------------------------------------
+
+split_cases(L) -> 
+  split_cases(L, [], []).
+
+split_cases([], Vs, Ls) -> {lists:reverse(Vs),lists:reverse(Ls)};
+split_cases([{V,L}|Rest], Vs, Ls) ->
+  split_cases(Rest, [hipe_icode:const_value(V)|Vs], [L|Ls]).
+
+%%-------------------------------------------------------------------------
+%%
+%% {switch_tuple_arity,X,Fail,N,[{A1,L1},...,{AN,LN}]}
+%%
+%% if not boxed(X) goto Fail
+%% Hdr := *boxed_val(X)
+%% switch_int(Hdr,Fail,[{H(A1),L1},...,{H(AN),LN}])
+%% where H(Ai) = make_arityval(Ai)
+%% 
+%%-------------------------------------------------------------------------
+
+gen_switch_tuple(I, Map, ConstTab, _Options) ->
+  Var = hipe_icode:switch_tuple_arity_term(I),
+  {X, Map1} = hipe_rtl_varmap:icode_var2rtl_var(Var, Map),
+  Fail0 = hipe_icode:switch_tuple_arity_fail_label(I),
+  {Fail1, Map2} = hipe_rtl_varmap:icode_label2rtl_label(Fail0, Map1),
+  FailLab = hipe_rtl:label_name(Fail1),
+  {Cases, Map3} =
+    lists:foldr(fun({A,L}, {Rest,M}) ->
+		    {L1,M1} = hipe_rtl_varmap:icode_label2rtl_label(L, M),
+		    L2 = hipe_rtl:label_name(L1),
+		    A1 = hipe_icode:const_value(A),
+		    H1 = hipe_tagscheme:mk_arityval(A1),
+		    {[{H1,L2}|Rest], M1} end,
+		{[], Map2},
+		hipe_icode:switch_tuple_arity_cases(I)),
+  Hdr = hipe_rtl:mk_new_reg_gcsafe(),
+  IsBoxedLab = hipe_rtl:mk_new_label(),
+  {[hipe_tagscheme:test_is_boxed(X, hipe_rtl:label_name(IsBoxedLab),
+				 FailLab, 0.9),
+    IsBoxedLab,
+    hipe_tagscheme:get_header(Hdr, X) |
+    gen_switch_int(Hdr, FailLab, Cases)],
+   Map3, ConstTab}.
+
+%%
+%% RTL-level switch-on-int
+%%
+
+gen_switch_int(X, FailLab, [{C,L}|Rest]) ->
+  NextLab = hipe_rtl:mk_new_label(),
+  [hipe_rtl:mk_branch(X, eq, hipe_rtl:mk_imm(C), L,
+		      hipe_rtl:label_name(NextLab), 0.5),
+   NextLab |
+   gen_switch_int(X, FailLab, Rest)];
+gen_switch_int(_, FailLab, []) ->
+  [hipe_rtl:mk_goto(FailLab)].
+
author	Erlang/OTP <[email protected]>	2009-11-20 14:54:40 +0000
committer	Erlang/OTP <[email protected]>	2009-11-20 14:54:40 +0000
commit	84adefa331c4159d432d22840663c38f155cd4c1 (patch)
tree	bff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/hipe/rtl/hipe_rtl_mk_switch.erl
download	otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2 otp-84adefa331c4159d432d22840663c38f155cd4c1.zip