1 files changed, 237 insertions, 0 deletions
diff --git a/erts/emulator/internal_doc/dec.erl b/erts/emulator/internal_doc/dec.erl
new file mode 100644
index 0000000000..0315f2a52d
--- /dev/null
+++ b/erts/emulator/internal_doc/dec.erl
@@ -0,0 +1,237 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2000-2010. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%% This program is used to generate a header file with data for
+%% normalizing denormalized unicode.
+
+%% The C header is generated from a text file containing tuples in the 
+%% following format:
+%% {RevList,Translation}
+%% Where 'RevList' is a reversed list of the denormalized representation of
+%% the character 'Translation'. An example would be the Swedish character
+%% 'ö', which would be represented in the file as:
+%% {[776,111],246}, as the denormalized representation of codepoint 246
+%% is [111,776] (i.e. an 'o' followed by the "double dot" accent character 776),
+%% while 'ä' instead is represented as {[776,97],228}, as the denormalized
+%% form would be [97,776] (same accent but an 'a' instead).
+%% The datafile is generated from the table on Apple's developer connection
+%% http://developer.apple.com/library/mac/#technotes/tn/tn1150table.html
+%% The header is regenerated only when new data is present (i.e. dec.dat has
+%% to be changed), not on every build. The product (the C header) is copied
+%% to $ERL_TOP/erts/beam after generation and checked in.
+%% The program and the data file are included for reference.
+
+-module(dec).
+
+-compile(export_all).
+
+%% Each generated hash table gets (number of entries) * HASH_SIZE_FACTOR
+%% slots; the factor is also written to the C header as a #define.
+-define(HASH_SIZE_FACTOR,2).
+%% Only referenced from the commented-out "big prefix" code in this file.
+-define(BIG_PREFIX_SIZE,392).
+
+%% Data file consulted by dec/0 and the header file dec/0 writes.
+-define(INPUT_FILE_NAME,"dec.dat").
+-define(OUTPUT_FILE_NAME,"erl_unicode_normalize.h").
+
+%% Consults the term file FName and returns, in file order, every
+%% {RevList,Translation} pair whose RevList has more than one element.
+%% Terms that are not 2-tuples are dropped by the generator pattern.
+%% Crashes with badmatch if the file cannot be consulted.
+read(FName) ->
+    {ok,Terms} = file:consult(FName),
+    [Entry || {RevList,_Translation} = Entry <- Terms, length(RevList) > 1].
+
+%% Entry point. Reads ?INPUT_FILE_NAME, groups the entries into a tree and
+%% writes the generated C header to ?OUTPUT_FILE_NAME. Returns ok; a file
+%% that cannot be read or opened surfaces as a badmatch.
+dec() ->
+    Entries = read(?INPUT_FILE_NAME),
+    Tree = group(Entries),
+    {ok,Out} = file:open(?OUTPUT_FILE_NAME,[write]),
+    write_banner(Out),
+    write_declarations(Out,length(Tree)),
+    d(Out,Tree,[],0),
+    %% 102 = (4023 div 32) + 1 - 24, i.e. the highest index that
+    %% make_prefix_table/2 computes for the code points it accepts.
+    Bitmap = make_prefix_table(Tree,erlang:make_tuple(102,0)),
+    dump_prefixes(Out,tuple_to_list(Bitmap)),
+%% Using this cuts down on the searching in the
+%% actual implementation, but wastes memory with little real gain..
+%%    LL = lists:flatten([PartList || {PartList,_} <- Entries]),
+%%    BigPreTab = tuple_to_list(
+%%		  make_big_prefixes(LL,
+%%				    erlang:make_tuple(?BIG_PREFIX_SIZE,0))),
+%%    dump_big_prefixes(Out,BigPreTab),
+    file:close(Out),
+    ok.
+
+%% Writes the license comment followed by the "automatically generated"
+%% notice that opens the C header.
+write_banner(Out) ->
+    io:format
+      (Out,
+       "/*~n"
+       "* %CopyrightBegin%~n"
+       "*~n"
+       "* Copyright Ericsson AB 1999-2010. All Rights Reserved.~n"
+       "*~n"
+       "* The contents of this file are subject to the Erlang Public License,~n"
+       "* Version 1.1, (the \"License\"); you may not use this file except in~n"
+       "* compliance with the License. You should have received a copy of the~n"
+       "* Erlang Public License along with this software. If not, it can be~n"
+       "* retrieved online at http://www.erlang.org/.~n"
+       "*~n"
+       "* Software distributed under the License is distributed on an \"AS IS\"~n"
+       "* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See~n"
+       "* the License for the specific language governing rights and limitations~n"
+       "* under the License.~n"
+       "*~n"
+       "* %CopyrightEnd%~n"
+       "*/~n",
+       []),
+    io:format
+      (Out,
+       "/*~n"
+       "* This file is automatically generated by ~p.erl, do not edit manually~n"
+       "*/~n",
+       [?MODULE]).
+
+%% Writes the HASH_SIZE_FACTOR define, the CompEntry struct declaration and
+%% the size of the top-level table (TopSize).
+write_declarations(Out,TopSize) ->
+    io:format(Out,
+              "#define HASH_SIZE_FACTOR ~w~n"
+              "typedef struct _compose_entry {~n"
+              "    Uint16 c;~n"
+              "    Uint16 res;~n"
+              "    Uint16 num_subs;~n"
+              "    struct _compose_entry *subs;~n"
+              "    int *hash;~n"
+              "} CompEntry;~n~n"
+              "static int compose_tab_size = ~p;~n",
+              [?HASH_SIZE_FACTOR,TopSize]).
+    
+   
+
+%% Writes everything belonging to one level of the tree to Out: first the
+%% tables of all sub-levels, then this level's hash table and finally this
+%% level's CompEntry array. Sub-levels go first, so the table names this
+%% level's initializers refer to have already been written.
+%% Path is the list of entry indices leading to this level (innermost
+%% first); Start is the index assigned to the first entry.
+d(Out,Entries,Path,Start) ->
+    d_sub(Out,Entries,Path,Start),
+    d_top_hash(Out,Entries,Path,Start),
+    d_top(Out,Entries,Path,Start).
+%% Walks the entries of one level, numbering them from C upwards, and
+%% emits (via d/4) the tables of every entry that has sub-entries. The
+%% entry's index is pushed onto the path D to name the sub-level.
+%% Always returns ok.
+d_sub(Out,List,D,C) ->
+    EmitChildren =
+        fun({_CP,[],_Res},Index) ->
+                Index + 1;
+           ({_CP,Subs,_Res},Index) ->
+                d(Out,Subs,[Index|D],0),
+                Index + 1
+        end,
+    lists:foldl(EmitChildren,C,List),
+    ok.
+%% Writes the CompEntry array for the level identified by path D: the
+%% opening line, one initializer per entry (d_top_1/4) and the closing
+%% line, which repeats the table name in a comment.
+d_top(Out,L,D,C) ->
+    TableName = format_depth(D),
+    io:format(Out,"static CompEntry ~s[] = {~n",[TableName]),
+    d_top_1(Out,L,D,C),
+    io:format(Out,"}; /* ~s */ ~n",[TableName]).
+    
+%% Writes one CompEntry initializer per entry, separated by commas (the
+%% last one is followed by a newline only). C is the index of the current
+%% entry within its level and is used to name its sub-table.
+%% A leaf (no subs) is written as {CP, Res, 0, NULL, NULL}; an entry with
+%% subs is written with its sub-table name and matching hash table name.
+%% NOTE(review): for an entry with subs the res field is always written
+%% as 0, so a non-zero Res on such an entry would be lost -- confirm that
+%% the input never produces one.
+d_top_1(_Out,[],_D,_C) ->
+    ok;
+d_top_1(Out,[{CP,Subs,Res}|Rest],D,C) ->
+    case Subs of
+        [] ->
+            io:format(Out,"{~w, ~w, 0, NULL, NULL}",[CP,Res]);
+        _ ->
+            SubTable = format_depth([C|D]),
+            io:format(Out,"{~w, 0, ~w, ~s, ~s}",
+                      [CP,length(Subs),SubTable,"hash_"++SubTable])
+    end,
+    Separator = case Rest of
+                    [] -> "~n";
+                    _ -> ",~n"
+                end,
+    io:format(Out,Separator,[]),
+    d_top_1(Out,Rest,D,C+1).
+
+
+%% Writes the hash table for the level identified by path D. The table
+%% has length(List) * ?HASH_SIZE_FACTOR slots, starts out as all -1 and is
+%% filled by d_top_hash_1/4. The resulting tuple is printed with ~p, so
+%% its {..} form becomes the initializer of the C array.
+d_top_hash(Out,List,D,_C) ->
+     Slots = length(List)*?HASH_SIZE_FACTOR,
+     Name = "hash_"++format_depth(D),
+     io:format(Out,"static int ~s[~p] = ~n",[Name,Slots]),
+     Table = d_top_hash_1(List,0,erlang:make_tuple(Slots,-1),Slots),
+     io:format(Out,"~p; /* ~s */ ~n",[Table,Name]).
+
+%% Inserts the entries into the hash tuple: for each entry, the first free
+%% slot at or after (CP rem HSize) is set to the entry's position in the
+%% list, counting from Index. Returns the filled tuple.
+d_top_hash_1(Entries,Index,Hash,HSize) ->
+    Insert =
+        fun({CP,_,_},{Pos,Table}) ->
+                Slot = hash_search(Table,HSize,CP rem HSize),
+                {Pos + 1,erlang:setelement(Slot + 1,Table,Pos)}
+        end,
+    {_Next,Filled} = lists:foldl(Insert,{Index,Hash},Entries),
+    Filled.
+
+%% Linear probing: returns the first zero-based slot number, starting at
+%% Bucket and wrapping around at HSize, whose value in Hash is -1.
+%% Does not terminate if no slot holds -1.
+hash_search(Hash,HSize,Bucket) ->
+    if
+        element(Bucket + 1,Hash) =:= -1 ->
+            Bucket;
+        true ->
+            hash_search(Hash,HSize,(Bucket + 1) rem HSize)
+    end.
+
+%% Builds the C identifier for the table at path D. D holds the entry
+%% indices innermost first, so it is reversed before formatting:
+%% [] gives "compose_tab", [1,0] gives "compose_tab_0_1".
+format_depth(D) ->
+    Suffix = lists:append(["_" ++ integer_to_list(X) || X <- lists:reverse(D)]),
+    "compose_tab" ++ Suffix.
+
+
+
+
+%% Sets one bit in Table (a tuple of integers used as 32-bit words) for
+%% the code point C of every {C,_,_} entry with C =< 4023: word
+%% (C div 32) + 1 - 24, bit C rem 32. All other elements are skipped.
+%% Returns the updated tuple.
+%% NOTE(review): a code point below 768 gives an index below 1 and fails
+%% with badarg -- presumably the input never contains one; confirm.
+make_prefix_table(Entries,Table) ->
+    SetBit =
+        fun({C,_,_},Acc) when C =< 4023 ->
+                Word = (C div 32) + 1 - 24,
+                Mask = 1 bsl (C rem 32),
+                setelement(Word,Acc,element(Word,Acc) bor Mask);
+           (_Other,Acc) ->
+                Acc
+        end,
+    lists:foldl(SetBit,Table,Entries).
+
+%% Writes the candidate bitmap to Out: the COMP_CANDIDATE_MAP_OFFSET
+%% define (24, the same offset make_prefix_table/2 subtracts), the array
+%% header, and then the words of L via dump_prefixes_1/2.
+dump_prefixes(Out,L) ->
+    io:format(Out,
+              "#define COMP_CANDIDATE_MAP_OFFSET 24~n"
+              "static Uint32 comp_candidate_map[] = {~n",
+              []),
+    dump_prefixes_1(Out,L).
+%% Writes each word as an 8-digit, zero-padded hexadecimal literal with a
+%% U suffix, one per line and comma separated, then closes the array
+%% after the last word. An empty list is not handled (function_clause).
+dump_prefixes_1(Out,[Last]) ->
+    io:format(Out,"    0x~8.16.0BU~n};~n",[Last]);
+dump_prefixes_1(Out,[Word|More]) ->
+    io:format(Out,"    0x~8.16.0BU,~n",[Word]),
+    dump_prefixes_1(Out,More).
+
+%% make_big_prefixes([],Table) ->
+%%     Table;
+%% make_big_prefixes([C|T],Table) ->
+%%     Index = (C div 32) + 1,
+%%     Pos = C rem 32,
+%%     X = element(Index,Table),
+%%     Y = X bor (1 bsl Pos),
+%%     NewTab = setelement(Index,Table,Y),
+%%     make_big_prefixes(T,NewTab).
+
+%% dump_big_prefixes(Out,L) ->
+%%     io:format(Out,"#define BIG_COMP_CANDIDATE_SIZE ~w~n", [?BIG_PREFIX_SIZE]),
+%%     io:format(Out,"static Uint32 big_comp_candidate_map[] = {~n",[]),
+%%     dump_prefixes_1(Out,L).
+   
+%% Splits off the leading run of {RevList,N} entries whose RevList starts
+%% with H. Returns {Picked,Rest}: Picked is Acc reversed followed by the
+%% matching entries with that first element removed; Rest is the
+%% remaining input, starting at the first entry that does not match.
+%% An entry with an empty RevList inside the run fails with function_clause.
+pick(Entries,H,Acc) ->
+    StartsWithH = fun({[First|_],_}) -> First =:= H end,
+    {Run,Rest} = lists:splitwith(StartsWithH,Entries),
+    Stripped = [{Tail,N} || {[_|Tail],N} <- Run],
+    {lists:reverse(Acc,Stripped),Rest}.
+    
+
+%% Turns a list of {RevList,N} entries into a tree of {CP,Subs,Res} nodes.
+%% Consecutive entries sharing the same first element CP become one node;
+%% their remaining elements are grouped recursively into Subs.
+%% If the first entry of a run consists of CP alone, its N becomes the
+%% node's Res and only the entries after it are grouped below the node;
+%% otherwise Res is 0 and the whole run, first entry included, goes below.
+group([]) ->
+    [];
+group([{[CP|More],N}|Others] = All) ->
+    {Candidates,Res} = case More of
+                           [] -> {Others,N};
+                           _ -> {All,0}
+                       end,
+    {Part,Rest} = pick(Candidates,CP,[]),
+    [{CP,group(Part),Res}|group(Rest)].
+    
+    
+
+
+