aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/internal_doc/dec.erl
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator/internal_doc/dec.erl')
-rw-r--r--erts/emulator/internal_doc/dec.erl237
1 files changed, 237 insertions, 0 deletions
diff --git a/erts/emulator/internal_doc/dec.erl b/erts/emulator/internal_doc/dec.erl
new file mode 100644
index 0000000000..0315f2a52d
--- /dev/null
+++ b/erts/emulator/internal_doc/dec.erl
@@ -0,0 +1,237 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2000-2010. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%% This program is used to generate a header file with data for
+%% normalizing denormalized unicode.
+
+%% The C header is generated from a text file containing tuples in the
+%% following format:
+%% {RevList,Translation}
+%% Where 'RevList' is a reversed list of the denormalized repressentation of
+%% the character 'Translation'. An example would be the swedish character
+%% '�', which would be represented in the file as:
+%% {[776,111],246}, as the denormalized representation of codepoint 246
+%% is [111,776] (i.e an 'o' followed by the "double dot accent character 776),
+%% while '�' instead is represented as {[776,97],228}, as the denormalized
+%% form would be [97,776] (same accent but an 'a' instead).
+%% The datafile is generated from the table on Apple's developer connection
+%% http://developer.apple.com/library/mac/#technotes/tn/tn1150table.html
+%% The generating is done whenever new data is present (i.e. dec.dat has
+%% to be changed) and not for every build. The product (the C header) is copied
+%% to $ERL_TOP/erts/beam after generation and checked in.
+%% The program and the data file is included for reference.
+
+-module(dec).
+
+-compile(export_all).
+
+-define(HASH_SIZE_FACTOR,2).
+-define(BIG_PREFIX_SIZE,392).
+
+-define(INPUT_FILE_NAME,"dec.dat").
+-define(OUTPUT_FILE_NAME,"erl_unicode_normalize.h").
+
+read(FName) ->
+ {ok,L} = file:consult(FName),
+ [{A,B} || {A,B} <- L,
+ length(A) > 1% , hd(A) < 769
+ ].
+
+dec() ->
+ L = read(?INPUT_FILE_NAME),
+ G = group(L),
+ {ok,Out} = file:open(?OUTPUT_FILE_NAME,[write]),
+ io:format
+ (Out,
+ "/*~n"
+ "* %CopyrightBegin%~n"
+ "*~n"
+ "* Copyright Ericsson AB 1999-2010. All Rights Reserved.~n"
+ "*~n"
+ "* The contents of this file are subject to the Erlang Public License,~n"
+ "* Version 1.1, (the \"License\"); you may not use this file except in~n"
+ "* compliance with the License. You should have received a copy of the~n"
+ "* Erlang Public License along with this software. If not, it can be~n"
+ "* retrieved online at http://www.erlang.org/.~n"
+ "*~n"
+ "* Software distributed under the License is distributed on an "
+ "\"AS IS\"~n"
+ "* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See~n"
+ "* the License for the specific language governing rights and "
+ "limitations~n"
+ "* under the License.~n"
+ "*~n"
+ "* %CopyrightEnd%~n"
+ "*/~n"
+ "/*~n"
+ "* This file is automatically generated by ~p.erl, "
+ "do not edit manually~n"
+ "*/~n",
+ [?MODULE]),
+
+ io:format(Out,
+ "#define HASH_SIZE_FACTOR ~w~n"
+ "typedef struct _compose_entry {~n"
+ " Uint16 c;~n"
+ " Uint16 res;~n"
+ " Uint16 num_subs;~n"
+ " struct _compose_entry *subs;~n"
+ " int *hash;~n"
+ "} CompEntry;~n~n"
+ "static int compose_tab_size = ~p;~n",
+ [?HASH_SIZE_FACTOR,length(G)]),
+ d(Out,G,[],0),
+ PreTab = tuple_to_list(make_prefix_table(G,erlang:make_tuple(102,0))),
+ dump_prefixes(Out,PreTab),
+%% Using this cuts down on the searching in the
+%% actual implementation, but wastes memory with little real gain..
+%% LL = lists:flatten([PartList || {PartList,_} <- L]),
+%% BigPreTab = tuple_to_list(
+%% make_big_prefixes(LL,
+%% erlang:make_tuple(?BIG_PREFIX_SIZE,0))),
+%% dump_big_prefixes(Out,BigPreTab),
+ file:close(Out),
+ ok.
+
+
+
+d(Out,List,D,C) ->
+ d_sub(Out,List,D,C),
+ d_top_hash(Out,List,D,C),
+ d_top(Out,List,D,C).
+d_sub(_Out,[],_D,_C) ->
+ ok;
+d_sub(Out,[{_CP,[],_Res}|T],D,C) ->
+ d_sub(Out,T,D,C+1);
+d_sub(Out,[{_CP,Subs,_Res0}|T],D,C) ->
+ d(Out,Subs,[C|D],0),
+ d_sub(Out,T,D,C+1).
+d_top(Out,L,D,C) ->
+ io:format(Out,"static CompEntry ~s[] = {~n",[format_depth(D)]),
+ d_top_1(Out,L,D,C),
+ io:format(Out,"}; /* ~s */ ~n",[format_depth(D)]).
+
+d_top_1(_Out,[],_D,_C) ->
+ ok;
+d_top_1(Out,[{CP,[],Res}|T],D,C) ->
+ io:format(Out,
+ "{~w, ~w, 0, NULL, NULL}",[CP,Res]),
+ if
+ T =:= [] ->
+ io:format(Out,"~n",[]);
+ true ->
+ io:format(Out,",~n",[])
+ end,
+ d_top_1(Out,T,D,C+1);
+d_top_1(Out,[{CP,Subs,_Res}|T],D,C) ->
+ io:format(Out,
+ "{~w, 0, ~w, ~s, ~s}",[CP,length(Subs),
+ format_depth([C|D]),
+ "hash_"++format_depth([C|D])]),
+ if
+ T =:= [] ->
+ io:format(Out,"~n",[]);
+ true ->
+ io:format(Out,",~n",[])
+ end,
+ d_top_1(Out,T,D,C+1).
+
+
+d_top_hash(Out,List,D,_C) ->
+ HSize = length(List)*?HASH_SIZE_FACTOR,
+ io:format(Out,"static int ~s[~p] = ~n",["hash_"++format_depth(D),HSize]),
+ Tup = d_top_hash_1(List,0,erlang:make_tuple(HSize,-1),HSize),
+ io:format(Out,"~p; /* ~s */ ~n",[Tup,"hash_"++format_depth(D)]).
+
+d_top_hash_1([],_,Hash,_HSize) ->
+ Hash;
+d_top_hash_1([{CP,_,_}|T],Index,Hash,HSize) ->
+ Bucket = hash_search(Hash,HSize,CP rem HSize),
+ d_top_hash_1(T,Index+1,erlang:setelement(Bucket+1,Hash,Index),HSize).
+
+hash_search(Hash,_HSize,Bucket) when element(Bucket+1,Hash) =:= -1 ->
+ Bucket;
+hash_search(Hash,HSize,Bucket) ->
+ hash_search(Hash,HSize,(Bucket + 1) rem HSize).
+
+format_depth(D) ->
+ lists:reverse(tl(lists:reverse(lists:flatten(["compose_tab_",[ integer_to_list(X) ++ "_" || X <- lists:reverse(D) ]])))).
+
+
+
+
+make_prefix_table([],Table) ->
+ Table;
+make_prefix_table([{C,_,_}|T],Table) when C =< 4023 ->
+ Index = (C div 32) + 1 - 24,
+ Pos = C rem 32,
+ X = element(Index,Table),
+ Y = X bor (1 bsl Pos),
+ NewTab = setelement(Index,Table,Y),
+ make_prefix_table(T,NewTab);
+make_prefix_table([_|T],Tab) ->
+ make_prefix_table(T,Tab).
+
+dump_prefixes(Out,L) ->
+ io:format(Out,"#define COMP_CANDIDATE_MAP_OFFSET 24~n",[]),
+ io:format(Out,"static Uint32 comp_candidate_map[] = {~n",[]),
+ dump_prefixes_1(Out,L).
+dump_prefixes_1(Out,[H]) ->
+ io:format(Out," 0x~8.16.0BU~n",[H]),
+ io:format(Out,"};~n",[]);
+dump_prefixes_1(Out,[H|T]) ->
+ io:format(Out," 0x~8.16.0BU,~n",[H]),
+ dump_prefixes_1(Out,T).
+
+%% make_big_prefixes([],Table) ->
+%% Table;
+%% make_big_prefixes([C|T],Table) ->
+%% Index = (C div 32) + 1,
+%% Pos = C rem 32,
+%% X = element(Index,Table),
+%% Y = X bor (1 bsl Pos),
+%% NewTab = setelement(Index,Table,Y),
+%% make_big_prefixes(T,NewTab).
+
+%% dump_big_prefixes(Out,L) ->
+%% io:format(Out,"#define BIG_COMP_CANDIDATE_SIZE ~w~n", [?BIG_PREFIX_SIZE]),
+%% io:format(Out,"static Uint32 big_comp_candidate_map[] = {~n",[]),
+%% dump_prefixes_1(Out,L).
+
+pick([],_,Acc) ->
+ {lists:reverse(Acc),[]};
+pick([{[H|TT],N}|T],H,Acc) ->
+ pick(T,H,[{TT,N}|Acc]);
+pick([{[H|_],_}|_]=L,M,Acc) when H =/= M ->
+ {lists:reverse(Acc),L}.
+
+
+group([]) ->
+ [];
+group([{[H],N}|T]) ->
+ {Part,Rest} = pick(T,H,[]),
+ [{H,group(Part),N}| group(Rest)];
+group([{[H|_],_}|_]=L) ->
+ {Part,Rest} = pick(L,H,[]),
+ [{H,group(Part),0}| group(Rest)].
+
+
+
+
+