%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2000-2010. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%
%% This program is used to generate a header file with data for
%% normalizing denormalized unicode.
%% The C header is generated from a text file containing tuples in the
%% following format:
%% {RevList,Translation}
%% Where 'RevList' is a reversed list of the denormalized representation of
%% the character 'Translation'. An example would be the Swedish character
%% 'ö', which would be represented in the file as:
%% {[776,111],246}, as the denormalized representation of codepoint 246
%% is [111,776] (i.e. an 'o' followed by the "double dot" accent character 776),
%% while 'ä' instead is represented as {[776,97],228}, as the denormalized
%% form would be [97,776] (same accent but an 'a' instead).
%% The datafile is generated from the table on Apple's developer connection
%% http://developer.apple.com/library/mac/#technotes/tn/tn1150table.html
%% The generation is done whenever new data is present (i.e. dec.dat has
%% to be changed) and not for every build. The product (the C header) is copied
%% to $ERL_TOP/erts/beam after generation and checked in.
%% The program and the data file are included for reference.
%% Module name; the banner written into the generated header refers to this
%% module as "<module>.erl" (via ?MODULE).
-module(dec).
%% Every function in the module is exported, so any of them can be called
%% from the shell or from other modules.
-compile(export_all).
%% Each generated hash table gets (number of entries * HASH_SIZE_FACTOR)
%% slots; the value is also written into the header as a C #define.
-define(HASH_SIZE_FACTOR,2).
%% Only referenced by the commented-out "big prefix" code in the visible
%% part of this file.
-define(BIG_PREFIX_SIZE,392).
%% File consulted for the {RevList,Translation} tuples.
-define(INPUT_FILE_NAME,"dec.dat").
%% Name of the C header that dec/0 writes.
-define(OUTPUT_FILE_NAME,"erl_unicode_normalize.h").
%% Consult FName (a file of Erlang terms) and return, in file order, the
%% {RevList,Translation} pairs whose RevList has more than one element.
%% Terms that are not 2-tuples are skipped by the generator pattern, and
%% since the length test is a guard-style filter, a first element that is
%% not a proper list makes the filter false instead of raising.
%% Crashes with badmatch if the file cannot be consulted.
read(FName) ->
    {ok, Terms} = file:consult(FName),
    %% A further restriction, hd(RevList) < 769, was once considered here
    %% but is not applied.
    [Pair || {RevList, _Translation} = Pair <- Terms,
             length(RevList) > 1].
%% Entry point: read ?INPUT_FILE_NAME, group the entries into a tree keyed
%% on successive code points of the reversed sequences, and write the C
%% header ?OUTPUT_FILE_NAME containing:
%%   - a license/"generated file" banner,
%%   - the HASH_SIZE_FACTOR define, the CompEntry struct and the size of
%%     the top-level table,
%%   - all composition tables with their hash tables (see d/4),
%%   - the candidate bitmap (see make_prefix_table/2 and dump_prefixes/2).
%% Returns ok. Crashes (badmatch) if the input cannot be consulted or the
%% output cannot be opened.
%%
%% The output file is closed in an 'after' section so that the handle is
%% released even when one of the emit steps raises; previously a crash
%% between file:open/2 and file:close/1 leaked the open file.
dec() ->
    %% The input is read and grouped before the output is opened, so a bad
    %% input file never truncates an existing header.
    L = read(?INPUT_FILE_NAME),
    G = group(L),
    {ok,Out} = file:open(?OUTPUT_FILE_NAME,[write]),
    try
        io:format
          (Out,
           "/*~n"
           "* %CopyrightBegin%~n"
           "*~n"
           "* Copyright Ericsson AB 1999-2010. All Rights Reserved.~n"
           "*~n"
           "* The contents of this file are subject to the Erlang Public License,~n"
           "* Version 1.1, (the \"License\"); you may not use this file except in~n"
           "* compliance with the License. You should have received a copy of the~n"
           "* Erlang Public License along with this software. If not, it can be~n"
           "* retrieved online at http://www.erlang.org/.~n"
           "*~n"
           "* Software distributed under the License is distributed on an "
           "\"AS IS\"~n"
           "* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See~n"
           "* the License for the specific language governing rights and "
           "limitations~n"
           "* under the License.~n"
           "*~n"
           "* %CopyrightEnd%~n"
           "*/~n"
           "/*~n"
           "* This file is automatically generated by ~p.erl, "
           "do not edit manually~n"
           "*/~n",
           [?MODULE]),
        io:format(Out,
                  "#define HASH_SIZE_FACTOR ~w~n"
                  "typedef struct _compose_entry {~n"
                  " Uint16 c;~n"
                  " Uint16 res;~n"
                  " Uint16 num_subs;~n"
                  " struct _compose_entry *subs;~n"
                  " int *hash;~n"
                  "} CompEntry;~n~n"
                  "static int compose_tab_size = ~p;~n",
                  [?HASH_SIZE_FACTOR,length(G)]),
        d(Out,G,[],0),
        %% 102 words of 32 bits each; together with the word offset of 24
        %% used by make_prefix_table/2 this spans code points 768..4031.
        PreTab = tuple_to_list(make_prefix_table(G,erlang:make_tuple(102,0))),
        dump_prefixes(Out,PreTab)
        %% Using this cuts down on the searching in the
        %% actual implementation, but wastes memory with little real gain..
        %% LL = lists:flatten([PartList || {PartList,_} <- L]),
        %% BigPreTab = tuple_to_list(
        %% make_big_prefixes(LL,
        %% erlang:make_tuple(?BIG_PREFIX_SIZE,0))),
        %% dump_big_prefixes(Out,BigPreTab),
    after
        %% Runs on both the normal and the exceptional path.
        file:close(Out)
    end,
    ok.
%% Emit everything belonging to one level of the composition tree, in the
%% order needed for the C code to compile: first the tables of all nested
%% levels (d_sub/4), then this level's hash table (d_top_hash/4) and last
%% this level's entry table (d_top/4), which refers to the former two.
%%   Out     - io device to write to
%%   Level   - list of {CodePoint,Subs,Result} nodes
%%   Path    - indices leading to this level, innermost first
%%   Index   - index of the first node of Level
%% Returns the result of d_top/4.
d(Out, Level, Path, Index) ->
    d_sub(Out, Level, Path, Index),
    d_top_hash(Out, Level, Path, Index),
    d_top(Out, Level, Path, Index).
%% Walk the nodes of one level and, for every node that has a non-empty
%% list of sub-nodes, emit that sub-level with d/4. The sub-level's path is
%% the node's index C prepended to the current path D, and its own index
%% counter starts at 0. Nodes without sub-nodes only advance the counter.
%% Returns ok.
d_sub(_Out, [], _D, _C) ->
    ok;
d_sub(Out, [{_CP, Subs, _Res} | Rest], D, C) ->
    case Subs of
        [] ->
            ok;
        _ ->
            d(Out, Subs, [C | D], 0)
    end,
    d_sub(Out, Rest, D, C + 1).
%% Write the C array definition for one level: an opening line naming the
%% table after its path D, one initializer row per node (d_top_1/4), and a
%% closing line that repeats the table name in a comment.
%% Returns the result of the final io:format/3.
d_top(Out, L, D, C) ->
    TableName = format_depth(D),
    io:format(Out, "static CompEntry ~s[] = {~n", [TableName]),
    d_top_1(Out, L, D, C),
    io:format(Out, "}; /* ~s */ ~n", [TableName]).
%% Write one CompEntry initializer row per node.
%%  - A node without sub-nodes becomes {CP, Res, 0, NULL, NULL}.
%%  - A node with sub-nodes becomes {CP, 0, NumSubs, SubTable, SubHash},
%%    where the table names are derived from the node's index C prepended
%%    to the path D.
%%    NOTE(review): the node's own Res is not written in this case --
%%    confirm that no input needs both a result and sub-nodes.
%% Rows are separated by ",\n"; the last row is followed by "\n" only.
%% Returns ok.
d_top_1(_Out, [], _D, _C) ->
    ok;
d_top_1(Out, [{CP, Subs, Res} | Rest], D, C) ->
    case Subs of
        [] ->
            io:format(Out,
                      "{~w, ~w, 0, NULL, NULL}", [CP, Res]);
        _ ->
            NumSubs = length(Subs),
            SubName = format_depth([C | D]),
            io:format(Out,
                      "{~w, 0, ~w, ~s, ~s}", [CP, NumSubs,
                                              SubName,
                                              "hash_" ++ SubName])
    end,
    Separator = case Rest of
                    [] -> "~n";
                    _ -> ",~n"
                end,
    io:format(Out, Separator, []),
    d_top_1(Out, Rest, D, C + 1).
%% Write the hash table that accompanies the entry table of one level.
%% The table has length(List) * ?HASH_SIZE_FACTOR slots, initially -1, and
%% is filled by d_top_hash_1/4 with the indices of the nodes. The filled
%% tuple is printed with ~p, i.e. in Erlang's {A,B,...} tuple notation.
%% The last argument is unused.
d_top_hash(Out, List, D, _C) ->
    HSize = length(List) * ?HASH_SIZE_FACTOR,
    HashName = "hash_" ++ format_depth(D),
    io:format(Out, "static int ~s[~p] = ~n", [HashName, HSize]),
    EmptySlots = erlang:make_tuple(HSize, -1),
    Filled = d_top_hash_1(List, 0, EmptySlots, HSize),
    io:format(Out, "~p; /* ~s */ ~n", [Filled, HashName]).
%% Insert the nodes of one level into the hash tuple Hash (HSize slots,
%% -1 meaning free). The node at position I (counting from Index) is stored
%% as the value I in the first free slot found by hash_search/3, starting
%% from CP rem HSize. Returns the updated tuple.
d_top_hash_1(Nodes, Index, Hash, HSize) ->
    Insert =
        fun({CP, _, _}, {I, Slots}) ->
                Bucket = hash_search(Slots, HSize, CP rem HSize),
                {I + 1, erlang:setelement(Bucket + 1, Slots, I)}
        end,
    {_NextIndex, Filled} = lists:foldl(Insert, {Index, Hash}, Nodes),
    Filled.
%% Linear probing: starting at the zero-based slot Bucket, return the first
%% slot of Hash that holds -1, wrapping around at HSize.
%% The test is a guard, so a Bucket for which element/2 fails counts as
%% "occupied" rather than raising. Does not terminate if no slot is free.
hash_search(Hash, HSize, Bucket) ->
    if
        element(Bucket + 1, Hash) =:= -1 ->
            Bucket;
        true ->
            hash_search(Hash, HSize, (Bucket + 1) rem HSize)
    end.
%% Build the C identifier of the table reached through path D. D holds the
%% indices innermost first, so it is reversed before formatting:
%%   format_depth([])    -> "compose_tab"
%%   format_depth([2,1]) -> "compose_tab_1_2"
format_depth(D) ->
    Suffix = lists:append(["_" ++ integer_to_list(I) || I <- lists:reverse(D)]),
    "compose_tab" ++ Suffix.
%% Set one bit in the bitmap tuple Table for the code point C of every
%% {C,_,_} node with C =< 4023. Each tuple element is a 32-bit word; the
%% word for C is element (C div 32) + 1 - 24 and the bit is C rem 32, so
%% element 1 corresponds to code points 768..799. Any other list element
%% is skipped. Returns the updated tuple.
%% A code point below 768 yields a non-positive index and raises badarg.
make_prefix_table(Nodes, Table) ->
    SetBit =
        fun({C, _, _}, Tab) when C =< 4023 ->
                Word = (C div 32) + 1 - 24,
                Mask = 1 bsl (C rem 32),
                setelement(Word, Tab, element(Word, Tab) bor Mask);
           (_Other, Tab) ->
                Tab
        end,
    lists:foldl(SetBit, Table, Nodes).
%% Write the candidate bitmap as a C array: the offset define, the array
%% opening line, and then the words themselves plus the closing brace via
%% dump_prefixes_1/2. L is the list of 32-bit words.
dump_prefixes(Out, L) ->
    Preamble = ["#define COMP_CANDIDATE_MAP_OFFSET 24~n",
                "static Uint32 comp_candidate_map[] = {~n"],
    lists:foreach(fun(Line) -> io:format(Out, Line, []) end, Preamble),
    dump_prefixes_1(Out, L).
%% Write each word as an 8-digit, zero-padded, upper-case hexadecimal C
%% literal with a U suffix, one per line. Every word but the last is
%% followed by a comma; after the last word the array is closed with "};".
%% Fails with function_clause on an empty list.
dump_prefixes_1(Out, [Word | Rest]) ->
    case Rest of
        [] ->
            io:format(Out, " 0x~8.16.0BU~n", [Word]),
            io:format(Out, "};~n", []);
        _ ->
            io:format(Out, " 0x~8.16.0BU,~n", [Word]),
            dump_prefixes_1(Out, Rest)
    end.
%% make_big_prefixes([],Table) ->
%% Table;
%% make_big_prefixes([C|T],Table) ->
%% Index = (C div 32) + 1,
%% Pos = C rem 32,
%% X = element(Index,Table),
%% Y = X bor (1 bsl Pos),
%% NewTab = setelement(Index,Table,Y),
%% make_big_prefixes(T,NewTab).
%% dump_big_prefixes(Out,L) ->
%% io:format(Out,"#define BIG_COMP_CANDIDATE_SIZE ~w~n", [?BIG_PREFIX_SIZE]),
%% io:format(Out,"static Uint32 big_comp_candidate_map[] = {~n",[]),
%% dump_prefixes_1(Out,L).
%% Split off the leading run of {[H|Tail],N} entries whose list starts with
%% exactly H. Returns {Picked,Rest}: Picked is the reversed Acc followed by
%% the run with the leading H removed from each list ({Tail,N}), and Rest
%% is the remainder of the input starting at the first entry with another
%% head. An entry that is reached and is not a 2-tuple with a non-empty
%% list as first element raises function_clause.
pick(Entries, H, Acc) ->
    StartsWithH = fun({[First | _], _}) -> First =:= H end,
    {Run, Rest} = lists:splitwith(StartsWithH, Entries),
    Stripped = [{Tail, N} || {[_ | Tail], N} <- Run],
    {lists:reverse(Acc, Stripped), Rest}.
%% Turn a list of {CodePoints,Result} entries into a tree of
%% {CodePoint,SubNodes,Result} nodes, one level per list position.
%%  - An entry whose list is exactly [CP] becomes a node with that entry's
%%    Result; the entries directly following it that also start with CP
%%    (see pick/3) become its sub-nodes.
%%  - Otherwise all leading entries starting with CP are collected under a
%%    node whose Result is 0.
%% Only consecutive entries are merged, so entries sharing a head are
%% presumably expected to be adjacent in the input -- verify against the
%% data file.
group([]) ->
    [];
group([{[CP], Result} | Following]) ->
    {Longer, Others} = pick(Following, CP, []),
    [{CP, group(Longer), Result} | group(Others)];
group([{[CP | _], _} | _] = Entries) ->
    {Sharing, Others} = pick(Entries, CP, []),
    [{CP, group(Sharing), 0} | group(Others)].