%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2000-2010. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%
%% This program is used to generate a header file with data for
%% normalizing denormalized Unicode.
%% The C header is generated from a text file containing tuples in the
%% following format:
%% {RevList,Translation}
%% where 'RevList' is a reversed list of the denormalized representation of
%% the character 'Translation'. An example would be the Swedish character
%% 'ö', which would be represented in the file as:
%% {[776,111],246}, as the denormalized representation of codepoint 246
%% is [111,776] (i.e. an 'o' followed by the "double dot" accent character 776),
%% while 'ä' instead is represented as {[776,97],228}, as the denormalized
%% form would be [97,776] (same accent but an 'a' instead).
%% The data file is generated from the table on Apple's developer connection
%% http://developer.apple.com/library/mac/#technotes/tn/tn1150table.html
%% The generation is done whenever new data is present (i.e. dec.dat has
%% to be changed) and not for every build. The product (the C header) is copied
%% to $ERL_TOP/erts/beam after generation and checked in.
%% The program and the data file are included for reference.
-module(dec).
-compile(export_all).
-define(HASH_SIZE_FACTOR,2).
-define(BIG_PREFIX_SIZE,392).
-define(INPUT_FILE_NAME,"dec.dat").
-define(OUTPUT_FILE_NAME,"erl_unicode_normalize.h").
%% Reads the data file FName with file:consult/1 and returns the
%% {RevList,Translation} tuples whose RevList holds at least two elements.
%% Crashes with a badmatch if the file cannot be read or parsed.
read(FName) ->
    {ok,L} = file:consult(FName),
    %% Matching on [_,_|_] selects lists with two or more elements without
    %% traversing each list the way length/1 does.
    %% The source also carried an additional, commented-out filter
    %% ", hd(A) < 769"; it remains disabled.
    [{A,B} || {[_,_|_]=A,B} <- L].

%% Generates ?OUTPUT_FILE_NAME from ?INPUT_FILE_NAME.
%% The input is read and grouped before the output file is opened. The
%% output then receives, in order: the license and "generated file" banners,
%% the CompEntry type definition and table size, the compose tables with
%% their hash tables (d/4) and the candidate bitmap (dump_prefixes/2).
%% Returns ok; any failure while reading, grouping or writing is a crash.
dec() ->
    L = read(?INPUT_FILE_NAME),
    G = group(L),
    {ok,Out} = file:open(?OUTPUT_FILE_NAME,[write]),
    %% try ... after guarantees that the file handle is closed even when
    %% one of the emitting steps crashes.
    try
        io:format(Out,
                  "/*~n"
                  "* %CopyrightBegin%~n"
                  "*~n"
                  "* Copyright Ericsson AB 1999-2010. All Rights Reserved.~n"
                  "*~n"
                  "* The contents of this file are subject to the Erlang Public License,~n"
                  "* Version 1.1, (the \"License\"); you may not use this file except in~n"
                  "* compliance with the License. You should have received a copy of the~n"
                  "* Erlang Public License along with this software. If not, it can be~n"
                  "* retrieved online at http://www.erlang.org/.~n"
                  "*~n"
                  "* Software distributed under the License is distributed on an "
                  "\"AS IS\"~n"
                  "* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See~n"
                  "* the License for the specific language governing rights and "
                  "limitations~n"
                  "* under the License.~n"
                  "*~n"
                  "* %CopyrightEnd%~n"
                  "*/~n"
                  "/*~n"
                  "* This file is automatically generated by ~p.erl, "
                  "do not edit manually~n"
                  "*/~n",
                  [?MODULE]),
        io:format(Out,
                  "#define HASH_SIZE_FACTOR ~w~n"
                  "typedef struct _compose_entry {~n"
                  " Uint16 c;~n"
                  " Uint16 res;~n"
                  " Uint16 num_subs;~n"
                  " struct _compose_entry *subs;~n"
                  " int *hash;~n"
                  "} CompEntry;~n~n"
                  "static int compose_tab_size = ~p;~n",
                  [?HASH_SIZE_FACTOR,length(G)]),
        d(Out,G,[],0),
        %% 102 bitmap words of 32 bits each, all bits initially cleared.
        PreTab = tuple_to_list(make_prefix_table(G,erlang:make_tuple(102,0))),
        dump_prefixes(Out,PreTab)
        %% Using this cuts down on the searching in the
        %% actual implementation, but wastes memory with little real gain..
        %% LL = lists:flatten([PartList || {PartList,_} <- L]),
        %% BigPreTab = tuple_to_list(
        %%               make_big_prefixes(LL,
        %%                                 erlang:make_tuple(?BIG_PREFIX_SIZE,0))),
        %% dump_big_prefixes(Out,BigPreTab),
    after
        file:close(Out)
    end,
    ok.

%% Emits the C definitions for one level of the grouped tree List.
%% The tables of all sub-levels are written first (d_sub/4), then the hash
%% table of this level (d_top_hash/4) and last the CompEntry array itself
%% (d_top/4), so every name an entry refers to has been emitted before it
%% is used.
%% D is the list of entry indices leading to this level, innermost first;
%% C is the index assigned to the first entry of List.
d(Out,List,D,C) ->
    d_sub(Out,List,D,C),
    d_top_hash(Out,List,D,C),
    d_top(Out,List,D,C).
%% Walks the entries of one level and, for every entry that has children,
%% emits the complete set of tables for those children through d/4.
%% Index counts the position of the current entry; it is pushed onto Path
%% to name the child level.
d_sub(_Out, [], _Path, _Index) ->
    ok;
d_sub(Out, [{_CP, Children, _Res} | Rest], Path, Index) ->
    case Children of
        [] -> ok;
        _ -> d(Out, Children, [Index | Path], 0)
    end,
    d_sub(Out, Rest, Path, Index + 1).

%% Writes the CompEntry array for one level, named after Path
%% (see format_depth/1), with one initializer per entry of Entries.
d_top(Out, Entries, Path, Index) ->
    TableName = format_depth(Path),
    io:format(Out, "static CompEntry ~s[] = {~n", [TableName]),
    d_top_1(Out, Entries, Path, Index),
    io:format(Out, "}; /* ~s */ ~n", [TableName]).

%% Writes the initializers of a CompEntry array.
%% An entry without children carries its code point and result; an entry
%% with children carries its code point, the number of children and the
%% names of the child table and of the child hash table.
%% Every initializer except the last one is followed by a comma.
d_top_1(_Out, [], _Path, _Index) ->
    ok;
d_top_1(Out, [{CP, Children, Res} | Rest], Path, Index) ->
    case Children of
        [] ->
            io:format(Out, "{~w, ~w, 0, NULL, NULL}", [CP, Res]);
        _ ->
            SubName = format_depth([Index | Path]),
            io:format(Out, "{~w, 0, ~w, ~s, ~s}",
                      [CP, length(Children), SubName, "hash_" ++ SubName])
    end,
    Terminator = case Rest of
                     [] -> "~n";
                     _ -> ",~n"
                 end,
    io:format(Out, Terminator, []),
    d_top_1(Out, Rest, Path, Index + 1).

%% Writes the hash table belonging to one level.
%% The table has ?HASH_SIZE_FACTOR slots per entry; each slot holds the
%% index of an entry in Entries, or -1 when the slot is unused.
d_top_hash(Out, Entries, Path, _Index) ->
    Size = length(Entries) * ?HASH_SIZE_FACTOR,
    HashName = "hash_" ++ format_depth(Path),
    io:format(Out, "static int ~s[~p] = ~n", [HashName, Size]),
    Table = d_top_hash_1(Entries, 0, erlang:make_tuple(Size, -1), Size),
    io:format(Out, "~p; /* ~s */ ~n", [Table, HashName]).

%% Inserts the position of every entry into the hash tuple. The preferred
%% slot is the code point modulo the table size; hash_search/3 picks the
%% slot actually used.
d_top_hash_1([], _Position, Table, _Size) ->
    Table;
d_top_hash_1([{CP, _, _} | Rest], Position, Table, Size) ->
    Preferred = CP rem Size,
    Slot = hash_search(Table, Size, Preferred),
    Updated = erlang:setelement(Slot + 1, Table, Position),
    d_top_hash_1(Rest, Position + 1, Updated, Size).

%% Returns the first unused (-1) slot at or after Slot, wrapping around at
%% Size (linear probing). Slots are zero based, tuple positions one based.
%% The test is a guard, so a failing element/2 counts as "slot not free".
hash_search(Table, Size, Slot) ->
    if
        element(Slot + 1, Table) =:= -1 ->
            Slot;
        true ->
            hash_search(Table, Size, (Slot + 1) rem Size)
    end.

%% Builds the C identifier for the level reached through Path, which lists
%% entry indices innermost first: [] gives "compose_tab" and [2,1] gives
%% "compose_tab_1_2".
format_depth(D) ->
    Suffixes = [[$_ | integer_to_list(Step)] || Step <- lists:reverse(D)],
    "compose_tab" ++ lists:append(Suffixes).

%% Sets, for every entry whose code point is at most 4023, the bit for that
%% code point in the bitmap tuple Table. Each tuple element holds 32 bits,
%% and the first element corresponds to code points starting at 24 * 32.
%% All other list elements are skipped.
make_prefix_table([], Table) ->
    Table;
make_prefix_table([{CP, _, _} | Rest], Table) when CP =< 4023 ->
    Word = (CP div 32) - 24 + 1,
    Mask = 1 bsl (CP rem 32),
    Updated = setelement(Word, Table, element(Word, Table) bor Mask),
    make_prefix_table(Rest, Updated);
make_prefix_table([_ | Rest], Table) ->
    make_prefix_table(Rest, Table).
%% Writes the candidate bitmap to Out: the offset define, the array header
%% and then one hexadecimal word per element of Words.
dump_prefixes(Out, Words) ->
    io:format(Out,
              "#define COMP_CANDIDATE_MAP_OFFSET 24~n"
              "static Uint32 comp_candidate_map[] = {~n",
              []),
    dump_prefixes_1(Out, Words).

%% Writes each word as an eight digit, zero padded hexadecimal constant with
%% a U suffix. Every word but the last is followed by a comma; the last one
%% is followed by the closing brace of the array.
%% There is no clause for an empty list.
dump_prefixes_1(Out, [Word | More]) ->
    case More of
        [] ->
            io:format(Out, " 0x~8.16.0BU~n", [Word]),
            io:format(Out, "};~n", []);
        _ ->
            io:format(Out, " 0x~8.16.0BU,~n", [Word]),
            dump_prefixes_1(Out, More)
    end.

%% make_big_prefixes([],Table) ->
%%     Table;
%% make_big_prefixes([C|T],Table) ->
%%     Index = (C div 32) + 1,
%%     Pos = C rem 32,
%%     X = element(Index,Table),
%%     Y = X bor (1 bsl Pos),
%%     NewTab = setelement(Index,Table,Y),
%%     make_big_prefixes(T,NewTab).

%% dump_big_prefixes(Out,L) ->
%%     io:format(Out,"#define BIG_COMP_CANDIDATE_SIZE ~w~n", [?BIG_PREFIX_SIZE]),
%%     io:format(Out,"static Uint32 big_comp_candidate_map[] = {~n",[]),
%%     dump_prefixes_1(Out,L).

%% Splits Entries at the first entry whose list does not start with Key.
%% Returns {Picked,Rest}: Picked is the reversed Acc followed by the leading
%% entries with Key removed from the front of their lists, Rest is the
%% untouched remainder of Entries.
pick(Entries, Key, Acc) ->
    StartsWithKey = fun({[First | _], _}) -> First =:= Key end,
    {Same, Rest} = lists:splitwith(StartsWithKey, Entries),
    Stripped = [{Tail, N} || {[_ | Tail], N} <- Same],
    {lists:reverse(Acc, Stripped), Rest}.

%% Turns a list of {RevList,Translation} entries into a tree of
%% {CodePoint,Children,Result} nodes, grouping consecutive entries that
%% share the first element of RevList.
%% When the first entry of a group consists of that single element, its
%% Translation becomes the Result of the node and only the following entries
%% are candidates for children; otherwise the Result is 0 and the first
%% entry itself is among the children.
group([]) ->
    [];
group([{[Lead | More], Value} | Following] = All) ->
    {Result, Candidates} =
        case More of
            [] -> {Value, Following};
            _ -> {0, All}
        end,
    {Children, Remaining} = pick(Candidates, Lead, []),
    [{Lead, group(Children), Result} | group(Remaining)].