diff options
author | Patrik Nyblom <[email protected]> | 2010-12-03 15:38:14 +0100 |
---|---|---|
committer | Patrik Nyblom <[email protected]> | 2010-12-03 15:38:14 +0100 |
commit | 73b53fe24ebc595a81c4ee023bcfef5a71e59db4 (patch) | |
tree | ed75b935318dbef275d62ab141a5f59b5b36bce0 /erts/emulator/internal_doc/dec.erl | |
parent | 52b0fbbc4daa4e22fcad6fe39dd8f8cec8bb8d1b (diff) | |
parent | 159d79b6657e5bb4f3accb6a0d74b74f06da4ba2 (diff) | |
download | otp-73b53fe24ebc595a81c4ee023bcfef5a71e59db4.tar.gz otp-73b53fe24ebc595a81c4ee023bcfef5a71e59db4.tar.bz2 otp-73b53fe24ebc595a81c4ee023bcfef5a71e59db4.zip |
Merge branch 'pan/unicode-filenames/OTP-8887' into dev
* pan/unicode-filenames/OTP-8887: (27 commits)
Test and correct filelib and filename
Add documentation to erlang.xml and slight correction to unicode_usage.xml
Add section about Unicode file names to stdlib users guide
Correct bug in file_name_SUITE making it fail on Unix instead of Windows7
Add documentation about raw filenames and Unicode file name translation mode
Make filelib not crash on re codepoints beyond 255 in re when filename is raw
Mend on_load_embedded testcase which did not handle windows links
Correct testcase regarding windows versions supporting soft links.
Teach filelib to use re in unicode mode when filenames are not raw
Treat soft links on Windows correctly in file_name_SUITE
Adapt new soft and hard link routines on Windos to Unicode
Corrected testcases broken by unicode filenames
Update preloaded prim_file
Teach prim_file not to accept atoms and not to throw exceptions
Adapt inet_drv to Visual Studio 2008
Teach spawn_executable about Unicode
Convert filenames read on MacOSX to canonical form
Teach file to accept codepoints beyond 255.
Add testcases
Correct shell utilities to handle unicode and possibly binaries
...
Diffstat (limited to 'erts/emulator/internal_doc/dec.erl')
-rw-r--r-- | erts/emulator/internal_doc/dec.erl | 237 |
1 files changed, 237 insertions, 0 deletions
diff --git a/erts/emulator/internal_doc/dec.erl b/erts/emulator/internal_doc/dec.erl new file mode 100644 index 0000000000..0315f2a52d --- /dev/null +++ b/erts/emulator/internal_doc/dec.erl @@ -0,0 +1,237 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2000-2010. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% This program is used to generate a header file with data for +%% normalizing denormalized unicode. + +%% The C header is generated from a text file containing tuples in the +%% following format: +%% {RevList,Translation} +%% Where 'RevList' is a reversed list of the denormalized repressentation of +%% the character 'Translation'. An example would be the swedish character +%% '�', which would be represented in the file as: +%% {[776,111],246}, as the denormalized representation of codepoint 246 +%% is [111,776] (i.e an 'o' followed by the "double dot accent character 776), +%% while '�' instead is represented as {[776,97],228}, as the denormalized +%% form would be [97,776] (same accent but an 'a' instead). +%% The datafile is generated from the table on Apple's developer connection +%% http://developer.apple.com/library/mac/#technotes/tn/tn1150table.html +%% The generating is done whenever new data is present (i.e. dec.dat has +%% to be changed) and not for every build. The product (the C header) is copied +%% to $ERL_TOP/erts/beam after generation and checked in. +%% The program and the data file is included for reference. + +-module(dec). + +-compile(export_all). + +-define(HASH_SIZE_FACTOR,2). +-define(BIG_PREFIX_SIZE,392). + +-define(INPUT_FILE_NAME,"dec.dat"). +-define(OUTPUT_FILE_NAME,"erl_unicode_normalize.h"). + +read(FName) -> + {ok,L} = file:consult(FName), + [{A,B} || {A,B} <- L, + length(A) > 1% , hd(A) < 769 + ]. + +dec() -> + L = read(?INPUT_FILE_NAME), + G = group(L), + {ok,Out} = file:open(?OUTPUT_FILE_NAME,[write]), + io:format + (Out, + "/*~n" + "* %CopyrightBegin%~n" + "*~n" + "* Copyright Ericsson AB 1999-2010. All Rights Reserved.~n" + "*~n" + "* The contents of this file are subject to the Erlang Public License,~n" + "* Version 1.1, (the \"License\"); you may not use this file except in~n" + "* compliance with the License. You should have received a copy of the~n" + "* Erlang Public License along with this software. If not, it can be~n" + "* retrieved online at http://www.erlang.org/.~n" + "*~n" + "* Software distributed under the License is distributed on an " + "\"AS IS\"~n" + "* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See~n" + "* the License for the specific language governing rights and " + "limitations~n" + "* under the License.~n" + "*~n" + "* %CopyrightEnd%~n" + "*/~n" + "/*~n" + "* This file is automatically generated by ~p.erl, " + "do not edit manually~n" + "*/~n", + [?MODULE]), + + io:format(Out, + "#define HASH_SIZE_FACTOR ~w~n" + "typedef struct _compose_entry {~n" + " Uint16 c;~n" + " Uint16 res;~n" + " Uint16 num_subs;~n" + " struct _compose_entry *subs;~n" + " int *hash;~n" + "} CompEntry;~n~n" + "static int compose_tab_size = ~p;~n", + [?HASH_SIZE_FACTOR,length(G)]), + d(Out,G,[],0), + PreTab = tuple_to_list(make_prefix_table(G,erlang:make_tuple(102,0))), + dump_prefixes(Out,PreTab), +%% Using this cuts down on the searching in the +%% actual implementation, but wastes memory with little real gain.. +%% LL = lists:flatten([PartList || {PartList,_} <- L]), +%% BigPreTab = tuple_to_list( +%% make_big_prefixes(LL, +%% erlang:make_tuple(?BIG_PREFIX_SIZE,0))), +%% dump_big_prefixes(Out,BigPreTab), + file:close(Out), + ok. + + + +d(Out,List,D,C) -> + d_sub(Out,List,D,C), + d_top_hash(Out,List,D,C), + d_top(Out,List,D,C). +d_sub(_Out,[],_D,_C) -> + ok; +d_sub(Out,[{_CP,[],_Res}|T],D,C) -> + d_sub(Out,T,D,C+1); +d_sub(Out,[{_CP,Subs,_Res0}|T],D,C) -> + d(Out,Subs,[C|D],0), + d_sub(Out,T,D,C+1). +d_top(Out,L,D,C) -> + io:format(Out,"static CompEntry ~s[] = {~n",[format_depth(D)]), + d_top_1(Out,L,D,C), + io:format(Out,"}; /* ~s */ ~n",[format_depth(D)]). + +d_top_1(_Out,[],_D,_C) -> + ok; +d_top_1(Out,[{CP,[],Res}|T],D,C) -> + io:format(Out, + "{~w, ~w, 0, NULL, NULL}",[CP,Res]), + if + T =:= [] -> + io:format(Out,"~n",[]); + true -> + io:format(Out,",~n",[]) + end, + d_top_1(Out,T,D,C+1); +d_top_1(Out,[{CP,Subs,_Res}|T],D,C) -> + io:format(Out, + "{~w, 0, ~w, ~s, ~s}",[CP,length(Subs), + format_depth([C|D]), + "hash_"++format_depth([C|D])]), + if + T =:= [] -> + io:format(Out,"~n",[]); + true -> + io:format(Out,",~n",[]) + end, + d_top_1(Out,T,D,C+1). + + +d_top_hash(Out,List,D,_C) -> + HSize = length(List)*?HASH_SIZE_FACTOR, + io:format(Out,"static int ~s[~p] = ~n",["hash_"++format_depth(D),HSize]), + Tup = d_top_hash_1(List,0,erlang:make_tuple(HSize,-1),HSize), + io:format(Out,"~p; /* ~s */ ~n",[Tup,"hash_"++format_depth(D)]). + +d_top_hash_1([],_,Hash,_HSize) -> + Hash; +d_top_hash_1([{CP,_,_}|T],Index,Hash,HSize) -> + Bucket = hash_search(Hash,HSize,CP rem HSize), + d_top_hash_1(T,Index+1,erlang:setelement(Bucket+1,Hash,Index),HSize). + +hash_search(Hash,_HSize,Bucket) when element(Bucket+1,Hash) =:= -1 -> + Bucket; +hash_search(Hash,HSize,Bucket) -> + hash_search(Hash,HSize,(Bucket + 1) rem HSize). + +format_depth(D) -> + lists:reverse(tl(lists:reverse(lists:flatten(["compose_tab_",[ integer_to_list(X) ++ "_" || X <- lists:reverse(D) ]])))). + + + + +make_prefix_table([],Table) -> + Table; +make_prefix_table([{C,_,_}|T],Table) when C =< 4023 -> + Index = (C div 32) + 1 - 24, + Pos = C rem 32, + X = element(Index,Table), + Y = X bor (1 bsl Pos), + NewTab = setelement(Index,Table,Y), + make_prefix_table(T,NewTab); +make_prefix_table([_|T],Tab) -> + make_prefix_table(T,Tab). + +dump_prefixes(Out,L) -> + io:format(Out,"#define COMP_CANDIDATE_MAP_OFFSET 24~n",[]), + io:format(Out,"static Uint32 comp_candidate_map[] = {~n",[]), + dump_prefixes_1(Out,L). +dump_prefixes_1(Out,[H]) -> + io:format(Out," 0x~8.16.0BU~n",[H]), + io:format(Out,"};~n",[]); +dump_prefixes_1(Out,[H|T]) -> + io:format(Out," 0x~8.16.0BU,~n",[H]), + dump_prefixes_1(Out,T). + +%% make_big_prefixes([],Table) -> +%% Table; +%% make_big_prefixes([C|T],Table) -> +%% Index = (C div 32) + 1, +%% Pos = C rem 32, +%% X = element(Index,Table), +%% Y = X bor (1 bsl Pos), +%% NewTab = setelement(Index,Table,Y), +%% make_big_prefixes(T,NewTab). + +%% dump_big_prefixes(Out,L) -> +%% io:format(Out,"#define BIG_COMP_CANDIDATE_SIZE ~w~n", [?BIG_PREFIX_SIZE]), +%% io:format(Out,"static Uint32 big_comp_candidate_map[] = {~n",[]), +%% dump_prefixes_1(Out,L). + +pick([],_,Acc) -> + {lists:reverse(Acc),[]}; +pick([{[H|TT],N}|T],H,Acc) -> + pick(T,H,[{TT,N}|Acc]); +pick([{[H|_],_}|_]=L,M,Acc) when H =/= M -> + {lists:reverse(Acc),L}. + + +group([]) -> + []; +group([{[H],N}|T]) -> + {Part,Rest} = pick(T,H,[]), + [{H,group(Part),N}| group(Rest)]; +group([{[H|_],_}|_]=L) -> + {Part,Rest} = pick(L,H,[]), + [{H,group(Part),0}| group(Rest)]. + + + + + |