aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/internal_doc/dec.erl
diff options
context:
space:
mode:
authorPatrik Nyblom <[email protected]>2010-12-03 15:38:14 +0100
committerPatrik Nyblom <[email protected]>2010-12-03 15:38:14 +0100
commit73b53fe24ebc595a81c4ee023bcfef5a71e59db4 (patch)
treeed75b935318dbef275d62ab141a5f59b5b36bce0 /erts/emulator/internal_doc/dec.erl
parent52b0fbbc4daa4e22fcad6fe39dd8f8cec8bb8d1b (diff)
parent159d79b6657e5bb4f3accb6a0d74b74f06da4ba2 (diff)
downloadotp-73b53fe24ebc595a81c4ee023bcfef5a71e59db4.tar.gz
otp-73b53fe24ebc595a81c4ee023bcfef5a71e59db4.tar.bz2
otp-73b53fe24ebc595a81c4ee023bcfef5a71e59db4.zip
Merge branch 'pan/unicode-filenames/OTP-8887' into dev
* pan/unicode-filenames/OTP-8887: (27 commits) Test and correct filelib and filename Add documentation to erlang.xml and slight correction to unicode_usage.xml Add section about Unicode file names to stdlib users guide Correct bug in file_name_SUITE making it fail on Unix instead of Windows7 Add documentation about raw filenames and Unicode file name translation mode Make filelib not crash on re codepoints beyond 255 in re when filename is raw Mend on_load_embedded testcase which did not handle windows links Correct testcase regarding windows versions supporting soft links. Teach filelib to use re in unicode mode when filenames are not raw Treat soft links on Windows correctly in file_name_SUITE Adapt new soft and hard link routines on Windos to Unicode Corrected testcases broken by unicode filenames Update preloaded prim_file Teach prim_file not to accept atoms and not to throw exceptions Adapt inet_drv to Visual Studio 2008 Teach spawn_executable about Unicode Convert filenames read on MacOSX to canonical form Teach file to accept codepoints beyond 255. Add testcases Correct shell utilities to handle unicode and possibly binaries ...
Diffstat (limited to 'erts/emulator/internal_doc/dec.erl')
-rw-r--r--erts/emulator/internal_doc/dec.erl237
1 files changed, 237 insertions, 0 deletions
diff --git a/erts/emulator/internal_doc/dec.erl b/erts/emulator/internal_doc/dec.erl
new file mode 100644
index 0000000000..0315f2a52d
--- /dev/null
+++ b/erts/emulator/internal_doc/dec.erl
@@ -0,0 +1,237 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2000-2010. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%% This program is used to generate a header file with data for
+%% normalizing denormalized unicode.
+
+%% The C header is generated from a text file containing tuples in the
+%% following format:
+%% {RevList,Translation}
+%% Where 'RevList' is a reversed list of the denormalized repressentation of
+%% the character 'Translation'. An example would be the swedish character
+%% '�', which would be represented in the file as:
+%% {[776,111],246}, as the denormalized representation of codepoint 246
+%% is [111,776] (i.e an 'o' followed by the "double dot accent character 776),
+%% while '�' instead is represented as {[776,97],228}, as the denormalized
+%% form would be [97,776] (same accent but an 'a' instead).
+%% The datafile is generated from the table on Apple's developer connection
+%% http://developer.apple.com/library/mac/#technotes/tn/tn1150table.html
+%% The generating is done whenever new data is present (i.e. dec.dat has
+%% to be changed) and not for every build. The product (the C header) is copied
+%% to $ERL_TOP/erts/beam after generation and checked in.
+%% The program and the data file is included for reference.
+
+-module(dec).
+
+-compile(export_all).
+
+-define(HASH_SIZE_FACTOR,2).
+-define(BIG_PREFIX_SIZE,392).
+
+-define(INPUT_FILE_NAME,"dec.dat").
+-define(OUTPUT_FILE_NAME,"erl_unicode_normalize.h").
+
+read(FName) ->
+ {ok,L} = file:consult(FName),
+ [{A,B} || {A,B} <- L,
+ length(A) > 1% , hd(A) < 769
+ ].
+
+dec() ->
+ L = read(?INPUT_FILE_NAME),
+ G = group(L),
+ {ok,Out} = file:open(?OUTPUT_FILE_NAME,[write]),
+ io:format
+ (Out,
+ "/*~n"
+ "* %CopyrightBegin%~n"
+ "*~n"
+ "* Copyright Ericsson AB 1999-2010. All Rights Reserved.~n"
+ "*~n"
+ "* The contents of this file are subject to the Erlang Public License,~n"
+ "* Version 1.1, (the \"License\"); you may not use this file except in~n"
+ "* compliance with the License. You should have received a copy of the~n"
+ "* Erlang Public License along with this software. If not, it can be~n"
+ "* retrieved online at http://www.erlang.org/.~n"
+ "*~n"
+ "* Software distributed under the License is distributed on an "
+ "\"AS IS\"~n"
+ "* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See~n"
+ "* the License for the specific language governing rights and "
+ "limitations~n"
+ "* under the License.~n"
+ "*~n"
+ "* %CopyrightEnd%~n"
+ "*/~n"
+ "/*~n"
+ "* This file is automatically generated by ~p.erl, "
+ "do not edit manually~n"
+ "*/~n",
+ [?MODULE]),
+
+ io:format(Out,
+ "#define HASH_SIZE_FACTOR ~w~n"
+ "typedef struct _compose_entry {~n"
+ " Uint16 c;~n"
+ " Uint16 res;~n"
+ " Uint16 num_subs;~n"
+ " struct _compose_entry *subs;~n"
+ " int *hash;~n"
+ "} CompEntry;~n~n"
+ "static int compose_tab_size = ~p;~n",
+ [?HASH_SIZE_FACTOR,length(G)]),
+ d(Out,G,[],0),
+ PreTab = tuple_to_list(make_prefix_table(G,erlang:make_tuple(102,0))),
+ dump_prefixes(Out,PreTab),
+%% Using this cuts down on the searching in the
+%% actual implementation, but wastes memory with little real gain..
+%% LL = lists:flatten([PartList || {PartList,_} <- L]),
+%% BigPreTab = tuple_to_list(
+%% make_big_prefixes(LL,
+%% erlang:make_tuple(?BIG_PREFIX_SIZE,0))),
+%% dump_big_prefixes(Out,BigPreTab),
+ file:close(Out),
+ ok.
+
+
+
+d(Out,List,D,C) ->
+ d_sub(Out,List,D,C),
+ d_top_hash(Out,List,D,C),
+ d_top(Out,List,D,C).
+d_sub(_Out,[],_D,_C) ->
+ ok;
+d_sub(Out,[{_CP,[],_Res}|T],D,C) ->
+ d_sub(Out,T,D,C+1);
+d_sub(Out,[{_CP,Subs,_Res0}|T],D,C) ->
+ d(Out,Subs,[C|D],0),
+ d_sub(Out,T,D,C+1).
+d_top(Out,L,D,C) ->
+ io:format(Out,"static CompEntry ~s[] = {~n",[format_depth(D)]),
+ d_top_1(Out,L,D,C),
+ io:format(Out,"}; /* ~s */ ~n",[format_depth(D)]).
+
+d_top_1(_Out,[],_D,_C) ->
+ ok;
+d_top_1(Out,[{CP,[],Res}|T],D,C) ->
+ io:format(Out,
+ "{~w, ~w, 0, NULL, NULL}",[CP,Res]),
+ if
+ T =:= [] ->
+ io:format(Out,"~n",[]);
+ true ->
+ io:format(Out,",~n",[])
+ end,
+ d_top_1(Out,T,D,C+1);
+d_top_1(Out,[{CP,Subs,_Res}|T],D,C) ->
+ io:format(Out,
+ "{~w, 0, ~w, ~s, ~s}",[CP,length(Subs),
+ format_depth([C|D]),
+ "hash_"++format_depth([C|D])]),
+ if
+ T =:= [] ->
+ io:format(Out,"~n",[]);
+ true ->
+ io:format(Out,",~n",[])
+ end,
+ d_top_1(Out,T,D,C+1).
+
+
+d_top_hash(Out,List,D,_C) ->
+ HSize = length(List)*?HASH_SIZE_FACTOR,
+ io:format(Out,"static int ~s[~p] = ~n",["hash_"++format_depth(D),HSize]),
+ Tup = d_top_hash_1(List,0,erlang:make_tuple(HSize,-1),HSize),
+ io:format(Out,"~p; /* ~s */ ~n",[Tup,"hash_"++format_depth(D)]).
+
+d_top_hash_1([],_,Hash,_HSize) ->
+ Hash;
+d_top_hash_1([{CP,_,_}|T],Index,Hash,HSize) ->
+ Bucket = hash_search(Hash,HSize,CP rem HSize),
+ d_top_hash_1(T,Index+1,erlang:setelement(Bucket+1,Hash,Index),HSize).
+
+hash_search(Hash,_HSize,Bucket) when element(Bucket+1,Hash) =:= -1 ->
+ Bucket;
+hash_search(Hash,HSize,Bucket) ->
+ hash_search(Hash,HSize,(Bucket + 1) rem HSize).
+
+format_depth(D) ->
+ lists:reverse(tl(lists:reverse(lists:flatten(["compose_tab_",[ integer_to_list(X) ++ "_" || X <- lists:reverse(D) ]])))).
+
+
+
+
+make_prefix_table([],Table) ->
+ Table;
+make_prefix_table([{C,_,_}|T],Table) when C =< 4023 ->
+ Index = (C div 32) + 1 - 24,
+ Pos = C rem 32,
+ X = element(Index,Table),
+ Y = X bor (1 bsl Pos),
+ NewTab = setelement(Index,Table,Y),
+ make_prefix_table(T,NewTab);
+make_prefix_table([_|T],Tab) ->
+ make_prefix_table(T,Tab).
+
+dump_prefixes(Out,L) ->
+ io:format(Out,"#define COMP_CANDIDATE_MAP_OFFSET 24~n",[]),
+ io:format(Out,"static Uint32 comp_candidate_map[] = {~n",[]),
+ dump_prefixes_1(Out,L).
+dump_prefixes_1(Out,[H]) ->
+ io:format(Out," 0x~8.16.0BU~n",[H]),
+ io:format(Out,"};~n",[]);
+dump_prefixes_1(Out,[H|T]) ->
+ io:format(Out," 0x~8.16.0BU,~n",[H]),
+ dump_prefixes_1(Out,T).
+
+%% make_big_prefixes([],Table) ->
+%% Table;
+%% make_big_prefixes([C|T],Table) ->
+%% Index = (C div 32) + 1,
+%% Pos = C rem 32,
+%% X = element(Index,Table),
+%% Y = X bor (1 bsl Pos),
+%% NewTab = setelement(Index,Table,Y),
+%% make_big_prefixes(T,NewTab).
+
+%% dump_big_prefixes(Out,L) ->
+%% io:format(Out,"#define BIG_COMP_CANDIDATE_SIZE ~w~n", [?BIG_PREFIX_SIZE]),
+%% io:format(Out,"static Uint32 big_comp_candidate_map[] = {~n",[]),
+%% dump_prefixes_1(Out,L).
+
+pick([],_,Acc) ->
+ {lists:reverse(Acc),[]};
+pick([{[H|TT],N}|T],H,Acc) ->
+ pick(T,H,[{TT,N}|Acc]);
+pick([{[H|_],_}|_]=L,M,Acc) when H =/= M ->
+ {lists:reverse(Acc),L}.
+
+
+group([]) ->
+ [];
+group([{[H],N}|T]) ->
+ {Part,Rest} = pick(T,H,[]),
+ [{H,group(Part),N}| group(Rest)];
+group([{[H|_],_}|_]=L) ->
+ {Part,Rest} = pick(L,H,[]),
+ [{H,group(Part),0}| group(Rest)].
+
+
+
+
+