From 84adefa331c4159d432d22840663c38f155cd4c1 Mon Sep 17 00:00:00 2001 From: Erlang/OTP Date: Fri, 20 Nov 2009 14:54:40 +0000 Subject: The R13B03 release. --- lib/wx/examples/demo/demo_html_tagger.erl | 534 ++++++++++++++++++++++++++++++ 1 file changed, 534 insertions(+) create mode 100644 lib/wx/examples/demo/demo_html_tagger.erl (limited to 'lib/wx/examples/demo/demo_html_tagger.erl') diff --git a/lib/wx/examples/demo/demo_html_tagger.erl b/lib/wx/examples/demo/demo_html_tagger.erl new file mode 100644 index 0000000000..9b6d1fd950 --- /dev/null +++ b/lib/wx/examples/demo/demo_html_tagger.erl @@ -0,0 +1,534 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% + +-module(demo_html_tagger). + +%% You will notice that this program has very few type declarations +%% That's because this program uses some pretty dodgy techniques to +%% get at the data it requires. + +%% I use epp_dodger to parse the file and the new imporved erl_scan +%% find the exact values of the tokens + +%% epp_dodger returns an objects of type erl_syntax which are pretty +%% nasty buggers. We could write the types out but it would hardly +%% help. + +%% to test run + +%%-compile(export_all). + + +-export([erl2htmltext/1, erl2htmlfile/1]). + +erl2htmltext(File) -> + try + erl2html0(File) + catch + What:Why -> + io:format("error in:~s ~p ~p~n",[File,What,Why]) + end. + +erl2htmlfile(File) -> + try + Text = erl2html0(File), + Root = filename:basename(filename:rootname(File)), + Out = "./html/" ++ Root ++ ".html", + file:write_file(Out, [Text]) + catch + What:Why -> + io:format("error in:~s ~p ~p~n",[File,What,Why]) + end. + + +splitErlang(File) -> + {ok, Forms} = dodge_file(File), + {Anchors, Patches} = analyse(Forms), + Raw = read_raw_forms(File), + Raw1 = merge_anchors(Anchors, Raw), + Raw2 = merge_forms(Raw1, Patches, []), + Rtf = [final(I) || I <- Raw2], + {taggedBlocks, Rtf}. + +erl2html0(File) -> + Tb = splitErlang(File), + Html = to_html(Tb), + prelude(Html). + +merge_forms([{Tag,L1}|T], Patches, L) -> + {L2, Patches1} = apply_patches(L1, Patches), + merge_forms(T, Patches1, [{Tag,L2}|L]); +merge_forms([], _, L) -> + lists:reverse(L). + +apply_patches(Toks, []) -> + %% we've run out of patches but we must still simplify + %% every term + {[simplify(I) || I <- Toks], []}; +apply_patches(Toks, Patches) -> + apply_patches(Toks, Patches, []). + +apply_patches([{atom,Ln,Val}=A|T], Patches, L) -> + case do_patch(Ln, Patches) of + {yes, New, Patches1} -> + New1 = reformat(New, Val), + apply_patches(T, Patches1, [New1|L]); + {no, Patches1} -> + apply_patches(T, Patches1, [simplify(A)|L]) + end; +apply_patches([H|T], Patches, L) -> + apply_patches(T, Patches, [simplify(H)|L]); +apply_patches([], Patches, L) -> + {lists:reverse(L), Patches}. + + +simplify({atom,_,Str}) -> + case (catch list_to_existing_atom(Str)) of + {'EXIT', _} -> + {atom, Str}; + A -> + case is_keyword(A) of + true -> {keyword, Str}; + false -> + {atom, Str} + end + end; +simplify({dot,_,Str}) -> + {terminal, Str}; +simplify({Tag,_,Str}) -> + case is_keyword(Tag) of + true -> + {keyword, Str}; + false -> + case is_terminal(Tag) of + true -> + {terminal, Str}; + false -> + {Tag, Str} + end + end; +simplify(X) -> + io:format("simplify wtfit:~p~n",[X]), + X. + +do_patch(Ln, [{Ln,Tag}|P]) -> {yes, Tag, P}; +do_patch(Ln, [{Ln1,_}|_] = P) when Ln1 > Ln -> {no, P}; +do_patch(Ln, [_|T]) -> do_patch(Ln, T); +do_patch(_, []) -> {no, []}. + +reformat({local,{F,A}}, Str) -> {local,F,A,Str}; +reformat({remote,M,F,A}, Str) -> {remote,M,F,A,Str}; +reformat({remote,{M,F,A}}, Str) -> {remote,M,F,A,Str}; +reformat({bif,{F,A}}, Str) -> {bif,F,A,Str}; +reformat(Tag, Str) -> + io:format("reformat*:~p ~p~n",[Tag,Str]), + {Tag,Str}. + +to_html({taggedBlocks, L}) -> + [[anchor1(Anchor),to_html(Body)] || {Anchor,Body} <- L]; +to_html({taggedToks, L}) -> + [to_html1(I) || I <- L]. + +anchor1({func, F, A}) -> + [""]; +anchor1({specification, F, A}) -> + [""]; +anchor1(_X) -> + "". + +linkname(F, A) when is_atom(F) -> + a2s(F) ++ "-" ++ integer_to_list(A); +linkname(F, A) when is_list(F) -> + F ++ "-" ++ integer_to_list(A). + +a2s(A) -> + atom_to_list(A). + +font(C, S) -> + ["", htmlquote(S), ""]. + +htmlquote("<" ++ T) -> ["<"|htmlquote(T)]; +htmlquote([H|T]) -> [H|htmlquote(T)]; +htmlquote([]) -> []. + +to_html1({white_space,V}) -> V; +to_html1({comment, V}) -> font("#B22222", V); +to_html1({var,V}) -> font("orange", V); +to_html1({string,V}) -> font("#FA8072", V); +to_html1({integer,V}) -> font("#1111AA", V); +to_html1({bif,_F,_A,Str}) -> font("#FF00FF", Str); +to_html1({keyword, V}) -> font("#FF00FF", V); +to_html1({atom, V}) -> V; +to_html1({terminal,V}) -> V; +to_html1({char,V}) -> V; +to_html1({float,V}) -> V; +to_html1({anchor,F,A}) -> + [""]; +to_html1({local,F,A,Str}) -> + ["", + htmlquote(Str),""]; +to_html1({remote,_M,_F,_A,Str}) -> + %%["",htmlquote(Str),""], + Str. + +%% merge the anchors +%% there should be one block per anchor +%% we check the containing form (for safety) + +%% merge_anchors([{_,{file,_}}|A], B) -> +%% merge_anchors(A, B); +merge_anchors([{Tag,Val}=H|A], [B|T]) -> + case contains(Tag, B) of + true -> + [{Val,B}|merge_anchors(A, T)]; + false -> + io:format("Logic error: H=~p B=~p~n",[H,B]), + exit(1) + end; +merge_anchors([], []) -> []; +merge_anchors([], [X]) -> + %% this is the last block - + %% trailing white space and comments have no tag + %% because eos is not a tag ... + [{eof, X}]; +merge_anchors(X, Y) -> + io:format("ops:~p~n",[{X,Y}]), + []. + +contains(Loc, [{_,Loc,_}|_]) -> true; +contains(Loc, [_|T]) -> contains(Loc, T); +contains(_, []) -> false. + + +dodge_file(File) -> + case file:open(File, [read]) of + {ok, Handle} -> + {ok, F} = epp_dodger:parse(Handle, {1,1}), + file:close(Handle), + L = [revert_forms(I) || I <- F], + {ok, L}; + Error -> + Error + end. + +revert_forms(F) -> + case erl_syntax:is_form(F) of + true -> + %% revert fails on ifdef ... etc + case (catch erl_syntax:revert(F)) of + {'EXIT', _Why} -> + io:format("error reverting:~p=~p~n",[F,_Why]), + F; + Other -> + Other + end; + false -> + io:format("uugh:~p~n",[F]) + end. + +%% read up to dot +%% read_raw_forms(File) -> [form()] +%% form() = [tok()] +%% tok() = {Type,{Line::int,Col::int},string} +%% Type = atom | int | var | string ... + +read_raw_forms(File) -> + {ok, Bin} = file:read_file(File), + Str = binary_to_list(Bin), + loop(erl_scan:tokens([], Str, {1,1}, [return,text]), []). + +loop({done, {eof,_}, eof}, L) -> + lists:reverse(L); +loop({done, {ok, Toks, _}, eof}, L) -> + lists:reverse([normalize_toks(Toks)|L]); +loop({done, {ok, Toks, Ln}, Str1}, L) -> + loop(erl_scan:tokens([], Str1, Ln, [return,text]), + [normalize_toks(Toks)|L]); +loop({more, X}, L) -> + loop(erl_scan:tokens(X, eof, {1,1}, [return,text]), L). + +normalize_toks(Toks) -> + [normalize_tok(I) || I <- Toks]. + +normalize_tok(Tok) -> + %% this is the portable way ... + [{_,Type},{_,Line},{_,Col},{_,Txt}] = + erl_scan:token_info(Tok, [category,line,column,text]), + Val = {Type,{Line,Col},Txt}, + %% io:format("here:X=~p ~p~n",[Tok,Val]), + Val. + + +%% analse the result of dodge_file + +analyse(Forms) -> + Calls = calls(Forms), + Anchors = compute_anchors(Forms), + Imports = [{{F,A},Mod} || + {attribute,_,import,{Mod,L}} <- Forms, {F,A} <- L], + D = dict:from_list(Imports), + Patches = [{Loc, resolve(X, D)} || {Loc, X} <- Calls], + {Anchors, Patches}. + +%% An anchor is something that is placed at the start of every form +%% The anchor is named after the first item in the form +%% compute_anchors(Forms) -> [{{Line,Col}, anchor()}] +%% {Line,Col} is the line and column of where the +%% form starts - this is not the same as the first token in +%% the form since we might have skipped comments and white space +%% at the start of the form. +%% anchor() is a term decscribing the anchor +%% anchor(() = {func,Name,Aritry} (for functions) +%% | +%% | {Type,{Line,Col}} anythis else + +compute_anchors(Forms) -> + A1 = [anchor0(I) || I <- Forms], + merge_specs(A1). + +%% If a specification is immediately followed by +%% a function when we promote the function anchor to point +%% at the specification. +%% We change the second tag to func2 - because we still want a +%% tag for every block + +merge_specs([{_Ln1,{specification,F,A}}=H,{Ln2, {func,F,A}}|T]) -> + [H,{Ln2,{func1,F,A}}|merge_specs(T)]; +merge_specs([H|T]) -> + [H|merge_specs(T)]; +merge_specs([]) -> + []. + +anchor0(I) -> + case anchor(I) of + {{Line,Col,_,_}, Val} -> + {{Line,Col}, Val}; + {{_,_}, _} = X -> + X + end. + +anchor({function, Ln, F, A, _}) -> {Ln, {func, F, A}}; +anchor({attribute,Ln,'spec', {{F,A},_}}) -> + {Ln, {specification,F,A}}; +anchor({attribute,Ln,module, M}) -> + {Ln, {module,M}}; +anchor({attribute,Ln,Type,_}) -> {Ln, {Type, Ln}}; +anchor({eof,Ln}) -> {Ln, eof}; +anchor({error,{Ln,_,_}}) -> + %% Ln is in a different format in errors (sigh) + {Line, Col} = Ln, + Ln1 = {Line,Col,0,""}, + {Ln1, {error, Ln}}; +anchor({tree,attribute,{attr,{_,_,_,Type}=Ln,_,_},_}) -> + {Ln, {attribute,Type,Ln}}; +anchor({tree,attribute,_, + {attribute, {atom,Ln,Type}, _}}) -> + {Ln, {attribute,Type,Ln}}; +anchor({tree,attribute, + {attr,Ln,[],none}, + _}=X) -> + io:format("FIX ME this is a bug????:~p~n",[X]), + {Ln, {other, Ln}}; +anchor(X) -> + %% this is some syntactic form that I don't know + %% about yet ... + io:format("FIX ME this is a bug????:~p~n",[X]), + exit(1). + +resolve({F,A}=Tup, D) -> + case dict:find({F,A}, D) of + {ok, Mod} -> + {remote,Mod,F,A}; + error -> + case erlang:is_builtin(erlang, F, A) of + true -> {bif, {F,A}}; + false -> {local,Tup} + end + end; +resolve({erlang,F,A}, _) -> + {bif,{F,A}}; +resolve({anchor,_,_}=A, _) -> + A; +resolve(X, _D) -> + {remote, X}. + +calls(X) -> lists:reverse(calls(X, [])). + +calls({call,_,{atom,Ln,Func},Args}, L) -> + calls(Args, [{normalise(Ln),{Func,length(Args)}}|L]); +calls({call,_,{remote,_,{atom,Ln1,Mod},{atom,_Ln2,Func}}, Args}, L) -> + calls(Args, [{normalise(Ln1),{Mod,Func,length(Args)}}|L]); +calls(T, L) when is_tuple(T) -> + calls(tuple_to_list(T), L); +calls([], L) -> + L; +calls(T, L) when is_list(T) -> + lists:foldl(fun calls/2, L, T); +calls(_, L) -> + L. + +normalise({_Line,_Col}=X) -> + X; +normalise({Line,Col,_Len,_Text}) -> + {Line, Col}. + + +prelude(L) -> + ["\n" + "\n" + "\n" + "\n" + ""]. + + +final({Tag, Toks}) -> + {Tag, {taggedToks, final1(Tag, Toks)}}. + +final1({Tag,_,_}, Toks) when Tag =:= func; Tag =:= func1 -> + %% io:format("fix_remote:~p~n",[Toks]), + fix_remote(Toks); +final1({export,_}, Toks) -> + fix_exports(Toks); +final1({import,_}, Toks) -> + fix_imports(Toks); +final1(_, Toks) -> + %% io:format("final:~p~n",[X]), + Toks. + + +fix_imports(Toks) -> + %% io:format("fix imports:~p~n",[Toks]), + Mod = find_imported_module(Toks), + %% io:format("Mod =~p~n",[Mod]), + fix_imports(Toks, Mod). + +fix_imports([{atom,A},{terminal,"/"},{integer,N}|T], Mod) -> + [{remote, Mod,A,list_to_integer(N),A++"/"++N}| + fix_imports(T, Mod)]; +fix_imports([H|T], Mod) -> + [H|fix_imports(T, Mod)]; +fix_imports([], _) -> + []. + +%% skip to the atom import, then take the first atom after import +find_imported_module([{atom,"import"}|T]) -> find_imported_module1(T); +find_imported_module([_|T]) -> find_imported_module(T). + +find_imported_module1([{atom,M}|_]) -> list_to_atom(M); +find_imported_module1([_|T]) -> find_imported_module1(T). + +%% won't work if there is white space between the symbols +%% fix later + +fix_exports([{atom,A},{terminal,"/"},{integer,N}|T]) -> + [{local,A,list_to_integer(N),A++"/"++N}|fix_exports(T)]; +fix_exports([H|T]) -> + [H|fix_exports(T)]; +fix_exports([]) -> + []. + +%% fix_remote merges Mod : Func into a single string +%% the problem is that +%% we only tag the first atom in a remote call mod:func(...) +%% mod is tagged as remote - but we want to +%% extend the tagging to include the entire mod:func +%% call ... + +fix_remote([{remote,M,F,A,Str},{terminal,":"},{atom,Str1}|T]) -> + [{remote,M,F,A,Str ++ ":" ++ Str1}|fix_remote(T)]; +fix_remote([{remote,M,F,A,Str},{white_space,S1},{terminal,":"},{atom,Str1}|T]) -> + [{remote,M,F,A,Str ++ S1 ++ ":" ++ Str1}|fix_remote(T)]; +fix_remote([{remote,M,F,A,Str},{white_space,S1},{terminal,":"},{white_space,S2},{atom,Str1}|T]) -> + [{remote,M,F,A,Str ++ S1 ++ ":" ++ S2 ++ Str1}|fix_remote(T)]; +fix_remote([{remote,M,F,A,Str},{terminal,":"},{white_space,S2},{atom,Str1}|T]) -> + [{remote,M,F,A,Str ++ ":" ++ S2 ++ Str1}|fix_remote(T)]; +fix_remote([H|T]) -> + [H|fix_remote(T)]; +fix_remote([]) -> + []. + +-spec is_keyword(atom()) -> bool(). + +is_keyword('after' ) -> true; +is_keyword('and') -> true; +is_keyword('andalso' ) -> true; +is_keyword('band' ) -> true; +is_keyword('begin' ) -> true; +is_keyword('bnot' ) -> true; +is_keyword('bor' ) -> true; +is_keyword('bsl' ) -> true; +is_keyword('bsr' ) -> true; +is_keyword('bxor' ) -> true; +is_keyword('case' ) -> true; +is_keyword('catch' ) -> true; +is_keyword('cond') -> true; +is_keyword('div' ) -> true; +is_keyword('end' ) -> true; +is_keyword('fun' ) -> true; +is_keyword('if' ) -> true; +is_keyword('not') -> true; +is_keyword('of' ) -> true; +is_keyword('or' ) -> true; +is_keyword('orelse' ) -> true; +is_keyword('query' ) -> true; +is_keyword('receive' ) -> true; +is_keyword('rem' ) -> true; +is_keyword('spec') -> true; +is_keyword('try' ) -> true; +is_keyword('when') -> true; +is_keyword('xor') -> true; +is_keyword(_) -> false. + +is_terminal('!') -> true; +is_terminal('#') -> true; +is_terminal('(') -> true; +is_terminal(')') -> true; +is_terminal('*') -> true; +is_terminal('+') -> true; +is_terminal('++') -> true; +is_terminal(',') -> true; +is_terminal('-') -> true; +is_terminal('--') -> true; +is_terminal('->') -> true; +is_terminal('.') -> true; +is_terminal('/') -> true; +is_terminal('/=') -> true; +is_terminal(':') -> true; +is_terminal(':-') -> true; +is_terminal('::') -> true; +is_terminal(';') -> true; +is_terminal('<') -> true; +is_terminal('<-') -> true; +is_terminal('<<') -> true; +is_terminal('<=') -> true; +is_terminal('=') -> true; +is_terminal('=/=') -> true; +is_terminal('=:=') -> true; +is_terminal('=<') -> true; +is_terminal('==') -> true; +is_terminal('>') -> true; +is_terminal('>=') -> true; +is_terminal('>>') -> true; +is_terminal('?') -> true; +is_terminal('[') -> true; +is_terminal(']') -> true; +is_terminal('{') -> true; +is_terminal('|') -> true; +is_terminal('||') -> true; +is_terminal('}') -> true; +is_terminal(_) -> false. -- cgit v1.2.3