From 84adefa331c4159d432d22840663c38f155cd4c1 Mon Sep 17 00:00:00 2001
From: Erlang/OTP <otp@erlang.org>
Date: Fri, 20 Nov 2009 14:54:40 +0000
Subject: The R13B03 release.

---
 lib/wx/examples/demo/demo_html_tagger.erl | 534 ++++++++++++++++++++++++++++++
 1 file changed, 534 insertions(+)
 create mode 100644 lib/wx/examples/demo/demo_html_tagger.erl

(limited to 'lib/wx/examples/demo/demo_html_tagger.erl')

diff --git a/lib/wx/examples/demo/demo_html_tagger.erl b/lib/wx/examples/demo/demo_html_tagger.erl
new file mode 100644
index 0000000000..9b6d1fd950
--- /dev/null
+++ b/lib/wx/examples/demo/demo_html_tagger.erl
@@ -0,0 +1,534 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+
+-module(demo_html_tagger).
+
+%% You will notice that this program has very few type declarations
+%% That's because this program uses some pretty dodgy techniques to
+%% get at the data it requires.
+
+%% I use epp_dodger to parse the file and the new imporved erl_scan
+%% find the exact values of the tokens
+
+%% epp_dodger returns an objects of type erl_syntax which are pretty
+%% nasty buggers. We could write the types out but it would hardly
+%% help.
+
+%% to test run
+
+%%-compile(export_all).
+
+
+-export([erl2htmltext/1, erl2htmlfile/1]).
+
+erl2htmltext(File) ->
+    try
+	erl2html0(File)
+    catch
+	What:Why ->
+	    io:format("error in:~s ~p ~p~n",[File,What,Why])
+    end.
+
+erl2htmlfile(File) ->
+    try
+	Text = erl2html0(File),
+	Root = filename:basename(filename:rootname(File)),
+	Out = "./html/" ++ Root ++ ".html",
+	file:write_file(Out, [Text])
+    catch
+	What:Why ->
+	    io:format("error in:~s ~p ~p~n",[File,What,Why])
+    end.
+
+
+splitErlang(File) ->
+    {ok, Forms} = dodge_file(File),
+    {Anchors, Patches} = analyse(Forms),
+    Raw = read_raw_forms(File),
+    Raw1 = merge_anchors(Anchors, Raw),
+    Raw2 = merge_forms(Raw1, Patches, []),
+    Rtf = [final(I) || I <- Raw2],
+    {taggedBlocks, Rtf}.
+
+erl2html0(File) ->
+    Tb = splitErlang(File),
+    Html = to_html(Tb),
+    prelude(Html).
+
+merge_forms([{Tag,L1}|T], Patches, L) ->
+    {L2, Patches1} = apply_patches(L1, Patches),
+    merge_forms(T, Patches1, [{Tag,L2}|L]);
+merge_forms([], _, L) ->
+    lists:reverse(L).
+
+apply_patches(Toks, [])      ->
+    %% we've run out of patches but we must still simplify 
+    %% every term
+    {[simplify(I) || I <- Toks], []};
+apply_patches(Toks, Patches) ->
+    apply_patches(Toks, Patches, []).
+
+apply_patches([{atom,Ln,Val}=A|T], Patches, L) ->
+    case do_patch(Ln, Patches)  of
+	{yes, New, Patches1} ->
+	    New1 = reformat(New, Val),
+	    apply_patches(T, Patches1, [New1|L]);
+	{no, Patches1} ->
+	    apply_patches(T, Patches1, [simplify(A)|L])
+    end;
+apply_patches([H|T], Patches, L) ->
+    apply_patches(T, Patches, [simplify(H)|L]);
+apply_patches([], Patches, L) ->
+    {lists:reverse(L), Patches}.
+
+
+simplify({atom,_,Str}) ->
+    case (catch list_to_existing_atom(Str)) of
+	{'EXIT', _} ->
+	    {atom, Str};
+	A ->
+	    case is_keyword(A) of
+		true -> {keyword, Str};
+		false ->
+		    {atom, Str}
+	    end
+    end;
+simplify({dot,_,Str}) ->
+    {terminal, Str};
+simplify({Tag,_,Str}) ->
+    case is_keyword(Tag) of
+	true ->
+	    {keyword, Str};
+	false ->
+	    case is_terminal(Tag) of
+		true ->
+		    {terminal, Str};
+		false ->
+		    {Tag, Str}
+	    end
+    end;
+simplify(X) ->
+    io:format("simplify wtfit:~p~n",[X]),
+    X.
+
+do_patch(Ln, [{Ln,Tag}|P])                  -> {yes, Tag, P};
+do_patch(Ln, [{Ln1,_}|_] = P) when Ln1 > Ln -> {no, P};
+do_patch(Ln, [_|T])                         -> do_patch(Ln, T);
+do_patch(_, [])                             -> {no, []}.
+
+reformat({local,{F,A}}, Str) -> {local,F,A,Str};
+reformat({remote,M,F,A}, Str) -> {remote,M,F,A,Str};
+reformat({remote,{M,F,A}}, Str) -> {remote,M,F,A,Str};
+reformat({bif,{F,A}}, Str) -> {bif,F,A,Str};
+reformat(Tag, Str) ->
+    io:format("reformat*:~p ~p~n",[Tag,Str]),
+    {Tag,Str}.
+
+to_html({taggedBlocks, L}) ->
+    [[anchor1(Anchor),to_html(Body)] || {Anchor,Body} <- L];
+to_html({taggedToks, L}) ->
+    [to_html1(I) || I <- L].
+
+anchor1({func, F, A}) ->
+    ["<a name='",linkname(F,A),"'></a>"];
+anchor1({specification, F, A}) ->
+    ["<a name='",linkname(F,A),"'></a>"];
+anchor1(_X) ->
+    "".
+
+linkname(F, A) when is_atom(F) ->
+    a2s(F) ++ "-" ++ integer_to_list(A);
+linkname(F, A) when is_list(F) ->
+    F ++ "-" ++ integer_to_list(A).
+
+a2s(A) ->
+    atom_to_list(A).
+
+font(C, S) ->
+    ["<font color=\"", C, "\">", htmlquote(S), "</font>"].
+
+htmlquote("<" ++ T) -> ["&lt;"|htmlquote(T)];
+htmlquote([H|T]) -> [H|htmlquote(T)];
+htmlquote([]) -> [].
+
+to_html1({white_space,V}) -> V;
+to_html1({comment, V})    -> font("#B22222", V);
+to_html1({var,V})         -> font("orange", V);
+to_html1({string,V})      -> font("#FA8072", V);
+to_html1({integer,V})     -> font("#1111AA", V);
+to_html1({bif,_F,_A,Str}) -> font("#FF00FF", Str);
+to_html1({keyword, V})    -> font("#FF00FF", V);
+to_html1({atom, V})       -> V;
+to_html1({terminal,V})    -> V;
+to_html1({char,V})        -> V;
+to_html1({float,V})       -> V;
+to_html1({anchor,F,A}) ->
+    ["<a name='",linkname(F,A),"'></a>"];
+to_html1({local,F,A,Str}) ->  
+    ["<a href='#",linkname(F,A),"'>", 
+     htmlquote(Str),"</a>"];
+to_html1({remote,_M,_F,_A,Str}) ->  
+    %%["<a href='",htmlname(M), "#",linkname(F,A),"'>",htmlquote(Str),"</a>"],
+    Str.
+
+%% merge the anchors
+%% there should be one block per anchor
+%% we check the containing form (for safety)
+
+%% merge_anchors([{_,{file,_}}|A], B) -> 
+%%     merge_anchors(A, B);
+merge_anchors([{Tag,Val}=H|A], [B|T])  ->
+    case contains(Tag, B) of
+	true ->
+	    [{Val,B}|merge_anchors(A, T)];
+	false ->
+	    io:format("Logic error: H=~p B=~p~n",[H,B]),
+	    exit(1)
+    end;
+merge_anchors([], []) -> [];
+merge_anchors([], [X]) ->
+    %% this is the last block -
+    %% trailing white space and comments have no tag
+    %% because eos is not a tag ...
+    [{eof, X}];
+merge_anchors(X, Y) -> 
+    io:format("ops:~p~n",[{X,Y}]),
+    [].
+
+contains(Loc, [{_,Loc,_}|_]) -> true;
+contains(Loc, [_|T]) ->  contains(Loc, T);
+contains(_, []) -> false.
+
+
+dodge_file(File) ->   
+    case file:open(File, [read]) of
+	{ok, Handle} ->
+	    {ok, F} = epp_dodger:parse(Handle, {1,1}),
+	    file:close(Handle),
+	    L = [revert_forms(I) || I <- F],
+	    {ok, L};
+	Error ->
+	    Error
+    end.
+
+revert_forms(F) ->
+    case erl_syntax:is_form(F) of
+	true ->
+	    %% revert fails on ifdef ... etc
+	    case (catch erl_syntax:revert(F)) of
+		{'EXIT', _Why} ->
+		    io:format("error reverting:~p=~p~n",[F,_Why]),
+		    F;
+		Other ->
+		    Other
+	    end;
+	false ->
+	    io:format("uugh:~p~n",[F])
+    end.
+
+%% read up to dot
+%% read_raw_forms(File) -> [form()]
+%% form() = [tok()]
+%% tok() = {Type,{Line::int,Col::int},string}
+%% Type = atom | int | var | string ...
+
+read_raw_forms(File) ->
+    {ok, Bin} = file:read_file(File),
+    Str = binary_to_list(Bin),
+    loop(erl_scan:tokens([], Str, {1,1}, [return,text]), []).
+
+loop({done, {eof,_}, eof}, L) ->
+    lists:reverse(L);
+loop({done, {ok, Toks, _}, eof}, L) ->
+    lists:reverse([normalize_toks(Toks)|L]);
+loop({done, {ok, Toks, Ln}, Str1}, L) ->
+    loop(erl_scan:tokens([], Str1, Ln, [return,text]), 
+	 [normalize_toks(Toks)|L]);
+loop({more, X}, L) ->
+    loop(erl_scan:tokens(X, eof, {1,1}, [return,text]), L).
+
+normalize_toks(Toks) ->
+    [normalize_tok(I) || I <- Toks].
+
+normalize_tok(Tok) ->
+    %% this is the portable way ...
+    [{_,Type},{_,Line},{_,Col},{_,Txt}] = 
+	erl_scan:token_info(Tok, [category,line,column,text]),
+    Val  = {Type,{Line,Col},Txt},
+    %% io:format("here:X=~p ~p~n",[Tok,Val]),
+    Val.
+
+
+%% analse the result of dodge_file
+
+analyse(Forms) ->
+    Calls = calls(Forms),
+    Anchors = compute_anchors(Forms),
+    Imports = [{{F,A},Mod} ||
+		  {attribute,_,import,{Mod,L}} <- Forms, {F,A} <- L],
+    D = dict:from_list(Imports),
+    Patches = [{Loc, resolve(X, D)} || {Loc, X} <- Calls],
+    {Anchors, Patches}.
+
+%% An anchor is something that is placed at the start of every form
+%% The anchor is named after the first item in the form
+%% compute_anchors(Forms) -> [{{Line,Col}, anchor()}]
+%%   {Line,Col} is the line and column of where the
+%%   form starts - this is not the same as the first token in 
+%%   the form since we might have skipped comments and white space
+%%   at the start of the form.
+%%   anchor() is a term decscribing the anchor
+%%   anchor(() = {func,Name,Aritry} (for functions)
+%%             | 
+%%             | {Type,{Line,Col}} anythis else    
+
+compute_anchors(Forms) ->
+    A1 = [anchor0(I) || I <- Forms],
+    merge_specs(A1).
+
+%% If a specification is immediately followed by
+%% a function when we promote the function anchor to point
+%% at the specification.
+%% We change the second tag to func2 - because we still want a
+%% tag for every block 
+
+merge_specs([{_Ln1,{specification,F,A}}=H,{Ln2, {func,F,A}}|T]) ->
+    [H,{Ln2,{func1,F,A}}|merge_specs(T)];
+merge_specs([H|T]) ->
+    [H|merge_specs(T)];
+merge_specs([]) ->
+    [].
+
+anchor0(I) ->
+    case  anchor(I) of
+	{{Line,Col,_,_}, Val} ->
+	    {{Line,Col}, Val};
+	{{_,_}, _} = X ->
+	    X
+    end.
+
+anchor({function, Ln, F, A, _}) -> {Ln, {func, F, A}};
+anchor({attribute,Ln,'spec', {{F,A},_}}) ->
+    {Ln, {specification,F,A}};
+anchor({attribute,Ln,module, M}) ->
+    {Ln, {module,M}};
+anchor({attribute,Ln,Type,_}) -> {Ln, {Type, Ln}};
+anchor({eof,Ln}) -> {Ln, eof};
+anchor({error,{Ln,_,_}}) -> 
+    %% Ln is in a different format in errors (sigh)
+    {Line, Col} = Ln,
+    Ln1 = {Line,Col,0,""},
+    {Ln1, {error, Ln}};
+anchor({tree,attribute,{attr,{_,_,_,Type}=Ln,_,_},_}) -> 
+    {Ln, {attribute,Type,Ln}};
+anchor({tree,attribute,_,
+	{attribute, {atom,Ln,Type}, _}}) ->
+    {Ln, {attribute,Type,Ln}};
+anchor({tree,attribute,
+	{attr,Ln,[],none},
+	_}=X) ->
+    io:format("FIX ME this is a bug????:~p~n",[X]),
+    {Ln, {other, Ln}};
+anchor(X) ->
+    %% this is some syntactic form that I don't know
+    %% about yet ...
+    io:format("FIX ME this is a bug????:~p~n",[X]),
+    exit(1).
+    
+resolve({F,A}=Tup, D) ->
+    case dict:find({F,A}, D) of
+	{ok, Mod} ->
+	    {remote,Mod,F,A};
+	error ->
+	    case erlang:is_builtin(erlang, F, A) of
+		true  -> {bif, {F,A}};
+		false -> {local,Tup}
+	    end
+    end;
+resolve({erlang,F,A}, _) ->
+    {bif,{F,A}};
+resolve({anchor,_,_}=A, _) ->
+    A;
+resolve(X, _D) ->
+    {remote, X}.
+
+calls(X) -> lists:reverse(calls(X, [])).
+
+calls({call,_,{atom,Ln,Func},Args}, L) ->
+    calls(Args, [{normalise(Ln),{Func,length(Args)}}|L]);
+calls({call,_,{remote,_,{atom,Ln1,Mod},{atom,_Ln2,Func}}, Args}, L) ->
+    calls(Args, [{normalise(Ln1),{Mod,Func,length(Args)}}|L]);
+calls(T, L) when is_tuple(T) ->
+    calls(tuple_to_list(T), L);
+calls([], L) ->
+    L;
+calls(T, L) when is_list(T) ->
+    lists:foldl(fun calls/2, L, T);
+calls(_, L) ->
+    L.
+
+normalise({_Line,_Col}=X) ->
+    X;
+normalise({Line,Col,_Len,_Text}) ->
+    {Line, Col}.
+
+
+prelude(L) ->
+    ["<html>\n"
+     "<head>\n"
+     "</head>\n"
+     "<body>\n"
+     "<ul><pre>\n",L,"\n</pre></ul></body>"].
+
+
+final({Tag, Toks}) ->
+    {Tag, {taggedToks, final1(Tag, Toks)}}.
+
+final1({Tag,_,_}, Toks) when Tag =:= func; Tag =:= func1 ->
+    %% io:format("fix_remote:~p~n",[Toks]),
+    fix_remote(Toks);
+final1({export,_}, Toks) ->
+    fix_exports(Toks);
+final1({import,_}, Toks) ->
+    fix_imports(Toks);
+final1(_, Toks) ->
+    %% io:format("final:~p~n",[X]),
+    Toks.
+
+
+fix_imports(Toks) ->
+    %% io:format("fix imports:~p~n",[Toks]),
+    Mod = find_imported_module(Toks),
+    %% io:format("Mod =~p~n",[Mod]),
+    fix_imports(Toks, Mod).
+
+fix_imports([{atom,A},{terminal,"/"},{integer,N}|T], Mod) ->
+    [{remote, Mod,A,list_to_integer(N),A++"/"++N}|
+     fix_imports(T, Mod)];
+fix_imports([H|T], Mod) ->
+    [H|fix_imports(T, Mod)];
+fix_imports([], _) ->
+    [].
+
+%% skip to the atom import, then take the first atom after import
+find_imported_module([{atom,"import"}|T]) -> find_imported_module1(T);
+find_imported_module([_|T])               -> find_imported_module(T).
+
+find_imported_module1([{atom,M}|_]) -> list_to_atom(M);
+find_imported_module1([_|T])        -> find_imported_module1(T).
+
+%% won't work if there is white space between the symbols
+%% fix later
+
+fix_exports([{atom,A},{terminal,"/"},{integer,N}|T]) ->
+    [{local,A,list_to_integer(N),A++"/"++N}|fix_exports(T)];
+fix_exports([H|T]) ->
+    [H|fix_exports(T)];
+fix_exports([]) ->
+    [].
+
+%% fix_remote merges Mod : Func into a single string
+%%  the problem is that
+%%  we only tag the first atom in a remote call mod:func(...)
+%%  mod is tagged as remote - but we want to
+%%  extend the tagging to include the entire mod:func
+%%  call ...
+
+fix_remote([{remote,M,F,A,Str},{terminal,":"},{atom,Str1}|T]) ->
+    [{remote,M,F,A,Str ++ ":" ++ Str1}|fix_remote(T)];
+fix_remote([{remote,M,F,A,Str},{white_space,S1},{terminal,":"},{atom,Str1}|T]) ->
+    [{remote,M,F,A,Str ++ S1 ++ ":" ++ Str1}|fix_remote(T)];
+fix_remote([{remote,M,F,A,Str},{white_space,S1},{terminal,":"},{white_space,S2},{atom,Str1}|T]) ->
+    [{remote,M,F,A,Str ++ S1 ++ ":" ++ S2 ++ Str1}|fix_remote(T)];
+fix_remote([{remote,M,F,A,Str},{terminal,":"},{white_space,S2},{atom,Str1}|T]) ->
+    [{remote,M,F,A,Str ++ ":" ++ S2 ++ Str1}|fix_remote(T)];
+fix_remote([H|T]) ->
+    [H|fix_remote(T)];
+fix_remote([]) ->
+    [].
+
+-spec is_keyword(atom()) -> bool().
+
+is_keyword('after' ) -> true;
+is_keyword('and') -> true;
+is_keyword('andalso' ) -> true;
+is_keyword('band' ) -> true;
+is_keyword('begin' ) -> true;
+is_keyword('bnot' ) -> true;
+is_keyword('bor' ) -> true;
+is_keyword('bsl' ) -> true;
+is_keyword('bsr' ) -> true;
+is_keyword('bxor' ) -> true;
+is_keyword('case' ) -> true;
+is_keyword('catch' ) -> true;
+is_keyword('cond') -> true;
+is_keyword('div' ) -> true;
+is_keyword('end' ) -> true;
+is_keyword('fun' ) -> true;
+is_keyword('if' ) -> true;
+is_keyword('not') -> true;
+is_keyword('of' ) -> true;
+is_keyword('or' ) -> true;
+is_keyword('orelse' ) -> true;
+is_keyword('query' ) -> true;
+is_keyword('receive' ) -> true;
+is_keyword('rem' ) -> true;
+is_keyword('spec') -> true;
+is_keyword('try' ) -> true;
+is_keyword('when') -> true;
+is_keyword('xor') -> true;
+is_keyword(_) -> false.
+
+is_terminal('!')  -> true;
+is_terminal('#')  -> true;
+is_terminal('(')  -> true;
+is_terminal(')')  -> true;
+is_terminal('*')  -> true;
+is_terminal('+')  -> true;
+is_terminal('++') -> true;
+is_terminal(',')  -> true;
+is_terminal('-')  -> true;
+is_terminal('--') -> true;
+is_terminal('->') -> true;
+is_terminal('.')  -> true;
+is_terminal('/')  -> true;
+is_terminal('/=') -> true;
+is_terminal(':')  -> true;
+is_terminal(':-') -> true;
+is_terminal('::') -> true;
+is_terminal(';')  -> true;
+is_terminal('<')  -> true;
+is_terminal('<-') -> true;
+is_terminal('<<') -> true;
+is_terminal('<=') -> true;
+is_terminal('=')  -> true;
+is_terminal('=/=') -> true;
+is_terminal('=:=') -> true;
+is_terminal('=<') -> true;
+is_terminal('==') -> true;
+is_terminal('>')  -> true;
+is_terminal('>=') -> true;
+is_terminal('>>') -> true;
+is_terminal('?')  -> true;
+is_terminal('[')  -> true;
+is_terminal(']')  -> true;
+is_terminal('{')  -> true;
+is_terminal('|')  -> true;
+is_terminal('||') -> true;
+is_terminal('}')  -> true;
+is_terminal(_) -> false.
-- 
cgit v1.2.3