%% %% %CopyrightBegin% %% %% Copyright Ericsson AB 2009-2010. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in %% compliance with the License. You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved online at http://www.erlang.org/. %% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. %% %% %CopyrightEnd% -module(demo_html_tagger). %% You will notice that this program has very few type declarations %% That's because this program uses some pretty dodgy techniques to %% get at the data it requires. %% I use epp_dodger to parse the file and the new imporved erl_scan %% find the exact values of the tokens %% epp_dodger returns an objects of type erl_syntax which are pretty %% nasty buggers. We could write the types out but it would hardly %% help. %% to test run %%-compile(export_all). -export([erl2htmltext/1, erl2htmlfile/1]). erl2htmltext(File) -> try erl2html0(File) catch What:Why -> io:format("error in:~s ~p ~p~n",[File,What,Why]) end. erl2htmlfile(File) -> try Text = erl2html0(File), Root = filename:basename(filename:rootname(File)), Out = "./html/" ++ Root ++ ".html", file:write_file(Out, [Text]) catch What:Why -> io:format("error in:~s ~p ~p~n",[File,What,Why]) end. splitErlang(File) -> {ok, Forms} = dodge_file(File), {Anchors, Patches} = analyse(Forms), Raw = read_raw_forms(File), Raw1 = merge_anchors(Anchors, Raw), Raw2 = merge_forms(Raw1, Patches, []), Rtf = [final(I) || I <- Raw2], {taggedBlocks, Rtf}. erl2html0(File) -> Tb = splitErlang(File), Html = to_html(Tb), prelude(Html). merge_forms([{Tag,L1}|T], Patches, L) -> {L2, Patches1} = apply_patches(L1, Patches), merge_forms(T, Patches1, [{Tag,L2}|L]); merge_forms([], _, L) -> lists:reverse(L). apply_patches(Toks, []) -> %% we've run out of patches but we must still simplify %% every term {[simplify(I) || I <- Toks], []}; apply_patches(Toks, Patches) -> apply_patches(Toks, Patches, []). apply_patches([{atom,Ln,Val}=A|T], Patches, L) -> case do_patch(Ln, Patches) of {yes, New, Patches1} -> New1 = reformat(New, Val), apply_patches(T, Patches1, [New1|L]); {no, Patches1} -> apply_patches(T, Patches1, [simplify(A)|L]) end; apply_patches([H|T], Patches, L) -> apply_patches(T, Patches, [simplify(H)|L]); apply_patches([], Patches, L) -> {lists:reverse(L), Patches}. simplify({atom,_,Str}) -> case (catch list_to_existing_atom(Str)) of {'EXIT', _} -> {atom, Str}; A -> case is_keyword(A) of true -> {keyword, Str}; false -> {atom, Str} end end; simplify({dot,_,Str}) -> {terminal, Str}; simplify({Tag,_,Str}) -> case is_keyword(Tag) of true -> {keyword, Str}; false -> case is_terminal(Tag) of true -> {terminal, Str}; false -> {Tag, Str} end end; simplify(X) -> io:format("simplify wtfit:~p~n",[X]), X. do_patch(Ln, [{Ln,Tag}|P]) -> {yes, Tag, P}; do_patch(Ln, [{Ln1,_}|_] = P) when Ln1 > Ln -> {no, P}; do_patch(Ln, [_|T]) -> do_patch(Ln, T); do_patch(_, []) -> {no, []}. reformat({local,{F,A}}, Str) -> {local,F,A,Str}; reformat({remote,M,F,A}, Str) -> {remote,M,F,A,Str}; reformat({remote,{M,F,A}}, Str) -> {remote,M,F,A,Str}; reformat({bif,{F,A}}, Str) -> {bif,F,A,Str}; reformat(Tag, Str) -> io:format("reformat*:~p ~p~n",[Tag,Str]), {Tag,Str}. to_html({taggedBlocks, L}) -> [[anchor1(Anchor),to_html(Body)] || {Anchor,Body} <- L]; to_html({taggedToks, L}) -> [to_html1(I) || I <- L]. anchor1({func, F, A}) -> [""]; anchor1({specification, F, A}) -> [""]; anchor1(_X) -> "". linkname(F, A) when is_atom(F) -> a2s(F) ++ "-" ++ integer_to_list(A); linkname(F, A) when is_list(F) -> F ++ "-" ++ integer_to_list(A). a2s(A) -> atom_to_list(A). font(C, S) -> ["", htmlquote(S), ""]. htmlquote("<" ++ T) -> ["<"|htmlquote(T)]; htmlquote([H|T]) -> [H|htmlquote(T)]; htmlquote([]) -> []. to_html1({white_space,V}) -> V; to_html1({comment, V}) -> font("#B22222", V); to_html1({var,V}) -> font("orange", V); to_html1({string,V}) -> font("#FA8072", V); to_html1({integer,V}) -> font("#1111AA", V); to_html1({bif,_F,_A,Str}) -> font("#FF00FF", Str); to_html1({keyword, V}) -> font("#FF00FF", V); to_html1({atom, V}) -> V; to_html1({terminal,V}) -> V; to_html1({char,V}) -> V; to_html1({float,V}) -> V; to_html1({anchor,F,A}) -> [""]; to_html1({local,F,A,Str}) -> ["", htmlquote(Str),""]; to_html1({remote,_M,_F,_A,Str}) -> %%["",htmlquote(Str),""], Str. %% merge the anchors %% there should be one block per anchor %% we check the containing form (for safety) %% merge_anchors([{_,{file,_}}|A], B) -> %% merge_anchors(A, B); merge_anchors([{Tag,Val}=H|A], [B|T]) -> case contains(Tag, B) of true -> [{Val,B}|merge_anchors(A, T)]; false -> io:format("Logic error: H=~p B=~p~n",[H,B]), exit(1) end; merge_anchors([], []) -> []; merge_anchors([], [X]) -> %% this is the last block - %% trailing white space and comments have no tag %% because eos is not a tag ... [{eof, X}]; merge_anchors(X, Y) -> io:format("ops:~p~n",[{X,Y}]), []. contains(Loc, [{_,Loc,_}|_]) -> true; contains(Loc, [_|T]) -> contains(Loc, T); contains(_, []) -> false. dodge_file(File) -> case file:open(File, [read]) of {ok, Handle} -> {ok, F} = epp_dodger:parse(Handle, {1,1}), file:close(Handle), L = [revert_forms(I) || I <- F], {ok, L}; Error -> Error end. revert_forms(F) -> case erl_syntax:is_form(F) of true -> %% revert fails on ifdef ... etc case (catch erl_syntax:revert(F)) of {'EXIT', _Why} -> io:format("error reverting:~p=~p~n",[F,_Why]), F; Other -> Other end; false -> io:format("uugh:~p~n",[F]) end. %% read up to dot %% read_raw_forms(File) -> [form()] %% form() = [tok()] %% tok() = {Type,{Line::int,Col::int},string} %% Type = atom | int | var | string ... read_raw_forms(File) -> {ok, Bin} = file:read_file(File), Str = binary_to_list(Bin), loop(erl_scan:tokens([], Str, {1,1}, [return,text]), []). loop({done, {eof,_}, eof}, L) -> lists:reverse(L); loop({done, {ok, Toks, _}, eof}, L) -> lists:reverse([normalize_toks(Toks)|L]); loop({done, {ok, Toks, Ln}, Str1}, L) -> loop(erl_scan:tokens([], Str1, Ln, [return,text]), [normalize_toks(Toks)|L]); loop({more, X}, L) -> loop(erl_scan:tokens(X, eof, {1,1}, [return,text]), L). normalize_toks(Toks) -> [normalize_tok(I) || I <- Toks]. normalize_tok(Tok) -> %% this is the portable way ... [{_,Type},{_,Line},{_,Col},{_,Txt}] = erl_scan:token_info(Tok, [category,line,column,text]), Val = {Type,{Line,Col},Txt}, %% io:format("here:X=~p ~p~n",[Tok,Val]), Val. %% analse the result of dodge_file analyse(Forms) -> Calls = calls(Forms), Anchors = compute_anchors(Forms), Imports = [{{F,A},Mod} || {attribute,_,import,{Mod,L}} <- Forms, {F,A} <- L], D = dict:from_list(Imports), Patches = [{Loc, resolve(X, D)} || {Loc, X} <- Calls], {Anchors, Patches}. %% An anchor is something that is placed at the start of every form %% The anchor is named after the first item in the form %% compute_anchors(Forms) -> [{{Line,Col}, anchor()}] %% {Line,Col} is the line and column of where the %% form starts - this is not the same as the first token in %% the form since we might have skipped comments and white space %% at the start of the form. %% anchor() is a term decscribing the anchor %% anchor(() = {func,Name,Aritry} (for functions) %% | %% | {Type,{Line,Col}} anythis else compute_anchors(Forms) -> A1 = [anchor0(I) || I <- Forms], merge_specs(A1). %% If a specification is immediately followed by %% a function when we promote the function anchor to point %% at the specification. %% We change the second tag to func2 - because we still want a %% tag for every block merge_specs([{_Ln1,{specification,F,A}}=H,{Ln2, {func,F,A}}|T]) -> [H,{Ln2,{func1,F,A}}|merge_specs(T)]; merge_specs([H|T]) -> [H|merge_specs(T)]; merge_specs([]) -> []. anchor0(I) -> case anchor(I) of {{Line,Col,_,_}, Val} -> {{Line,Col}, Val}; {{_,_}, _} = X -> X end. anchor({function, Ln, F, A, _}) -> {Ln, {func, F, A}}; anchor({attribute,Ln,'spec', {{F,A},_}}) -> {Ln, {specification,F,A}}; anchor({attribute,Ln,module, M}) -> {Ln, {module,M}}; anchor({attribute,Ln,Type,_}) -> {Ln, {Type, Ln}}; anchor({eof,Ln}) -> {Ln, eof}; anchor({error,{Ln,_,_}}) -> %% Ln is in a different format in errors (sigh) {Line, Col} = Ln, Ln1 = {Line,Col,0,""}, {Ln1, {error, Ln}}; anchor({tree,attribute,{attr,{_,_,_,Type}=Ln,_,_},_}) -> {Ln, {attribute,Type,Ln}}; anchor({tree,attribute,_, {attribute, {atom,Ln,Type}, _}}) -> {Ln, {attribute,Type,Ln}}; anchor({tree,attribute, {attr,Ln,[],none}, _}=X) -> io:format("FIX ME this is a bug????:~p~n",[X]), {Ln, {other, Ln}}; anchor(X) -> %% this is some syntactic form that I don't know %% about yet ... io:format("FIX ME this is a bug????:~p~n",[X]), exit(1). resolve({F,A}=Tup, D) -> case dict:find({F,A}, D) of {ok, Mod} -> {remote,Mod,F,A}; error -> case erlang:is_builtin(erlang, F, A) of true -> {bif, {F,A}}; false -> {local,Tup} end end; resolve({erlang,F,A}, _) -> {bif,{F,A}}; resolve({anchor,_,_}=A, _) -> A; resolve(X, _D) -> {remote, X}. calls(X) -> lists:reverse(calls(X, [])). calls({call,_,{atom,Ln,Func},Args}, L) -> calls(Args, [{normalise(Ln),{Func,length(Args)}}|L]); calls({call,_,{remote,_,{atom,Ln1,Mod},{atom,_Ln2,Func}}, Args}, L) -> calls(Args, [{normalise(Ln1),{Mod,Func,length(Args)}}|L]); calls(T, L) when is_tuple(T) -> calls(tuple_to_list(T), L); calls([], L) -> L; calls(T, L) when is_list(T) -> lists:foldl(fun calls/2, L, T); calls(_, L) -> L. normalise({_Line,_Col}=X) -> X; normalise({Line,Col,_Len,_Text}) -> {Line, Col}. prelude(L) -> ["\n" "\n" "\n" "\n" ""]. final({Tag, Toks}) -> {Tag, {taggedToks, final1(Tag, Toks)}}. final1({Tag,_,_}, Toks) when Tag =:= func; Tag =:= func1 -> %% io:format("fix_remote:~p~n",[Toks]), fix_remote(Toks); final1({export,_}, Toks) -> fix_exports(Toks); final1({import,_}, Toks) -> fix_imports(Toks); final1(_, Toks) -> %% io:format("final:~p~n",[X]), Toks. fix_imports(Toks) -> %% io:format("fix imports:~p~n",[Toks]), Mod = find_imported_module(Toks), %% io:format("Mod =~p~n",[Mod]), fix_imports(Toks, Mod). fix_imports([{atom,A},{terminal,"/"},{integer,N}|T], Mod) -> [{remote, Mod,A,list_to_integer(N),A++"/"++N}| fix_imports(T, Mod)]; fix_imports([H|T], Mod) -> [H|fix_imports(T, Mod)]; fix_imports([], _) -> []. %% skip to the atom import, then take the first atom after import find_imported_module([{atom,"import"}|T]) -> find_imported_module1(T); find_imported_module([_|T]) -> find_imported_module(T). find_imported_module1([{atom,M}|_]) -> list_to_atom(M); find_imported_module1([_|T]) -> find_imported_module1(T). %% won't work if there is white space between the symbols %% fix later fix_exports([{atom,A},{terminal,"/"},{integer,N}|T]) -> [{local,A,list_to_integer(N),A++"/"++N}|fix_exports(T)]; fix_exports([H|T]) -> [H|fix_exports(T)]; fix_exports([]) -> []. %% fix_remote merges Mod : Func into a single string %% the problem is that %% we only tag the first atom in a remote call mod:func(...) %% mod is tagged as remote - but we want to %% extend the tagging to include the entire mod:func %% call ... fix_remote([{remote,M,F,A,Str},{terminal,":"},{atom,Str1}|T]) -> [{remote,M,F,A,Str ++ ":" ++ Str1}|fix_remote(T)]; fix_remote([{remote,M,F,A,Str},{white_space,S1},{terminal,":"},{atom,Str1}|T]) -> [{remote,M,F,A,Str ++ S1 ++ ":" ++ Str1}|fix_remote(T)]; fix_remote([{remote,M,F,A,Str},{white_space,S1},{terminal,":"},{white_space,S2},{atom,Str1}|T]) -> [{remote,M,F,A,Str ++ S1 ++ ":" ++ S2 ++ Str1}|fix_remote(T)]; fix_remote([{remote,M,F,A,Str},{terminal,":"},{white_space,S2},{atom,Str1}|T]) -> [{remote,M,F,A,Str ++ ":" ++ S2 ++ Str1}|fix_remote(T)]; fix_remote([H|T]) -> [H|fix_remote(T)]; fix_remote([]) -> []. -spec is_keyword(atom()) -> boolean(). is_keyword('after' ) -> true; is_keyword('and') -> true; is_keyword('andalso' ) -> true; is_keyword('band' ) -> true; is_keyword('begin' ) -> true; is_keyword('bnot' ) -> true; is_keyword('bor' ) -> true; is_keyword('bsl' ) -> true; is_keyword('bsr' ) -> true; is_keyword('bxor' ) -> true; is_keyword('case' ) -> true; is_keyword('catch' ) -> true; is_keyword('cond') -> true; is_keyword('div' ) -> true; is_keyword('end' ) -> true; is_keyword('fun' ) -> true; is_keyword('if' ) -> true; is_keyword('not') -> true; is_keyword('of' ) -> true; is_keyword('or' ) -> true; is_keyword('orelse' ) -> true; is_keyword('receive' ) -> true; is_keyword('rem' ) -> true; is_keyword('spec') -> true; is_keyword('try' ) -> true; is_keyword('when') -> true; is_keyword('xor') -> true; is_keyword(_) -> false. is_terminal('!') -> true; is_terminal('#') -> true; is_terminal('(') -> true; is_terminal(')') -> true; is_terminal('*') -> true; is_terminal('+') -> true; is_terminal('++') -> true; is_terminal(',') -> true; is_terminal('-') -> true; is_terminal('--') -> true; is_terminal('->') -> true; is_terminal('.') -> true; is_terminal('/') -> true; is_terminal('/=') -> true; is_terminal(':') -> true; is_terminal(':-') -> true; is_terminal('::') -> true; is_terminal(';') -> true; is_terminal('<') -> true; is_terminal('<-') -> true; is_terminal('<<') -> true; is_terminal('<=') -> true; is_terminal('=') -> true; is_terminal('=/=') -> true; is_terminal('=:=') -> true; is_terminal('=<') -> true; is_terminal('==') -> true; is_terminal('>') -> true; is_terminal('>=') -> true; is_terminal('>>') -> true; is_terminal('?') -> true; is_terminal('[') -> true; is_terminal(']') -> true; is_terminal('{') -> true; is_terminal('|') -> true; is_terminal('||') -> true; is_terminal('}') -> true; is_terminal(_) -> false.