From b941448860fc2220695d04d8b8806509cb053b5a Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Mon, 3 Dec 2012 12:34:04 +0100 Subject: [edoc] Introduce Unicode support for source files --- lib/edoc/priv/edoc.dtd | 4 +++- lib/edoc/src/edoc.erl | 3 ++- lib/edoc/src/edoc.hrl | 6 ++++-- lib/edoc/src/edoc_data.erl | 3 ++- lib/edoc/src/edoc_doclet.erl | 20 ++++++++++++++++---- lib/edoc/src/edoc_extract.erl | 14 +++++++++++--- lib/edoc/src/edoc_layout.erl | 15 ++++++++++++--- lib/edoc/src/edoc_lib.erl | 31 ++++++++++++++++++++++++------- lib/edoc/src/edoc_macros.erl | 4 ++-- lib/edoc/src/edoc_wiki.erl | 3 ++- 10 files changed, 78 insertions(+), 25 deletions(-) diff --git a/lib/edoc/priv/edoc.dtd b/lib/edoc/priv/edoc.dtd index 6a332cf22f..ba4ac0db28 100644 --- a/lib/edoc/priv/edoc.dtd +++ b/lib/edoc/priv/edoc.dtd @@ -4,7 +4,8 @@ + root CDATA #IMPLIED + encoding CDATA #IMPLIED> @@ -25,6 +26,7 @@ name CDATA #REQUIRED private (yes | no) #IMPLIED hidden (yes | no) #IMPLIED + encoding CDATA #IMPLIED root CDATA #IMPLIED> diff --git a/lib/edoc/src/edoc.erl b/lib/edoc/src/edoc.erl index 544465b14a..5a599e6e97 100644 --- a/lib/edoc/src/edoc.erl +++ b/lib/edoc/src/edoc.erl @@ -120,7 +120,8 @@ file(Name, Options) -> Suffix = proplists:get_value(file_suffix, Options, ?DEFAULT_FILE_SUFFIX), Dir = proplists:get_value(dir, Options, filename:dirname(Name)), - edoc_lib:write_file(Text, Dir, BaseName ++ Suffix). + Encoding = [{encoding, edoc_lib:read_encoding(Name, [])}], + edoc_lib:write_file(Text, Dir, BaseName ++ Suffix, '', Encoding). %% TODO: better documentation of files/1/2, packages/1/2, application/1/2/3 diff --git a/lib/edoc/src/edoc.hrl b/lib/edoc/src/edoc.hrl index 98debba4ab..44c5d6fef4 100644 --- a/lib/edoc/src/edoc.hrl +++ b/lib/edoc/src/edoc.hrl @@ -48,7 +48,8 @@ %% functions = ordset(function_name()), %% exports = ordset(function_name()), %% attributes = ordset({atom(), term()}), -%% records = [{atom(), [{atom(), term()}]}]} +%% records = [{atom(), [{atom(), term()}]}], +%% encoding = epp:source_encoding()} %% ordset(T) = sets:ordset(T) %% function_name(T) = {atom(), integer()} @@ -57,7 +58,8 @@ functions = [], exports = [], attributes = [], - records = [] + records = [], + encoding = latin1 }). %% Environment for generating documentation data diff --git a/lib/edoc/src/edoc_data.erl b/lib/edoc/src/edoc_data.erl index 624f9177a2..f88ba05f4b 100644 --- a/lib/edoc/src/edoc_data.erl +++ b/lib/edoc/src/edoc_data.erl @@ -83,7 +83,8 @@ module(Module, Entries, Env, Opts) -> AllTags = get_all_tags(Entries), Functions = function_filter(Entries, Opts), Out = {module, ([{name, Name}, - {root, Env#env.root}] + {root, Env#env.root}, + {encoding, Module#module.encoding}] ++ case is_private(HeaderTags) of true -> [{private, "yes"}]; false -> [] diff --git a/lib/edoc/src/edoc_doclet.erl b/lib/edoc/src/edoc_doclet.erl index 385d20e9ae..d6561e10fc 100644 --- a/lib/edoc/src/edoc_doclet.erl +++ b/lib/edoc/src/edoc_doclet.erl @@ -193,7 +193,8 @@ source({M, P, Name, Path}, Dir, Suffix, Env, Set, Private, Hidden, true -> Text = edoc:layout(Doc, Options), Name1 = packages:last(M) ++ Suffix, - edoc_lib:write_file(Text, Dir, Name1, P), + Encoding = [{encoding,encoding(Doc)}], + edoc_lib:write_file(Text, Dir, Name1, P, Encoding), {sets:add_element(Module, Set), Error}; false -> {Set, Error} @@ -359,14 +360,19 @@ xhtml_1(Title, CSS, Body) -> overview(Dir, Title, Env, Opts) -> File = proplists:get_value(overview, Opts, filename:join(Dir, ?OVERVIEW_FILE)), + Encoding = edoc_lib:read_encoding(File, [{in_comment_only, false}]), Tags = read_file(File, overview, Env, Opts), - Data = edoc_data:overview(Title, Tags, Env, Opts), + Data0 = edoc_data:overview(Title, Tags, Env, Opts), + EncodingAttribute = #xmlAttribute{name = encoding, + value = atom_to_list(Encoding)}, + #xmlElement{attributes = As} = Data0, + Data = Data0#xmlElement{attributes = [EncodingAttribute | As]}, F = fun (M) -> M:overview(Data, Opts) end, Text = edoc_lib:run_layout(F, Opts), - edoc_lib:write_file(Text, Dir, ?OVERVIEW_SUMMARY). - + EncOpts = [{encoding,Encoding}], + edoc_lib:write_file(Text, Dir, ?OVERVIEW_SUMMARY, '', EncOpts). copy_image(Dir) -> case code:priv_dir(?EDOC_APP) of @@ -441,6 +447,12 @@ is_hidden(E) -> _ -> false end. +encoding(E) -> + case get_attrval(encoding, E) of + "latin1" -> latin1; + _ -> utf8 + end. + get_attrval(Name, #xmlElement{attributes = As}) -> case get_attr(Name, As) of [#xmlAttribute{value = V}] -> diff --git a/lib/edoc/src/edoc_extract.erl b/lib/edoc/src/edoc_extract.erl index 5a79e127f6..dcab816f54 100644 --- a/lib/edoc/src/edoc_extract.erl +++ b/lib/edoc/src/edoc_extract.erl @@ -226,7 +226,7 @@ add_macro_defs(Defs0, Opts, Env) -> %% lines of text before the first tag are ignored. `Env' is an %% environment created by {@link edoc_lib:get_doc_env/4}. Upon error, %% `Reason' is an atom returned from the call to {@link -%% //kernel/file:read_file/1}. +%% //kernel/file:read_file/1} or the atom 'invalid_unicode'. %% %% See {@link text/4} for options. @@ -235,7 +235,13 @@ add_macro_defs(Defs0, Opts, Env) -> file(File, Context, Env, Opts) -> case file:read_file(File) of {ok, Bin} -> - {ok, text(binary_to_list(Bin), Context, Env, Opts, File)}; + Enc = edoc_lib:read_encoding(File,[{in_comment_only, false}]), + case catch unicode:characters_to_list(Bin, Enc) of + String when is_list(String) -> + {ok, text(String, Context, Env, Opts, File)}; + _ -> + {error, invalid_unicode} + end; {error, _} = Error -> Error end. @@ -306,12 +312,14 @@ get_module_info(Forms, File) -> Exports = ordsets:from_list(get_list_keyval(exports, L)), Attributes = ordsets:from_list(get_list_keyval(attributes, L)), Records = get_list_keyval(records, L), + Encoding = edoc_lib:read_encoding(File, []), #module{name = Name, parameters = Vars, functions = Functions, exports = ordsets:intersection(Exports, Functions), attributes = Attributes, - records = Records}. + records = Records, + encoding = Encoding}. get_list_keyval(Key, L) -> case lists:keyfind(Key, 1, L) of diff --git a/lib/edoc/src/edoc_layout.erl b/lib/edoc/src/edoc_layout.erl index 951cec121c..7bd0615f5c 100644 --- a/lib/edoc/src/edoc_layout.erl +++ b/lib/edoc/src/edoc_layout.erl @@ -210,7 +210,8 @@ layout_module(#xmlElement{name = module, content = Es}=E, Opts) -> ++ [hr, ?NL] ++ navigation("bottom") ++ timestamp()), - xhtml(Title, stylesheet(Opts), Body). + Encoding = get_attrval(encoding, E), + xhtml(Title, stylesheet(Opts), Body, Encoding). module_params(Es) -> As = [{get_text(argName, Es1), @@ -956,10 +957,17 @@ local_label(R) -> "#" ++ R. xhtml(Title, CSS, Body) -> + xhtml(Title, CSS, Body, "latin1"). + +xhtml(Title, CSS, Body, Encoding) -> + EncString = case Encoding of + "latin1" -> "ISO-8859-1"; + _ -> "UTF-8" + end, [{html, [?NL, {head, [?NL, {meta, [{'http-equiv',"Content-Type"}, - {content, "text/html; charset=ISO-8859-1"}], + {content, "text/html; charset="++EncString}], []}, ?NL, {title, Title}, @@ -1021,7 +1029,8 @@ overview(E=#xmlElement{name = overview, content = Es}, Options) -> ++ [?NL, hr] ++ navigation("bottom") ++ timestamp()), - XML = xhtml(Title, stylesheet(Opts), Body), + Encoding = get_attrval(encoding, E), + XML = xhtml(Title, stylesheet(Opts), Body, Encoding), xmerl:export_simple(XML, ?HTML_EXPORT, []). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% NYTT diff --git a/lib/edoc/src/edoc_lib.erl b/lib/edoc/src/edoc_lib.erl index 90fb8a679c..67f9ee2b4a 100644 --- a/lib/edoc/src/edoc_lib.erl +++ b/lib/edoc/src/edoc_lib.erl @@ -30,10 +30,10 @@ parse_contact/2, escape_uri/1, join_uri/2, is_relative_uri/1, is_name/1, to_label/1, find_doc_dirs/0, find_sources/2, find_sources/3, find_file/3, try_subdir/2, unique/1, - write_file/3, write_file/4, write_info_file/4, + write_file/3, write_file/4, write_file/5, write_info_file/4, read_info_file/1, get_doc_env/1, get_doc_env/4, copy_file/2, uri_get/1, run_doclet/2, run_layout/2, - simplify_path/1, timestr/1, datestr/1]). + simplify_path/1, timestr/1, datestr/1, read_encoding/2]). -import(edoc_report, [report/2, warning/2]). @@ -56,6 +56,13 @@ datestr({Y,M,D}) -> "Oct", "Nov", "Dec"], lists:flatten(io_lib:fwrite("~s ~w ~w",[lists:nth(M, Ms),D,Y])). +%% @private +read_encoding(File, Options) -> + case epp:read_encoding(File, Options) of + none -> epp:default_encoding(); + Encoding -> Encoding + end. + %% @private count(X, Xs) -> count(X, Xs, 0). @@ -677,7 +684,6 @@ try_subdir(Dir, Subdir) -> write_file(Text, Dir, Name) -> write_file(Text, Dir, Name, ''). - %% @spec (Text::deep_string(), Dir::edoc:filename(), %% Name::edoc:filename(), Package::atom()|string()) -> ok %% @doc Like {@link write_file/3}, but adds path components to the target @@ -685,10 +691,13 @@ write_file(Text, Dir, Name) -> %% @private write_file(Text, Dir, Name, Package) -> + write_file(Text, Dir, Name, Package, [{encoding,latin1}]). + +write_file(Text, Dir, Name, Package, Options) -> Dir1 = filename:join([Dir | packages:split(Package)]), File = filename:join(Dir1, Name), ok = filelib:ensure_dir(File), - case file:open(File, [write]) of + case file:open(File, [write] ++ Options) of {ok, FD} -> io:put_chars(FD, Text), ok = file:close(FD); @@ -705,8 +714,9 @@ write_info_file(App, Packages, Modules, Dir) -> Ts1 = if App =:= ?NO_APP -> Ts; true -> [{application, App} | Ts] end, - S = [io_lib:fwrite("~p.\n", [T]) || T <- Ts1], - write_file(S, Dir, ?INFO_FILE). + S0 = [io_lib:fwrite("~p.\n", [T]) || T <- Ts1], + S = ["%% encoding: UTF-8\n" | S0], + write_file(S, Dir, ?INFO_FILE, '', [{encoding,unicode}]). %% @spec (Name::edoc:filename()) -> {ok, string()} | {error, Reason} %% @@ -714,7 +724,14 @@ write_info_file(App, Packages, Modules, Dir) -> read_file(File) -> case file:read_file(File) of - {ok, Bin} -> {ok, binary_to_list(Bin)}; + {ok, Bin} -> + Enc = edoc_lib:read_encoding(File, []), + case catch unicode:characters_to_list(Bin, Enc) of + String when is_list(String) -> + {ok, String}; + _ -> + {error, invalid_unicode} + end; {error, Reason} -> {error, Reason} end. diff --git a/lib/edoc/src/edoc_macros.erl b/lib/edoc/src/edoc_macros.erl index 70fb38bf0a..08686c4fb5 100644 --- a/lib/edoc/src/edoc_macros.erl +++ b/lib/edoc/src/edoc_macros.erl @@ -88,13 +88,13 @@ link_macro(S, Line, Env) -> true -> " target=\"_top\""; % note the initial space false -> "" end, - lists:flatten(io_lib:fwrite("~s", + lists:flatten(io_lib:fwrite("~ts", [URI, Target, Txt])). section_macro(S, _Line, _Env) -> S1 = lists:reverse(edoc_lib:strip_space( lists:reverse(edoc_lib:strip_space(S)))), - lists:flatten(io_lib:format("~s", + lists:flatten(io_lib:format("~ts", [edoc_lib:to_label(S1), S1])). type_macro(S, Line, Env) -> diff --git a/lib/edoc/src/edoc_wiki.erl b/lib/edoc/src/edoc_wiki.erl index 5c71658af5..cc0529d2a9 100644 --- a/lib/edoc/src/edoc_wiki.erl +++ b/lib/edoc/src/edoc_wiki.erl @@ -80,6 +80,7 @@ parse_xml(Data, Line) -> parse_xml_1(Text, Line) -> Text1 = "" ++ Text ++ "", + %% Any coding except "utf-8". Opts = [{line, Line}, {encoding, 'iso-8859-1'}], case catch {ok, xmerl_scan:string(Text1, Opts)} of {ok, {E, _}} -> @@ -174,7 +175,7 @@ expand_heading_1(Cs, N, L, As) -> expand_heading_2(Ts, Cs, N, L, As) -> H = ?BASE_HEADING + N, - Ts1 = io_lib:format("~s\n", + Ts1 = io_lib:format("~ts\n", [H, make_label(Ts), Ts, H]), expand_new_line(Cs, L + 1, lists:reverse(lists:flatten(Ts1), As)). -- cgit v1.2.3