about summary refs log tree commit diff stats
path: root/lib/edoc
diff options
context:
space:
mode:
author Hans Bolinder <[email protected]> 2012-12-03 12:34:04 +0100
committer Hans Bolinder <[email protected]> 2013-01-02 10:15:17 +0100
commit b941448860fc2220695d04d8b8806509cb053b5a (patch)
tree 3ec103a7bd6791eedac019d536649a3f459e0567 /lib/edoc
parent 300c5466a7c9cfe3ed22bba2a88ba21058406402 (diff)
download otp-b941448860fc2220695d04d8b8806509cb053b5a.tar.gz
otp-b941448860fc2220695d04d8b8806509cb053b5a.tar.bz2
otp-b941448860fc2220695d04d8b8806509cb053b5a.zip
[edoc] Introduce Unicode support for source files
Diffstat (limited to 'lib/edoc')
-rw-r--r-- lib/edoc/priv/edoc.dtd 4
-rw-r--r-- lib/edoc/src/edoc.erl 3
-rw-r--r-- lib/edoc/src/edoc.hrl 6
-rw-r--r-- lib/edoc/src/edoc_data.erl 3
-rw-r--r-- lib/edoc/src/edoc_doclet.erl 20
-rw-r--r-- lib/edoc/src/edoc_extract.erl 14
-rw-r--r-- lib/edoc/src/edoc_layout.erl 15
-rw-r--r-- lib/edoc/src/edoc_lib.erl 31
-rw-r--r-- lib/edoc/src/edoc_macros.erl 4
-rw-r--r-- lib/edoc/src/edoc_wiki.erl 3
10 files changed, 78 insertions, 25 deletions
diff --git a/lib/edoc/priv/edoc.dtd b/lib/edoc/priv/edoc.dtd
index 6a332cf22f..ba4ac0db28 100644
--- a/lib/edoc/priv/edoc.dtd
+++ b/lib/edoc/priv/edoc.dtd
@@ -4,7 +4,8 @@
<!ELEMENT overview (title, description?, author*, copyright?, version?,
since?, see*, reference*, todo?, packages, modules)>
<!ATTLIST overview
- root CDATA #IMPLIED>
+ root CDATA #IMPLIED
+ encoding CDATA #IMPLIED>
<!ELEMENT title (#PCDATA)>
@@ -25,6 +26,7 @@
name CDATA #REQUIRED
private (yes | no) #IMPLIED
hidden (yes | no) #IMPLIED
+ encoding CDATA #IMPLIED
root CDATA #IMPLIED>
<!ELEMENT description (briefDescription, fullDescription?)>
diff --git a/lib/edoc/src/edoc.erl b/lib/edoc/src/edoc.erl
index 544465b14a..5a599e6e97 100644
--- a/lib/edoc/src/edoc.erl
+++ b/lib/edoc/src/edoc.erl
@@ -120,7 +120,8 @@ file(Name, Options) ->
Suffix = proplists:get_value(file_suffix, Options,
?DEFAULT_FILE_SUFFIX),
Dir = proplists:get_value(dir, Options, filename:dirname(Name)),
- edoc_lib:write_file(Text, Dir, BaseName ++ Suffix).
+ Encoding = [{encoding, edoc_lib:read_encoding(Name, [])}],
+ edoc_lib:write_file(Text, Dir, BaseName ++ Suffix, '', Encoding).
%% TODO: better documentation of files/1/2, packages/1/2, application/1/2/3
diff --git a/lib/edoc/src/edoc.hrl b/lib/edoc/src/edoc.hrl
index 98debba4ab..44c5d6fef4 100644
--- a/lib/edoc/src/edoc.hrl
+++ b/lib/edoc/src/edoc.hrl
@@ -48,7 +48,8 @@
%% functions = ordset(function_name()),
%% exports = ordset(function_name()),
%% attributes = ordset({atom(), term()}),
-%% records = [{atom(), [{atom(), term()}]}]}
+%% records = [{atom(), [{atom(), term()}]}],
+%% encoding = epp:source_encoding()}
%% ordset(T) = sets:ordset(T)
%% function_name(T) = {atom(), integer()}
@@ -57,7 +58,8 @@
functions = [],
exports = [],
attributes = [],
- records = []
+ records = [],
+ encoding = latin1
}).
%% Environment for generating documentation data
diff --git a/lib/edoc/src/edoc_data.erl b/lib/edoc/src/edoc_data.erl
index 624f9177a2..f88ba05f4b 100644
--- a/lib/edoc/src/edoc_data.erl
+++ b/lib/edoc/src/edoc_data.erl
@@ -83,7 +83,8 @@ module(Module, Entries, Env, Opts) ->
AllTags = get_all_tags(Entries),
Functions = function_filter(Entries, Opts),
Out = {module, ([{name, Name},
- {root, Env#env.root}]
+ {root, Env#env.root},
+ {encoding, Module#module.encoding}]
++ case is_private(HeaderTags) of
true -> [{private, "yes"}];
false -> []
diff --git a/lib/edoc/src/edoc_doclet.erl b/lib/edoc/src/edoc_doclet.erl
index 385d20e9ae..d6561e10fc 100644
--- a/lib/edoc/src/edoc_doclet.erl
+++ b/lib/edoc/src/edoc_doclet.erl
@@ -193,7 +193,8 @@ source({M, P, Name, Path}, Dir, Suffix, Env, Set, Private, Hidden,
true ->
Text = edoc:layout(Doc, Options),
Name1 = packages:last(M) ++ Suffix,
- edoc_lib:write_file(Text, Dir, Name1, P),
+ Encoding = [{encoding,encoding(Doc)}],
+ edoc_lib:write_file(Text, Dir, Name1, P, Encoding),
{sets:add_element(Module, Set), Error};
false ->
{Set, Error}
@@ -359,14 +360,19 @@ xhtml_1(Title, CSS, Body) ->
overview(Dir, Title, Env, Opts) ->
File = proplists:get_value(overview, Opts,
filename:join(Dir, ?OVERVIEW_FILE)),
+ Encoding = edoc_lib:read_encoding(File, [{in_comment_only, false}]),
Tags = read_file(File, overview, Env, Opts),
- Data = edoc_data:overview(Title, Tags, Env, Opts),
+ Data0 = edoc_data:overview(Title, Tags, Env, Opts),
+ EncodingAttribute = #xmlAttribute{name = encoding,
+ value = atom_to_list(Encoding)},
+ #xmlElement{attributes = As} = Data0,
+ Data = Data0#xmlElement{attributes = [EncodingAttribute | As]},
F = fun (M) ->
M:overview(Data, Opts)
end,
Text = edoc_lib:run_layout(F, Opts),
- edoc_lib:write_file(Text, Dir, ?OVERVIEW_SUMMARY).
-
+ EncOpts = [{encoding,Encoding}],
+ edoc_lib:write_file(Text, Dir, ?OVERVIEW_SUMMARY, '', EncOpts).
copy_image(Dir) ->
case code:priv_dir(?EDOC_APP) of
@@ -441,6 +447,12 @@ is_hidden(E) ->
_ -> false
end.
+encoding(E) ->
+ case get_attrval(encoding, E) of
+ "latin1" -> latin1;
+ _ -> utf8
+ end.
+
get_attrval(Name, #xmlElement{attributes = As}) ->
case get_attr(Name, As) of
[#xmlAttribute{value = V}] ->
diff --git a/lib/edoc/src/edoc_extract.erl b/lib/edoc/src/edoc_extract.erl
index 5a79e127f6..dcab816f54 100644
--- a/lib/edoc/src/edoc_extract.erl
+++ b/lib/edoc/src/edoc_extract.erl
@@ -226,7 +226,7 @@ add_macro_defs(Defs0, Opts, Env) ->
%% lines of text before the first tag are ignored. `Env' is an
%% environment created by {@link edoc_lib:get_doc_env/4}. Upon error,
%% `Reason' is an atom returned from the call to {@link
-%% //kernel/file:read_file/1}.
+%% //kernel/file:read_file/1} or the atom 'invalid_unicode'.
%%
%% See {@link text/4} for options.
@@ -235,7 +235,13 @@ add_macro_defs(Defs0, Opts, Env) ->
file(File, Context, Env, Opts) ->
case file:read_file(File) of
{ok, Bin} ->
- {ok, text(binary_to_list(Bin), Context, Env, Opts, File)};
+ Enc = edoc_lib:read_encoding(File,[{in_comment_only, false}]),
+ case catch unicode:characters_to_list(Bin, Enc) of
+ String when is_list(String) ->
+ {ok, text(String, Context, Env, Opts, File)};
+ _ ->
+ {error, invalid_unicode}
+ end;
{error, _} = Error ->
Error
end.
@@ -306,12 +312,14 @@ get_module_info(Forms, File) ->
Exports = ordsets:from_list(get_list_keyval(exports, L)),
Attributes = ordsets:from_list(get_list_keyval(attributes, L)),
Records = get_list_keyval(records, L),
+ Encoding = edoc_lib:read_encoding(File, []),
#module{name = Name,
parameters = Vars,
functions = Functions,
exports = ordsets:intersection(Exports, Functions),
attributes = Attributes,
- records = Records}.
+ records = Records,
+ encoding = Encoding}.
get_list_keyval(Key, L) ->
case lists:keyfind(Key, 1, L) of
diff --git a/lib/edoc/src/edoc_layout.erl b/lib/edoc/src/edoc_layout.erl
index 951cec121c..7bd0615f5c 100644
--- a/lib/edoc/src/edoc_layout.erl
+++ b/lib/edoc/src/edoc_layout.erl
@@ -210,7 +210,8 @@ layout_module(#xmlElement{name = module, content = Es}=E, Opts) ->
++ [hr, ?NL]
++ navigation("bottom")
++ timestamp()),
- xhtml(Title, stylesheet(Opts), Body).
+ Encoding = get_attrval(encoding, E),
+ xhtml(Title, stylesheet(Opts), Body, Encoding).
module_params(Es) ->
As = [{get_text(argName, Es1),
@@ -956,10 +957,17 @@ local_label(R) ->
"#" ++ R.
xhtml(Title, CSS, Body) ->
+ xhtml(Title, CSS, Body, "latin1").
+
+xhtml(Title, CSS, Body, Encoding) ->
+ EncString = case Encoding of
+ "latin1" -> "ISO-8859-1";
+ _ -> "UTF-8"
+ end,
[{html, [?NL,
{head, [?NL,
{meta, [{'http-equiv',"Content-Type"},
- {content, "text/html; charset=ISO-8859-1"}],
+ {content, "text/html; charset="++EncString}],
[]},
?NL,
{title, Title},
@@ -1021,7 +1029,8 @@ overview(E=#xmlElement{name = overview, content = Es}, Options) ->
++ [?NL, hr]
++ navigation("bottom")
++ timestamp()),
- XML = xhtml(Title, stylesheet(Opts), Body),
+ Encoding = get_attrval(encoding, E),
+ XML = xhtml(Title, stylesheet(Opts), Body, Encoding),
xmerl:export_simple(XML, ?HTML_EXPORT, []).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% NYTT
diff --git a/lib/edoc/src/edoc_lib.erl b/lib/edoc/src/edoc_lib.erl
index 90fb8a679c..67f9ee2b4a 100644
--- a/lib/edoc/src/edoc_lib.erl
+++ b/lib/edoc/src/edoc_lib.erl
@@ -30,10 +30,10 @@
parse_contact/2, escape_uri/1, join_uri/2, is_relative_uri/1,
is_name/1, to_label/1, find_doc_dirs/0, find_sources/2,
find_sources/3, find_file/3, try_subdir/2, unique/1,
- write_file/3, write_file/4, write_info_file/4,
+ write_file/3, write_file/4, write_file/5, write_info_file/4,
read_info_file/1, get_doc_env/1, get_doc_env/4, copy_file/2,
uri_get/1, run_doclet/2, run_layout/2,
- simplify_path/1, timestr/1, datestr/1]).
+ simplify_path/1, timestr/1, datestr/1, read_encoding/2]).
-import(edoc_report, [report/2, warning/2]).
@@ -57,6 +57,13 @@ datestr({Y,M,D}) ->
lists:flatten(io_lib:fwrite("~s ~w ~w",[lists:nth(M, Ms),D,Y])).
%% @private
+read_encoding(File, Options) ->
+ case epp:read_encoding(File, Options) of
+ none -> epp:default_encoding();
+ Encoding -> Encoding
+ end.
+
+%% @private
count(X, Xs) ->
count(X, Xs, 0).
@@ -677,7 +684,6 @@ try_subdir(Dir, Subdir) ->
write_file(Text, Dir, Name) ->
write_file(Text, Dir, Name, '').
-
%% @spec (Text::deep_string(), Dir::edoc:filename(),
%% Name::edoc:filename(), Package::atom()|string()) -> ok
%% @doc Like {@link write_file/3}, but adds path components to the target
@@ -685,10 +691,13 @@ write_file(Text, Dir, Name) ->
%% @private
write_file(Text, Dir, Name, Package) ->
+ write_file(Text, Dir, Name, Package, [{encoding,latin1}]).
+
+write_file(Text, Dir, Name, Package, Options) ->
Dir1 = filename:join([Dir | packages:split(Package)]),
File = filename:join(Dir1, Name),
ok = filelib:ensure_dir(File),
- case file:open(File, [write]) of
+ case file:open(File, [write] ++ Options) of
{ok, FD} ->
io:put_chars(FD, Text),
ok = file:close(FD);
@@ -705,8 +714,9 @@ write_info_file(App, Packages, Modules, Dir) ->
Ts1 = if App =:= ?NO_APP -> Ts;
true -> [{application, App} | Ts]
end,
- S = [io_lib:fwrite("~p.\n", [T]) || T <- Ts1],
- write_file(S, Dir, ?INFO_FILE).
+ S0 = [io_lib:fwrite("~p.\n", [T]) || T <- Ts1],
+ S = ["%% encoding: UTF-8\n" | S0],
+ write_file(S, Dir, ?INFO_FILE, '', [{encoding,unicode}]).
%% @spec (Name::edoc:filename()) -> {ok, string()} | {error, Reason}
%%
@@ -714,7 +724,14 @@ write_info_file(App, Packages, Modules, Dir) ->
read_file(File) ->
case file:read_file(File) of
- {ok, Bin} -> {ok, binary_to_list(Bin)};
+ {ok, Bin} ->
+ Enc = edoc_lib:read_encoding(File, []),
+ case catch unicode:characters_to_list(Bin, Enc) of
+ String when is_list(String) ->
+ {ok, String};
+ _ ->
+ {error, invalid_unicode}
+ end;
{error, Reason} -> {error, Reason}
end.
diff --git a/lib/edoc/src/edoc_macros.erl b/lib/edoc/src/edoc_macros.erl
index 70fb38bf0a..08686c4fb5 100644
--- a/lib/edoc/src/edoc_macros.erl
+++ b/lib/edoc/src/edoc_macros.erl
@@ -88,13 +88,13 @@ link_macro(S, Line, Env) ->
true -> " target=\"_top\""; % note the initial space
false -> ""
end,
- lists:flatten(io_lib:fwrite("<a href=\"~s\"~s>~s</a>",
+ lists:flatten(io_lib:fwrite("<a href=\"~s\"~s>~ts</a>",
[URI, Target, Txt])).
section_macro(S, _Line, _Env) ->
S1 = lists:reverse(edoc_lib:strip_space(
lists:reverse(edoc_lib:strip_space(S)))),
- lists:flatten(io_lib:format("<a href=\"#~s\">~s</a>",
+ lists:flatten(io_lib:format("<a href=\"#~ts\">~ts</a>",
[edoc_lib:to_label(S1), S1])).
type_macro(S, Line, Env) ->
diff --git a/lib/edoc/src/edoc_wiki.erl b/lib/edoc/src/edoc_wiki.erl
index 5c71658af5..cc0529d2a9 100644
--- a/lib/edoc/src/edoc_wiki.erl
+++ b/lib/edoc/src/edoc_wiki.erl
@@ -80,6 +80,7 @@ parse_xml(Data, Line) ->
parse_xml_1(Text, Line) ->
Text1 = "<doc>" ++ Text ++ "</doc>",
+ %% Any coding except "utf-8".
Opts = [{line, Line}, {encoding, 'iso-8859-1'}],
case catch {ok, xmerl_scan:string(Text1, Opts)} of
{ok, {E, _}} ->
@@ -174,7 +175,7 @@ expand_heading_1(Cs, N, L, As) ->
expand_heading_2(Ts, Cs, N, L, As) ->
H = ?BASE_HEADING + N,
- Ts1 = io_lib:format("<h~w><a name=\"~s\">~s</a></h~w>\n",
+ Ts1 = io_lib:format("<h~w><a name=\"~ts\">~ts</a></h~w>\n",
[H, make_label(Ts), Ts, H]),
expand_new_line(Cs, L + 1, lists:reverse(lists:flatten(Ts1), As)).