aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHans Bolinder <hasse@erlang.org>2014-06-25 15:26:45 +0200
committerHans Bolinder <hasse@erlang.org>2014-06-26 14:36:34 +0200
commitde3774d93b061a54d33c7c58f07c69cbbb36cadf (patch)
tree3ded8f54f2917a7276a2f7a73fcdc73417e6c4bf
parent461dc05384eece7b4b7d84370fb0a2cf96ed2f6d (diff)
downloadotp-de3774d93b061a54d33c7c58f07c69cbbb36cadf.tar.gz
otp-de3774d93b061a54d33c7c58f07c69cbbb36cadf.tar.bz2
otp-de3774d93b061a54d33c7c58f07c69cbbb36cadf.zip
edoc, syntax_tools: Don't fail on invalid UTF-8
As a temporary measure to ease the transition to default UTF-8 encoding, automatically fall back to the Latin-1 encoding (without any warnings; the Erlang Compiler will emit a proper warning). The intention is to remove this workaround in OTP 18 or 19.
-rw-r--r--lib/edoc/src/edoc.erl35
-rw-r--r--lib/edoc/test/edoc_SUITE.erl22
-rw-r--r--lib/edoc/test/edoc_SUITE_data/un1.erl7
-rw-r--r--lib/edoc/test/edoc_SUITE_data/un2.erl8
-rw-r--r--lib/edoc/test/edoc_SUITE_data/un3.erl8
-rw-r--r--lib/syntax_tools/src/epp_dodger.erl28
-rw-r--r--lib/syntax_tools/src/erl_comment_scan.erl13
7 files changed, 114 insertions, 7 deletions
diff --git a/lib/edoc/src/edoc.erl b/lib/edoc/src/edoc.erl
index a87a8471e3..983f04e8b6 100644
--- a/lib/edoc/src/edoc.erl
+++ b/lib/edoc/src/edoc.erl
@@ -696,15 +696,44 @@ read_source_2(Name, Opts) ->
%% The line of the dot token will be copied to the integer token.
parse_file(Name, Includes, Macros) ->
- case epp:open(Name, Includes, Macros) of
- {ok, Epp} ->
- try {ok, parse_file(Epp)}
+ case parse_file(utf8, Name, Includes, Macros) of
+ invalid_unicode ->
+ parse_file(latin1, Name, Includes, Macros);
+ Ret ->
+ Ret
+ end.
+
+parse_file(DefEncoding, Name, Includes, Macros) ->
+ Options = [{name, Name},
+ {includes, Includes},
+ {macros, Macros},
+ {default_encoding, DefEncoding}],
+ case epp:open([extra | Options]) of
+ {ok, Epp, Extra} ->
+ try parse_file(Epp) of
+ Forms ->
+ Encoding = proplists:get_value(encoding, Extra),
+ case find_invalid_unicode(Forms) of
+ invalid_unicode when Encoding =/= utf8 ->
+ invalid_unicode;
+ _ ->
+ {ok, Forms}
+ end
after _ = epp:close(Epp)
end;
Error ->
Error
end.
+find_invalid_unicode([H|T]) ->
+ case H of
+ {error,{_Line,file_io_server,invalid_unicode}} ->
+ invalid_unicode;
+ _Other ->
+ find_invalid_unicode(T)
+ end;
+find_invalid_unicode([]) -> none.
+
parse_file(Epp) ->
case scan_and_parse(Epp) of
{ok, Form} ->
diff --git a/lib/edoc/test/edoc_SUITE.erl b/lib/edoc/test/edoc_SUITE.erl
index c9c7811afb..c63660c8c0 100644
--- a/lib/edoc/test/edoc_SUITE.erl
+++ b/lib/edoc/test/edoc_SUITE.erl
@@ -22,12 +22,12 @@
init_per_group/2,end_per_group/2]).
%% Test cases
--export([app/1,appup/1,build_std/1,build_map_module/1]).
+-export([app/1,appup/1,build_std/1,build_map_module/1,otp_12008/1]).
suite() -> [{ct_hooks,[ts_install_cth]}].
all() ->
- [app,appup,build_std,build_map_module].
+ [app,appup,build_std,build_map_module,otp_12008].
groups() ->
[].
@@ -77,3 +77,21 @@ build_map_module(Config) when is_list(Config) ->
Filename = filename:join(DataDir, "map_module.erl"),
ok = edoc:file(Filename, [{dir, PrivDir}]),
ok.
+
+otp_12008(Config) when is_list(Config) ->
+ DataDir = ?config(data_dir, Config),
+ PrivDir = ?config(priv_dir, Config),
+ Un1 = filename:join(DataDir, "un1.erl"),
+ Un2 = filename:join(DataDir, "un2.erl"),
+ Un3 = filename:join(DataDir, "un3.erl"),
+ %% epp_dodger
+ Opts1 = [{dir, PrivDir}],
+ ok = edoc:files([Un1], Opts1),
+ ok = edoc:files([Un2], Opts1),
+ {'EXIT', error} = (catch edoc:files([Un3], Opts1)),
+ %% epp
+ Opts2 = [{preprocess, true}, {dir, PrivDir}],
+ ok = edoc:files([Un1], Opts2),
+ ok = edoc:files([Un2], Opts2),
+ {'EXIT', error} = (catch edoc:files([Un3], Opts2)),
+ ok.
diff --git a/lib/edoc/test/edoc_SUITE_data/un1.erl b/lib/edoc/test/edoc_SUITE_data/un1.erl
new file mode 100644
index 0000000000..0c48e7f940
--- /dev/null
+++ b/lib/edoc/test/edoc_SUITE_data/un1.erl
@@ -0,0 +1,7 @@
+-module(un1).
+
+-export([t/0]).
+
+%% @doc F�pp
+t() ->
+ �rlig.
diff --git a/lib/edoc/test/edoc_SUITE_data/un2.erl b/lib/edoc/test/edoc_SUITE_data/un2.erl
new file mode 100644
index 0000000000..a6d13f4723
--- /dev/null
+++ b/lib/edoc/test/edoc_SUITE_data/un2.erl
@@ -0,0 +1,8 @@
+-module(un2).
+%% coding: latin-1
+
+-export([t/0]).
+
+%% @doc F�pp
+t() ->
+ �rlig.
diff --git a/lib/edoc/test/edoc_SUITE_data/un3.erl b/lib/edoc/test/edoc_SUITE_data/un3.erl
new file mode 100644
index 0000000000..fbe9591dce
--- /dev/null
+++ b/lib/edoc/test/edoc_SUITE_data/un3.erl
@@ -0,0 +1,8 @@
+-module(un3).
+%% coding: utf-8
+
+-export([t/0]).
+
+%% @doc F�pp
+t() ->
+ �rlig.
diff --git a/lib/syntax_tools/src/epp_dodger.erl b/lib/syntax_tools/src/epp_dodger.erl
index 131be4e8e4..7e12eab1b5 100644
--- a/lib/syntax_tools/src/epp_dodger.erl
+++ b/lib/syntax_tools/src/epp_dodger.erl
@@ -184,9 +184,27 @@ quick_parse_file(File, Options) ->
parse_file(File, fun quick_parse/3, Options ++ [no_fail]).
parse_file(File, Parser, Options) ->
+ case do_parse_file(utf8, File, Parser, Options) of
+ {ok, Forms}=Ret ->
+ case find_invalid_unicode(Forms) of
+ none ->
+ Ret;
+ invalid_unicode ->
+ case epp:read_encoding(File) of
+ utf8 ->
+ Ret;
+ _ ->
+ do_parse_file(latin1, File, Parser, Options)
+ end
+ end;
+ Else ->
+ Else
+ end.
+
+do_parse_file(DefEncoding, File, Parser, Options) ->
case file:open(File, [read]) of
{ok, Dev} ->
- _ = epp:set_encoding(Dev),
+ _ = epp:set_encoding(Dev, DefEncoding),
try Parser(Dev, 1, Options)
after ok = file:close(Dev)
end;
@@ -194,6 +212,14 @@ parse_file(File, Parser, Options) ->
Error
end.
+find_invalid_unicode([H|T]) ->
+ case H of
+ {error, {_Line, file_io_server, invalid_unicode}} ->
+ invalid_unicode;
+ _Other ->
+ find_invalid_unicode(T)
+ end;
+find_invalid_unicode([]) -> none.
%% =====================================================================
%% @spec parse(IODevice) -> {ok, Forms} | {error, errorinfo()}
diff --git a/lib/syntax_tools/src/erl_comment_scan.erl b/lib/syntax_tools/src/erl_comment_scan.erl
index dae7530ce7..03429d4d42 100644
--- a/lib/syntax_tools/src/erl_comment_scan.erl
+++ b/lib/syntax_tools/src/erl_comment_scan.erl
@@ -72,13 +72,24 @@ file(Name) ->
{ok, V} ->
case V of
{ok, B} ->
- Enc = case epp:read_encoding(Name) of
+ Encoding = epp:read_encoding_from_binary(B),
+ Enc = case Encoding of
none -> epp:default_encoding();
Enc0 -> Enc0
end,
case catch unicode:characters_to_list(B, Enc) of
String when is_list(String) ->
string(String);
+ R when Encoding =:= none ->
+ case
+ catch unicode:characters_to_list(B, latin1)
+ of
+ String when is_list(String) ->
+ string(String);
+ _ ->
+ error_read_file(Name1),
+ exit(R)
+ end;
R ->
error_read_file(Name1),
exit(R)