aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Björn Gustavsson <[email protected]> 2014-03-03 13:50:23 +0100
committer: Björn Gustavsson <[email protected]> 2014-03-18 17:47:51 +0100
commit: bfe7e45cda6591dc31405ed1be961f079cc541c9 (patch)
tree: 45c894c1fb3a291d6ab7e5e083b648640554e540
parent: f3cee0e9f409c5850709f11ba15cec22d7387401 (diff)
download: otp-bfe7e45cda6591dc31405ed1be961f079cc541c9.tar.gz
otp-bfe7e45cda6591dc31405ed1be961f079cc541c9.tar.bz2
otp-bfe7e45cda6591dc31405ed1be961f079cc541c9.zip
Don't fail compilation for modules that contain invalid UTF-8
The default encoding for Erlang modules is now UTF-8, and compilation would fail if a module contained byte sequences that are not valid UTF-8 sequences. In a large project with, say, many hundreds of Erlang modules containing developer names such as "Björn" or "Håkan" encoded in latin-1, that could mean that many hundreds of files would need to be modified just to get started testing OTP 17. As a temporary measure to ease the transition, automatically fall back to the latin-1 encoding with a warning for any module that contains invalid byte sequences and for which no encoding has been specified. The intention is to remove this workaround in OTP 18 or 19.
-rw-r--r-- lib/compiler/src/compile.erl 59
-rw-r--r-- lib/compiler/test/error_SUITE.erl 28
-rw-r--r-- lib/compiler/test/warnings_SUITE.erl 38
3 files changed, 111 insertions, 14 deletions
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 9030dd998b..c7d91070f6 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -234,7 +234,9 @@ format_error({crash,Pass,Reason}) ->
format_error({bad_return,Pass,Reason}) ->
io_lib:format("internal error in ~p;\nbad return value: ~ts", [Pass,format_error_reason(Reason)]);
format_error({module_name,Mod,Filename}) ->
- io_lib:format("Module name '~s' does not match file name '~ts'", [Mod,Filename]).
+ io_lib:format("Module name '~s' does not match file name '~ts'", [Mod,Filename]);
+format_error(reparsing_invalid_unicode) ->
+ "Non-UTF-8 character(s) detected, but no encoding declared. Encode the file in UTF-8 or add \"%% coding: latin-1\" at the beginning of the file. Retrying with latin-1 encoding.".
format_error_reason({Reason, Stack}) when is_list(Stack) ->
StackFun = fun
@@ -792,20 +794,59 @@ no_native_compilation(BeamFile, #compile{options=Opts0}) ->
_ -> false
end.
-parse_module(St) ->
- Opts = St#compile.options,
- Cwd = ".",
- IncludePath = [Cwd, St#compile.dir|inc_paths(Opts)],
- R = epp:parse_file(St#compile.ifile, IncludePath, pre_defs(Opts)),
+parse_module(St0) ->
+ case do_parse_module(utf8, St0) of
+ {ok,_}=Ret ->
+ Ret;
+ {error,_}=Ret ->
+ Ret;
+ {invalid_unicode,File,Line} ->
+ case do_parse_module(latin1, St0) of
+ {ok,St} ->
+ Es = [{File,[{Line,?MODULE,reparsing_invalid_unicode}]}],
+ {ok,St#compile{warnings=Es++St#compile.warnings}};
+ {error,St} ->
+ Es = [{File,[{Line,?MODULE,reparsing_invalid_unicode}]}],
+ {error,St#compile{errors=Es++St#compile.errors}}
+ end
+ end.
+
+do_parse_module(DefEncoding, #compile{ifile=File,options=Opts,dir=Dir}=St) ->
+ R = epp:parse_file(File,
+ [{includes,[".",Dir|inc_paths(Opts)]},
+ {macros,pre_defs(Opts)},
+ {default_encoding,DefEncoding},
+ extra]),
case R of
- {ok,Forms} ->
- Encoding = epp:read_encoding(St#compile.ifile),
- {ok,St#compile{code=Forms,encoding=Encoding}};
+ {ok,Forms,Extra} ->
+ Encoding = proplists:get_value(encoding, Extra),
+ case find_invalid_unicode(Forms, File) of
+ none ->
+ {ok,St#compile{code=Forms,encoding=Encoding}};
+ {invalid_unicode,_,_}=Ret ->
+ case Encoding of
+ none ->
+ Ret;
+ _ ->
+ {ok,St#compile{code=Forms,encoding=Encoding}}
+ end
+ end;
{error,E} ->
Es = [{St#compile.ifile,[{none,?MODULE,{epp,E}}]}],
{error,St#compile{errors=St#compile.errors ++ Es}}
end.
+find_invalid_unicode([H|T], File0) ->
+ case H of
+ {attribute,_,file,{File,_}} ->
+ find_invalid_unicode(T, File);
+ {error,{Line,file_io_server,invalid_unicode}} ->
+ {invalid_unicode,File0,Line};
+ _Other ->
+ find_invalid_unicode(T, File0)
+ end;
+find_invalid_unicode([], _) -> none.
+
parse_core(St) ->
case file:read_file(St#compile.ifile) of
{ok,Bin} ->
diff --git a/lib/compiler/test/error_SUITE.erl b/lib/compiler/test/error_SUITE.erl
index 5cdf429a5f..bd877bb528 100644
--- a/lib/compiler/test/error_SUITE.erl
+++ b/lib/compiler/test/error_SUITE.erl
@@ -23,7 +23,7 @@
-export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1,
init_per_group/2,end_per_group/2,
head_mismatch_line/1,warnings_as_errors/1, bif_clashes/1,
- transforms/1,forbidden_maps/1]).
+ transforms/1,forbidden_maps/1,bad_utf8/1]).
%% Used by transforms/1 test case.
-export([parse_transform/2]).
@@ -36,7 +36,8 @@ all() ->
groups() ->
[{p,test_lib:parallel(),
- [head_mismatch_line,warnings_as_errors,bif_clashes,transforms,forbidden_maps]}].
+ [head_mismatch_line,warnings_as_errors,bif_clashes,
+ transforms,forbidden_maps,bad_utf8]}].
init_per_suite(Config) ->
Config.
@@ -254,6 +255,23 @@ forbidden_maps(Config) when is_list(Config) ->
[] = run2(Config, Ts1),
ok.
+bad_utf8(Config) ->
+ Ts = [{bad_utf8,
+ %% If coding is specified explicitly as utf-8, there should be
+ %% a compilation error; we must not fall back to parsing the
+ %% file in latin-1 mode.
+ <<"%% coding: utf-8
+ %% Bj",246,"rn
+ t() -> \"",246,"\".
+ ">>,
+ [],
+ {error,[{2,epp,cannot_parse},
+ {2,file_io_server,invalid_unicode}],
+ []}
+ }],
+ [] = run2(Config, Ts),
+ ok.
+
run(Config, Tests) ->
?line File = test_filename(Config),
@@ -318,6 +336,7 @@ run_test(Test0, File, Warnings, WriteBeam) ->
?line compile:file(File, [binary,report|Warnings]),
%% Test result of compilation.
+ io:format("~p\n", [Opts]),
?line Res = case compile:file(File, Opts) of
{ok,Mod,_,[{_File,Ws}]} ->
%io:format("compile:file(~s,~p) ->~n~p~n",
@@ -335,6 +354,11 @@ run_test(Test0, File, Warnings, WriteBeam) ->
%io:format("compile:file(~s,~p) ->~n~p~n",
% [File,Opts,_ZZ]),
{error,Es,Ws};
+ {error,[{XFile,Es1},{XFile,Es2}],Ws} = _ZZ
+ when is_list(XFile) ->
+ %io:format("compile:file(~s,~p) ->~n~p~n",
+ % [File,Opts,_ZZ]),
+ {error,Es1++Es2,Ws};
{error,Es,[{_File,Ws}]} = _ZZ->
%io:format("compile:file(~s,~p) ->~n~p~n",
% [File,Opts,_ZZ]),
diff --git a/lib/compiler/test/warnings_SUITE.erl b/lib/compiler/test/warnings_SUITE.erl
index de56a59e12..c3b02819f9 100644
--- a/lib/compiler/test/warnings_SUITE.erl
+++ b/lib/compiler/test/warnings_SUITE.erl
@@ -37,8 +37,9 @@
-export([pattern/1,pattern2/1,pattern3/1,pattern4/1,
guard/1,bad_arith/1,bool_cases/1,bad_apply/1,
- files/1,effect/1,bin_opt_info/1,bin_construction/1, comprehensions/1,
- maps/1,redundant_boolean_clauses/1]).
+ files/1,effect/1,bin_opt_info/1,bin_construction/1,
+ comprehensions/1,maps/1,redundant_boolean_clauses/1,
+ latin1_fallback/1]).
% Default timetrap timeout (set in init_per_testcase).
-define(default_timeout, ?t:minutes(2)).
@@ -63,7 +64,7 @@ groups() ->
[pattern,pattern2,pattern3,pattern4,guard,
bad_arith,bool_cases,bad_apply,files,effect,
bin_opt_info,bin_construction,comprehensions,maps,
- redundant_boolean_clauses]}].
+ redundant_boolean_clauses,latin1_fallback]}].
init_per_suite(Config) ->
Config.
@@ -591,6 +592,37 @@ redundant_boolean_clauses(Config) when is_list(Config) ->
run(Config, Ts),
ok.
+latin1_fallback(Conf) when is_list(Conf) ->
+ DataDir = ?privdir,
+ IncFile = filename:join(DataDir, "include_me.hrl"),
+ file:write_file(IncFile, <<"%% ",246," in include file\n">>),
+ Ts1 = [{latin1_fallback1,
+ %% Test that the compiler falls back to latin-1 with
+ %% a warning if a file has no encoding and does not
+ %% contain correct UTF-8 sequences.
+ <<"%% Bj",246,"rn
+ t(_) -> \"",246,"\";
+ t(x) -> ok.
+ ">>,
+ [],
+ {warnings,[{1,compile,reparsing_invalid_unicode},
+ {3,sys_core_fold,{nomatch_shadow,2}}]}}],
+ [] = run(Conf, Ts1),
+
+ Ts2 = [{latin1_fallback2,
+ %% Test that the compiler falls back to latin-1 with
+ %% a warning if an included file has no encoding and
+ %% does not contain correct UTF-8 sequences.
+ <<"
+
+ -include(\"include_me.hrl\").
+ ">>,
+ [],
+ {warnings,[{1,compile,reparsing_invalid_unicode}]}
+ }],
+ [] = run(Conf, Ts2),
+ ok.
+
%%%
%%% End of test cases.
%%%