aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorBjörn Gustavsson <bjorn@erlang.org>2014-03-03 13:50:23 +0100
committerBjörn Gustavsson <bjorn@erlang.org>2014-03-18 17:47:51 +0100
commitbfe7e45cda6591dc31405ed1be961f079cc541c9 (patch)
tree45c894c1fb3a291d6ab7e5e083b648640554e540 /lib
parentf3cee0e9f409c5850709f11ba15cec22d7387401 (diff)
downloadotp-bfe7e45cda6591dc31405ed1be961f079cc541c9.tar.gz
otp-bfe7e45cda6591dc31405ed1be961f079cc541c9.tar.bz2
otp-bfe7e45cda6591dc31405ed1be961f079cc541c9.zip
Don't fail compilation for modules that contain invalid UTF-8
The default encoding for Erlang modules is now UTF-8, and the compilation would fail if a module contained byte sequences that are not valid UTF-8 sequences. In a large project with say many hundreds of Erlang modules with names of developers such as "Björn" or "Håkan" encoded in latin-1, that could mean that many hundreds of files would need to be modified just to get started testing OTP 17. As a temporary measure to ease the transition, automatically fall back to the latin-1 encoding with a warning for any module that contains invalid byte sequences and for which no encoding has been specified. The intention is to remove this workaround in OTP 18 or 19.
Diffstat (limited to 'lib')
-rw-r--r--lib/compiler/src/compile.erl59
-rw-r--r--lib/compiler/test/error_SUITE.erl28
-rw-r--r--lib/compiler/test/warnings_SUITE.erl38
3 files changed, 111 insertions, 14 deletions
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 9030dd998b..c7d91070f6 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -234,7 +234,9 @@ format_error({crash,Pass,Reason}) ->
format_error({bad_return,Pass,Reason}) ->
io_lib:format("internal error in ~p;\nbad return value: ~ts", [Pass,format_error_reason(Reason)]);
format_error({module_name,Mod,Filename}) ->
- io_lib:format("Module name '~s' does not match file name '~ts'", [Mod,Filename]).
+ io_lib:format("Module name '~s' does not match file name '~ts'", [Mod,Filename]);
+format_error(reparsing_invalid_unicode) ->
+ "Non-UTF-8 character(s) detected, but no encoding declared. Encode the file in UTF-8 or add \"%% coding: latin-1\" at the beginning of the file. Retrying with latin-1 encoding.".
format_error_reason({Reason, Stack}) when is_list(Stack) ->
StackFun = fun
@@ -792,20 +794,59 @@ no_native_compilation(BeamFile, #compile{options=Opts0}) ->
_ -> false
end.
-parse_module(St) ->
- Opts = St#compile.options,
- Cwd = ".",
- IncludePath = [Cwd, St#compile.dir|inc_paths(Opts)],
- R = epp:parse_file(St#compile.ifile, IncludePath, pre_defs(Opts)),
+parse_module(St0) ->
+ case do_parse_module(utf8, St0) of
+ {ok,_}=Ret ->
+ Ret;
+ {error,_}=Ret ->
+ Ret;
+ {invalid_unicode,File,Line} ->
+ case do_parse_module(latin1, St0) of
+ {ok,St} ->
+ Es = [{File,[{Line,?MODULE,reparsing_invalid_unicode}]}],
+ {ok,St#compile{warnings=Es++St#compile.warnings}};
+ {error,St} ->
+ Es = [{File,[{Line,?MODULE,reparsing_invalid_unicode}]}],
+ {error,St#compile{errors=Es++St#compile.errors}}
+ end
+ end.
+
+do_parse_module(DefEncoding, #compile{ifile=File,options=Opts,dir=Dir}=St) ->
+ R = epp:parse_file(File,
+ [{includes,[".",Dir|inc_paths(Opts)]},
+ {macros,pre_defs(Opts)},
+ {default_encoding,DefEncoding},
+ extra]),
case R of
- {ok,Forms} ->
- Encoding = epp:read_encoding(St#compile.ifile),
- {ok,St#compile{code=Forms,encoding=Encoding}};
+ {ok,Forms,Extra} ->
+ Encoding = proplists:get_value(encoding, Extra),
+ case find_invalid_unicode(Forms, File) of
+ none ->
+ {ok,St#compile{code=Forms,encoding=Encoding}};
+ {invalid_unicode,_,_}=Ret ->
+ case Encoding of
+ none ->
+ Ret;
+ _ ->
+ {ok,St#compile{code=Forms,encoding=Encoding}}
+ end
+ end;
{error,E} ->
Es = [{St#compile.ifile,[{none,?MODULE,{epp,E}}]}],
{error,St#compile{errors=St#compile.errors ++ Es}}
end.
+find_invalid_unicode([H|T], File0) ->
+ case H of
+ {attribute,_,file,{File,_}} ->
+ find_invalid_unicode(T, File);
+ {error,{Line,file_io_server,invalid_unicode}} ->
+ {invalid_unicode,File0,Line};
+ _Other ->
+ find_invalid_unicode(T, File0)
+ end;
+find_invalid_unicode([], _) -> none.
+
parse_core(St) ->
case file:read_file(St#compile.ifile) of
{ok,Bin} ->
diff --git a/lib/compiler/test/error_SUITE.erl b/lib/compiler/test/error_SUITE.erl
index 5cdf429a5f..bd877bb528 100644
--- a/lib/compiler/test/error_SUITE.erl
+++ b/lib/compiler/test/error_SUITE.erl
@@ -23,7 +23,7 @@
-export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1,
init_per_group/2,end_per_group/2,
head_mismatch_line/1,warnings_as_errors/1, bif_clashes/1,
- transforms/1,forbidden_maps/1]).
+ transforms/1,forbidden_maps/1,bad_utf8/1]).
%% Used by transforms/1 test case.
-export([parse_transform/2]).
@@ -36,7 +36,8 @@ all() ->
groups() ->
[{p,test_lib:parallel(),
- [head_mismatch_line,warnings_as_errors,bif_clashes,transforms,forbidden_maps]}].
+ [head_mismatch_line,warnings_as_errors,bif_clashes,
+ transforms,forbidden_maps,bad_utf8]}].
init_per_suite(Config) ->
Config.
@@ -254,6 +255,23 @@ forbidden_maps(Config) when is_list(Config) ->
[] = run2(Config, Ts1),
ok.
+bad_utf8(Config) ->
+ Ts = [{bad_utf8,
+ %% If coding is specified explicitly as utf-8, there should be
+ %% a compilation error; we must not fallback to parsing the
+ %% file in latin-1 mode.
+ <<"%% coding: utf-8
+ %% Bj",246,"rn
+ t() -> \"",246,"\".
+ ">>,
+ [],
+ {error,[{2,epp,cannot_parse},
+ {2,file_io_server,invalid_unicode}],
+ []}
+ }],
+ [] = run2(Config, Ts),
+ ok.
+
run(Config, Tests) ->
?line File = test_filename(Config),
@@ -318,6 +336,7 @@ run_test(Test0, File, Warnings, WriteBeam) ->
?line compile:file(File, [binary,report|Warnings]),
%% Test result of compilation.
+ io:format("~p\n", [Opts]),
?line Res = case compile:file(File, Opts) of
{ok,Mod,_,[{_File,Ws}]} ->
%io:format("compile:file(~s,~p) ->~n~p~n",
@@ -335,6 +354,11 @@ run_test(Test0, File, Warnings, WriteBeam) ->
%io:format("compile:file(~s,~p) ->~n~p~n",
% [File,Opts,_ZZ]),
{error,Es,Ws};
+ {error,[{XFile,Es1},{XFile,Es2}],Ws} = _ZZ
+ when is_list(XFile) ->
+ %io:format("compile:file(~s,~p) ->~n~p~n",
+ % [File,Opts,_ZZ]),
+ {error,Es1++Es2,Ws};
{error,Es,[{_File,Ws}]} = _ZZ->
%io:format("compile:file(~s,~p) ->~n~p~n",
% [File,Opts,_ZZ]),
diff --git a/lib/compiler/test/warnings_SUITE.erl b/lib/compiler/test/warnings_SUITE.erl
index de56a59e12..c3b02819f9 100644
--- a/lib/compiler/test/warnings_SUITE.erl
+++ b/lib/compiler/test/warnings_SUITE.erl
@@ -37,8 +37,9 @@
-export([pattern/1,pattern2/1,pattern3/1,pattern4/1,
guard/1,bad_arith/1,bool_cases/1,bad_apply/1,
- files/1,effect/1,bin_opt_info/1,bin_construction/1, comprehensions/1,
- maps/1,redundant_boolean_clauses/1]).
+ files/1,effect/1,bin_opt_info/1,bin_construction/1,
+ comprehensions/1,maps/1,redundant_boolean_clauses/1,
+ latin1_fallback/1]).
% Default timetrap timeout (set in init_per_testcase).
-define(default_timeout, ?t:minutes(2)).
@@ -63,7 +64,7 @@ groups() ->
[pattern,pattern2,pattern3,pattern4,guard,
bad_arith,bool_cases,bad_apply,files,effect,
bin_opt_info,bin_construction,comprehensions,maps,
- redundant_boolean_clauses]}].
+ redundant_boolean_clauses,latin1_fallback]}].
init_per_suite(Config) ->
Config.
@@ -591,6 +592,37 @@ redundant_boolean_clauses(Config) when is_list(Config) ->
run(Config, Ts),
ok.
+latin1_fallback(Conf) when is_list(Conf) ->
+ DataDir = ?privdir,
+ IncFile = filename:join(DataDir, "include_me.hrl"),
+ file:write_file(IncFile, <<"%% ",246," in include file\n">>),
+ Ts1 = [{latin1_fallback1,
+ %% Test that the compiler fall backs to latin-1 with
+ %% a warning if a file has no encoding and does not
+ %% contain correct UTF-8 sequences.
+ <<"%% Bj",246,"rn
+ t(_) -> \"",246,"\";
+ t(x) -> ok.
+ ">>,
+ [],
+ {warnings,[{1,compile,reparsing_invalid_unicode},
+ {3,sys_core_fold,{nomatch_shadow,2}}]}}],
+ [] = run(Conf, Ts1),
+
+ Ts2 = [{latin1_fallback2,
+ %% Test that the compiler fall backs to latin-1 with
+ %% a warning if a file has no encoding and does not
+ %% contain correct UTF-8 sequences.
+ <<"
+
+ -include(\"include_me.hrl\").
+ ">>,
+ [],
+ {warnings,[{1,compile,reparsing_invalid_unicode}]}
+ }],
+ [] = run(Conf, Ts2),
+ ok.
+
%%%
%%% End of test cases.
%%%