aboutsummaryrefslogtreecommitdiffstats
path: root/lib/compiler/src/compile.erl
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2014-03-03 13:50:23 +0100
committerBjörn Gustavsson <[email protected]>2014-03-18 17:47:51 +0100
commitbfe7e45cda6591dc31405ed1be961f079cc541c9 (patch)
tree45c894c1fb3a291d6ab7e5e083b648640554e540 /lib/compiler/src/compile.erl
parentf3cee0e9f409c5850709f11ba15cec22d7387401 (diff)
downloadotp-bfe7e45cda6591dc31405ed1be961f079cc541c9.tar.gz
otp-bfe7e45cda6591dc31405ed1be961f079cc541c9.tar.bz2
otp-bfe7e45cda6591dc31405ed1be961f079cc541c9.zip
Don't fail compilation for modules that contain invalid UTF-8
The default encoding for Erlang modules is now UTF-8, and the compilation would fail if a module contained byte sequences that are not valid UTF-8 sequences. In a large project with say many hundreds of Erlang modules with names of developers such as "Björn" or "Håkan" encoded in latin-1, that could mean that many hundreds of files would need to be modified just to get started testing OTP 17. As a temporary measure to ease the transition, automatically fall back to the latin-1 encoding with a warning for any module that contains invalid byte sequences and for which no encoding has been specified. The intention is to remove this workaround in OTP 18 or 19.
Diffstat (limited to 'lib/compiler/src/compile.erl')
-rw-r--r--lib/compiler/src/compile.erl59
1 files changed, 50 insertions, 9 deletions
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 9030dd998b..c7d91070f6 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -234,7 +234,9 @@ format_error({crash,Pass,Reason}) ->
format_error({bad_return,Pass,Reason}) ->
io_lib:format("internal error in ~p;\nbad return value: ~ts", [Pass,format_error_reason(Reason)]);
format_error({module_name,Mod,Filename}) ->
- io_lib:format("Module name '~s' does not match file name '~ts'", [Mod,Filename]).
+ io_lib:format("Module name '~s' does not match file name '~ts'", [Mod,Filename]);
+format_error(reparsing_invalid_unicode) ->
+ "Non-UTF-8 character(s) detected, but no encoding declared. Encode the file in UTF-8 or add \"%% coding: latin-1\" at the beginning of the file. Retrying with latin-1 encoding.".
format_error_reason({Reason, Stack}) when is_list(Stack) ->
StackFun = fun
@@ -792,20 +794,59 @@ no_native_compilation(BeamFile, #compile{options=Opts0}) ->
_ -> false
end.
-parse_module(St) ->
- Opts = St#compile.options,
- Cwd = ".",
- IncludePath = [Cwd, St#compile.dir|inc_paths(Opts)],
- R = epp:parse_file(St#compile.ifile, IncludePath, pre_defs(Opts)),
+parse_module(St0) ->
+ case do_parse_module(utf8, St0) of
+ {ok,_}=Ret ->
+ Ret;
+ {error,_}=Ret ->
+ Ret;
+ {invalid_unicode,File,Line} ->
+ case do_parse_module(latin1, St0) of
+ {ok,St} ->
+ Es = [{File,[{Line,?MODULE,reparsing_invalid_unicode}]}],
+ {ok,St#compile{warnings=Es++St#compile.warnings}};
+ {error,St} ->
+ Es = [{File,[{Line,?MODULE,reparsing_invalid_unicode}]}],
+ {error,St#compile{errors=Es++St#compile.errors}}
+ end
+ end.
+
+do_parse_module(DefEncoding, #compile{ifile=File,options=Opts,dir=Dir}=St) ->
+ R = epp:parse_file(File,
+ [{includes,[".",Dir|inc_paths(Opts)]},
+ {macros,pre_defs(Opts)},
+ {default_encoding,DefEncoding},
+ extra]),
case R of
- {ok,Forms} ->
- Encoding = epp:read_encoding(St#compile.ifile),
- {ok,St#compile{code=Forms,encoding=Encoding}};
+ {ok,Forms,Extra} ->
+ Encoding = proplists:get_value(encoding, Extra),
+ case find_invalid_unicode(Forms, File) of
+ none ->
+ {ok,St#compile{code=Forms,encoding=Encoding}};
+ {invalid_unicode,_,_}=Ret ->
+ case Encoding of
+ none ->
+ Ret;
+ _ ->
+ {ok,St#compile{code=Forms,encoding=Encoding}}
+ end
+ end;
{error,E} ->
Es = [{St#compile.ifile,[{none,?MODULE,{epp,E}}]}],
{error,St#compile{errors=St#compile.errors ++ Es}}
end.
+find_invalid_unicode([H|T], File0) ->
+ case H of
+ {attribute,_,file,{File,_}} ->
+ find_invalid_unicode(T, File);
+ {error,{Line,file_io_server,invalid_unicode}} ->
+ {invalid_unicode,File0,Line};
+ _Other ->
+ find_invalid_unicode(T, File0)
+ end;
+find_invalid_unicode([], _) -> none.
+
parse_core(St) ->
case file:read_file(St#compile.ifile) of
{ok,Bin} ->