diff options
author | Björn Gustavsson <[email protected]> | 2014-03-03 13:50:23 +0100 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2014-03-18 17:47:51 +0100 |
commit | bfe7e45cda6591dc31405ed1be961f079cc541c9 (patch) | |
tree | 45c894c1fb3a291d6ab7e5e083b648640554e540 /lib/compiler/src | |
parent | f3cee0e9f409c5850709f11ba15cec22d7387401 (diff) | |
download | otp-bfe7e45cda6591dc31405ed1be961f079cc541c9.tar.gz otp-bfe7e45cda6591dc31405ed1be961f079cc541c9.tar.bz2 otp-bfe7e45cda6591dc31405ed1be961f079cc541c9.zip |
Don't fail compilation for modules that contain invalid UTF-8
The default encoding for Erlang modules is now UTF-8, and the
compilation would fail if a module contained byte sequences that
are not valid UTF-8 sequences.
In a large project with say many hundreds of Erlang modules
with names of developers such as "Björn" or "Håkan" encoded in
latin-1, that could mean that many hundreds of files would need
to be modified just to get started testing OTP 17.
As a temporary measure to ease the transition, automatically
fall back to the latin-1 encoding with a warning for any module
that contains invalid byte sequences and for which no encoding
has been specified.
The intention is to remove this workaround in OTP 18 or 19.
Diffstat (limited to 'lib/compiler/src')
-rw-r--r-- | lib/compiler/src/compile.erl | 59 |
1 files changed, 50 insertions, 9 deletions
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl index 9030dd998b..c7d91070f6 100644 --- a/lib/compiler/src/compile.erl +++ b/lib/compiler/src/compile.erl @@ -234,7 +234,9 @@ format_error({crash,Pass,Reason}) -> format_error({bad_return,Pass,Reason}) -> io_lib:format("internal error in ~p;\nbad return value: ~ts", [Pass,format_error_reason(Reason)]); format_error({module_name,Mod,Filename}) -> - io_lib:format("Module name '~s' does not match file name '~ts'", [Mod,Filename]). + io_lib:format("Module name '~s' does not match file name '~ts'", [Mod,Filename]); +format_error(reparsing_invalid_unicode) -> + "Non-UTF-8 character(s) detected, but no encoding declared. Encode the file in UTF-8 or add \"%% coding: latin-1\" at the beginning of the file. Retrying with latin-1 encoding.". format_error_reason({Reason, Stack}) when is_list(Stack) -> StackFun = fun @@ -792,20 +794,59 @@ no_native_compilation(BeamFile, #compile{options=Opts0}) -> _ -> false end. -parse_module(St) -> - Opts = St#compile.options, - Cwd = ".", - IncludePath = [Cwd, St#compile.dir|inc_paths(Opts)], - R = epp:parse_file(St#compile.ifile, IncludePath, pre_defs(Opts)), +parse_module(St0) -> + case do_parse_module(utf8, St0) of + {ok,_}=Ret -> + Ret; + {error,_}=Ret -> + Ret; + {invalid_unicode,File,Line} -> + case do_parse_module(latin1, St0) of + {ok,St} -> + Es = [{File,[{Line,?MODULE,reparsing_invalid_unicode}]}], + {ok,St#compile{warnings=Es++St#compile.warnings}}; + {error,St} -> + Es = [{File,[{Line,?MODULE,reparsing_invalid_unicode}]}], + {error,St#compile{errors=Es++St#compile.errors}} + end + end. + +do_parse_module(DefEncoding, #compile{ifile=File,options=Opts,dir=Dir}=St) -> + R = epp:parse_file(File, + [{includes,[".",Dir|inc_paths(Opts)]}, + {macros,pre_defs(Opts)}, + {default_encoding,DefEncoding}, + extra]), case R of - {ok,Forms} -> - Encoding = epp:read_encoding(St#compile.ifile), - {ok,St#compile{code=Forms,encoding=Encoding}}; + {ok,Forms,Extra} -> + Encoding = proplists:get_value(encoding, Extra), + case find_invalid_unicode(Forms, File) of + none -> + {ok,St#compile{code=Forms,encoding=Encoding}}; + {invalid_unicode,_,_}=Ret -> + case Encoding of + none -> + Ret; + _ -> + {ok,St#compile{code=Forms,encoding=Encoding}} + end + end; {error,E} -> Es = [{St#compile.ifile,[{none,?MODULE,{epp,E}}]}], {error,St#compile{errors=St#compile.errors ++ Es}} end. +find_invalid_unicode([H|T], File0) -> + case H of + {attribute,_,file,{File,_}} -> + find_invalid_unicode(T, File); + {error,{Line,file_io_server,invalid_unicode}} -> + {invalid_unicode,File0,Line}; + _Other -> + find_invalid_unicode(T, File0) + end; +find_invalid_unicode([], _) -> none. + parse_core(St) -> case file:read_file(St#compile.ifile) of {ok,Bin} -> |