aboutsummaryrefslogtreecommitdiffstats
path: root/lib/edoc/src
diff options
context:
space:
mode:
authorHans Bolinder <[email protected]>2014-06-25 15:26:45 +0200
committerHans Bolinder <[email protected]>2014-06-26 14:36:34 +0200
commitde3774d93b061a54d33c7c58f07c69cbbb36cadf (patch)
tree3ded8f54f2917a7276a2f7a73fcdc73417e6c4bf /lib/edoc/src
parent461dc05384eece7b4b7d84370fb0a2cf96ed2f6d (diff)
downloadotp-de3774d93b061a54d33c7c58f07c69cbbb36cadf.tar.gz
otp-de3774d93b061a54d33c7c58f07c69cbbb36cadf.tar.bz2
otp-de3774d93b061a54d33c7c58f07c69cbbb36cadf.zip
edoc, syntax_tools: Don't fail on invalid UTF-8
As a temporary measure to ease the transition to default UTF-8 encoding, automatically fall back to the Latin-1 encoding (without any warnings; the Erlang Compiler will emit a proper warning). The intention is to remove this workaround in OTP 18 or 19.
Diffstat (limited to 'lib/edoc/src')
-rw-r--r--lib/edoc/src/edoc.erl35
1 files changed, 32 insertions, 3 deletions
diff --git a/lib/edoc/src/edoc.erl b/lib/edoc/src/edoc.erl
index a87a8471e3..983f04e8b6 100644
--- a/lib/edoc/src/edoc.erl
+++ b/lib/edoc/src/edoc.erl
@@ -696,15 +696,44 @@ read_source_2(Name, Opts) ->
%% The line of the dot token will be copied to the integer token.
parse_file(Name, Includes, Macros) ->
- case epp:open(Name, Includes, Macros) of
- {ok, Epp} ->
- try {ok, parse_file(Epp)}
+ case parse_file(utf8, Name, Includes, Macros) of
+ invalid_unicode ->
+ parse_file(latin1, Name, Includes, Macros);
+ Ret ->
+ Ret
+ end.
+
+parse_file(DefEncoding, Name, Includes, Macros) ->
+ Options = [{name, Name},
+ {includes, Includes},
+ {macros, Macros},
+ {default_encoding, DefEncoding}],
+ case epp:open([extra | Options]) of
+ {ok, Epp, Extra} ->
+ try parse_file(Epp) of
+ Forms ->
+ Encoding = proplists:get_value(encoding, Extra),
+ case find_invalid_unicode(Forms) of
+ invalid_unicode when Encoding =/= utf8 ->
+ invalid_unicode;
+ _ ->
+ {ok, Forms}
+ end
after _ = epp:close(Epp)
end;
Error ->
Error
end.
+find_invalid_unicode([H|T]) ->
+ case H of
+ {error,{_Line,file_io_server,invalid_unicode}} ->
+ invalid_unicode;
+ _Other ->
+ find_invalid_unicode(T)
+ end;
+find_invalid_unicode([]) -> none.
+
parse_file(Epp) ->
case scan_and_parse(Epp) of
{ok, Form} ->