From de3774d93b061a54d33c7c58f07c69cbbb36cadf Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Wed, 25 Jun 2014 15:26:45 +0200 Subject: edoc, syntax_tools: Don't fail on invalid UTF-8 As a temporary measure to ease the transition to default UTF-8 encoding, automatically fall back to the Latin-1 encoding (without any warnings; the Erlang Compiler will emit a proper warning). The intention is to remove this workaround in OTP 18 or 19. --- lib/syntax_tools/src/epp_dodger.erl | 28 +++++++++++++++++++++++++++- lib/syntax_tools/src/erl_comment_scan.erl | 13 ++++++++++++- 2 files changed, 39 insertions(+), 2 deletions(-) (limited to 'lib/syntax_tools') diff --git a/lib/syntax_tools/src/epp_dodger.erl b/lib/syntax_tools/src/epp_dodger.erl index 131be4e8e4..7e12eab1b5 100644 --- a/lib/syntax_tools/src/epp_dodger.erl +++ b/lib/syntax_tools/src/epp_dodger.erl @@ -184,9 +184,27 @@ quick_parse_file(File, Options) -> parse_file(File, fun quick_parse/3, Options ++ [no_fail]). parse_file(File, Parser, Options) -> + case do_parse_file(utf8, File, Parser, Options) of + {ok, Forms}=Ret -> + case find_invalid_unicode(Forms) of + none -> + Ret; + invalid_unicode -> + case epp:read_encoding(File) of + utf8 -> + Ret; + _ -> + do_parse_file(latin1, File, Parser, Options) + end + end; + Else -> + Else + end. + +do_parse_file(DefEncoding, File, Parser, Options) -> case file:open(File, [read]) of {ok, Dev} -> - _ = epp:set_encoding(Dev), + _ = epp:set_encoding(Dev, DefEncoding), try Parser(Dev, 1, Options) after ok = file:close(Dev) end; @@ -194,6 +212,14 @@ parse_file(File, Parser, Options) -> Error end. +find_invalid_unicode([H|T]) -> + case H of + {error, {_Line, file_io_server, invalid_unicode}} -> + invalid_unicode; + _Other -> + find_invalid_unicode(T) + end; +find_invalid_unicode([]) -> none. 
%% ===================================================================== %% @spec parse(IODevice) -> {ok, Forms} | {error, errorinfo()} diff --git a/lib/syntax_tools/src/erl_comment_scan.erl b/lib/syntax_tools/src/erl_comment_scan.erl index dae7530ce7..03429d4d42 100644 --- a/lib/syntax_tools/src/erl_comment_scan.erl +++ b/lib/syntax_tools/src/erl_comment_scan.erl @@ -72,13 +72,24 @@ file(Name) -> {ok, V} -> case V of {ok, B} -> - Enc = case epp:read_encoding(Name) of + Encoding = epp:read_encoding_from_binary(B), + Enc = case Encoding of none -> epp:default_encoding(); Enc0 -> Enc0 end, case catch unicode:characters_to_list(B, Enc) of String when is_list(String) -> string(String); + R when Encoding =:= none -> + case + catch unicode:characters_to_list(B, latin1) + of + String when is_list(String) -> + string(String); + _ -> + error_read_file(Name1), + exit(R) + end; R -> error_read_file(Name1), exit(R) -- cgit v1.2.3 From dba1d881f3232a939c6620f5fd6b4a97ff454bee Mon Sep 17 00:00:00 2001 From: Erlang/OTP Date: Tue, 1 Jul 2014 13:58:47 +0200 Subject: Prepare release --- lib/syntax_tools/doc/src/notes.xml | 19 +++++++++++++++++++ lib/syntax_tools/vsn.mk | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'lib/syntax_tools') diff --git a/lib/syntax_tools/doc/src/notes.xml b/lib/syntax_tools/doc/src/notes.xml index 2618f005a6..8384af53b0 100644 --- a/lib/syntax_tools/doc/src/notes.xml +++ b/lib/syntax_tools/doc/src/notes.xml @@ -31,6 +31,25 @@

This document describes the changes made to the Syntax_Tools application.

+
Syntax_Tools 1.6.16 + +
Fixed Bugs and Malfunctions + + +

The default encoding for Erlang source files is now + UTF-8. As a temporary measure to ease the transition from + the old default of Latin-1, if EDoc encounters byte + sequences that are not valid UTF-8 sequences, EDoc will + retry in Latin-1 mode. This workaround will be removed + in a future release.

+

+ Own Id: OTP-12008

+
+
+
+ +
+
Syntax_Tools 1.6.15
Fixed Bugs and Malfunctions diff --git a/lib/syntax_tools/vsn.mk b/lib/syntax_tools/vsn.mk index d3703928da..6a80734f83 100644 --- a/lib/syntax_tools/vsn.mk +++ b/lib/syntax_tools/vsn.mk @@ -1 +1 @@ -SYNTAX_TOOLS_VSN = 1.6.15 +SYNTAX_TOOLS_VSN = 1.6.16 -- cgit v1.2.3