diff options
| author | Hans Bolinder <[email protected]> | 2014-06-25 15:26:45 +0200 | 
|---|---|---|
| committer | Hans Bolinder <[email protected]> | 2014-06-26 14:36:34 +0200 | 
| commit | de3774d93b061a54d33c7c58f07c69cbbb36cadf (patch) | |
| tree | 3ded8f54f2917a7276a2f7a73fcdc73417e6c4bf /lib/syntax_tools/src | |
| parent | 461dc05384eece7b4b7d84370fb0a2cf96ed2f6d (diff) | |
| download | otp-de3774d93b061a54d33c7c58f07c69cbbb36cadf.tar.gz otp-de3774d93b061a54d33c7c58f07c69cbbb36cadf.tar.bz2 otp-de3774d93b061a54d33c7c58f07c69cbbb36cadf.zip | |
edoc, syntax_tools: Don't fail on invalid UTF-8
As a temporary measure to ease the transition to default UTF-8
encoding, automatically fall back to the Latin-1 encoding (without any
warnings; the Erlang Compiler will emit a proper warning).
The intention is to remove this workaround in OTP 18 or 19.
Diffstat (limited to 'lib/syntax_tools/src')
| -rw-r--r-- | lib/syntax_tools/src/epp_dodger.erl | 28 |
| -rw-r--r-- | lib/syntax_tools/src/erl_comment_scan.erl | 13 |
2 files changed, 39 insertions, 2 deletions
```diff
diff --git a/lib/syntax_tools/src/epp_dodger.erl b/lib/syntax_tools/src/epp_dodger.erl
index 131be4e8e4..7e12eab1b5 100644
--- a/lib/syntax_tools/src/epp_dodger.erl
+++ b/lib/syntax_tools/src/epp_dodger.erl
@@ -184,9 +184,27 @@ quick_parse_file(File, Options) ->
     parse_file(File, fun quick_parse/3, Options ++ [no_fail]).
 
 parse_file(File, Parser, Options) ->
+    case do_parse_file(utf8, File, Parser, Options) of
+        {ok, Forms}=Ret ->
+            case find_invalid_unicode(Forms) of
+                none ->
+                    Ret;
+                invalid_unicode ->
+                    case epp:read_encoding(File) of
+                        utf8 ->
+                            Ret;
+                        _ ->
+                            do_parse_file(latin1, File, Parser, Options)
+                    end
+            end;
+        Else ->
+            Else
+    end.
+
+do_parse_file(DefEncoding, File, Parser, Options) ->
     case file:open(File, [read]) of
         {ok, Dev} ->
-            _ = epp:set_encoding(Dev),
+            _ = epp:set_encoding(Dev, DefEncoding),
             try Parser(Dev, 1, Options)
             after ok = file:close(Dev)
 	    end;
@@ -194,6 +212,14 @@ parse_file(File, Parser, Options) ->
             Error
     end.
 
+find_invalid_unicode([H|T]) ->
+    case H of
+	{error, {_Line, file_io_server, invalid_unicode}} ->
+	    invalid_unicode;
+	_Other ->
+	    find_invalid_unicode(T)
+    end;
+find_invalid_unicode([]) -> none.
 
 %% =====================================================================
 %% @spec parse(IODevice) -> {ok, Forms} | {error, errorinfo()}
diff --git a/lib/syntax_tools/src/erl_comment_scan.erl b/lib/syntax_tools/src/erl_comment_scan.erl
index dae7530ce7..03429d4d42 100644
--- a/lib/syntax_tools/src/erl_comment_scan.erl
+++ b/lib/syntax_tools/src/erl_comment_scan.erl
@@ -72,13 +72,24 @@ file(Name) ->
 	{ok, V} ->
 	    case V of
 		{ok, B} ->
-                    Enc = case epp:read_encoding(Name) of
+                    Encoding = epp:read_encoding_from_binary(B),
+                    Enc = case Encoding of
                               none -> epp:default_encoding();
                               Enc0 -> Enc0
                           end,
                     case catch unicode:characters_to_list(B, Enc) of
                         String when is_list(String) ->
                             string(String);
+                        R when Encoding =:= none ->
+                            case
+                              catch unicode:characters_to_list(B, latin1)
+                            of
+                                String when is_list(String) ->
+                                    string(String);
+                                _ ->
+                                    error_read_file(Name1),
+                                    exit(R)
+                            end;
                         R ->
                             error_read_file(Name1),
                             exit(R)
```
