diff options
author | Björn Gustavsson <[email protected]> | 2014-03-19 11:51:46 +0100 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2014-03-19 11:51:46 +0100 |
commit | 17944ea74120ac831b85d0c3cfe2be419285c6d7 (patch) | |
tree | 933fb986a8833cb7e0c42eefd00f3f414bf0c225 /lib/stdlib | |
parent | 539614ad82cfd0ced75e9f43fa76b013d4598d01 (diff) | |
parent | bfe7e45cda6591dc31405ed1be961f079cc541c9 (diff) | |
download | otp-17944ea74120ac831b85d0c3cfe2be419285c6d7.tar.gz otp-17944ea74120ac831b85d0c3cfe2be419285c6d7.tar.bz2 otp-17944ea74120ac831b85d0c3cfe2be419285c6d7.zip |
Merge branch 'bjorn/compiler/utf8-warning/OTP-11791'
* bjorn/compiler/utf8-warning/OTP-11791:
Don't fail compilation for modules that contain invalid UTF-8
epp: Make it possible to specify a default encoding
Diffstat (limited to 'lib/stdlib')
-rw-r--r-- | lib/stdlib/doc/src/epp.xml | 56 | ||||
-rw-r--r-- | lib/stdlib/src/epp.erl | 147 | ||||
-rw-r--r-- | lib/stdlib/test/epp_SUITE.erl | 78 |
3 files changed, 230 insertions, 51 deletions
diff --git a/lib/stdlib/doc/src/epp.xml b/lib/stdlib/doc/src/epp.xml index cf33530395..50baad04a9 100644 --- a/lib/stdlib/doc/src/epp.xml +++ b/lib/stdlib/doc/src/epp.xml @@ -64,11 +64,29 @@ </datatypes> <funcs> <func> + <name name="open" arity="1"/> + <fsummary>Open a file for preprocessing</fsummary> + <desc> + <p>Opens a file for preprocessing.</p> + <p>If <c>extra</c> is given in + <c><anno>Options</anno></c>, the return value will be + <c>{ok, <anno>Epp</anno>, <anno>Extra</anno>}</c> instead + of <c>{ok, <anno>Epp</anno>}</c>.</p> + </desc> + </func> + <func> <name name="open" arity="2"/> + <fsummary>Open a file for preprocessing</fsummary> + <desc> + <p>Equivalent to <c>epp:open([{name, FileName}, {includes, IncludePath}])</c>.</p> + </desc> + </func> + <func> <name name="open" arity="3"/> <fsummary>Open a file for preprocessing</fsummary> <desc> - <p>Opens a file for preprocessing.</p> + <p>Equivalent to <c>epp:open([{name, FileName}, {includes, IncludePath}, + {macros, PredefMacros}])</c>.</p> </desc> </func> <func> @@ -89,12 +107,24 @@ </desc> </func> <func> - <name name="parse_file" arity="3"/> + <name name="parse_file" arity="2"/> <fsummary>Preprocess and parse an Erlang source file</fsummary> <desc> <p>Preprocesses and parses an Erlang source file. - Note that the tuple <c>{eof, <anno>Line</anno>}</c> returned at end-of-file is - included as a "form".</p> + Note that the tuple <c>{eof, <anno>Line</anno>}</c> returned + at end-of-file is included as a "form".</p> + <p>If <c>extra</c> is given in + <c><anno>Options</anno></c>, the return value will be + <c>{ok, [<anno>Form</anno>], <anno>Extra</anno>}</c> instead + of <c>{ok, [<anno>Form</anno>]}</c>.</p> + </desc> + </func> + <func> + <name name="parse_file" arity="3"/> + <fsummary>Preprocess and parse an Erlang source file</fsummary> + <desc> + <p>Equivalent to <c>epp:parse_file(FileName, [{includes, IncludePath}, + {macros, PredefMacros}])</c>.</p> </desc> </func> <func> @@ -111,7 +141,7 @@ <p>Returns a string representation of an encoding. The string is recognized by <c>read_encoding/1,2</c>, <c>read_encoding_from_binary/1,2</c>, and - <c>set_encoding/1</c> as a valid encoding.</p> + <c>set_encoding/1,2</c> as a valid encoding.</p> </desc> </func> <func> @@ -157,6 +187,22 @@ </desc> </func> <func> + <name name="set_encoding" arity="2"/> + <fsummary>Read and set the encoding of an IO device</fsummary> + <desc> + <p>Reads the <seealso marker="#encoding">encoding</seealso> from + an IO device and sets the encoding of the device + accordingly. The position of the IO device referenced by + <c><anno>File</anno></c> is not affected. If no valid + encoding can be read from the IO device the encoding of the + IO device is set to the + <seealso marker="#encoding">encoding</seealso> given by + <c><anno>Default</anno></c>.</p> + <p>Returns the read encoding, or <c>none</c> if no valid + encoding was found.</p> + </desc> + </func> + <func> <name name="format_error" arity="1"/> <fsummary>Format an error descriptor</fsummary> <desc> diff --git a/lib/stdlib/src/epp.erl b/lib/stdlib/src/epp.erl index 68e079b7e5..d212a55b47 100644 --- a/lib/stdlib/src/epp.erl +++ b/lib/stdlib/src/epp.erl @@ -20,12 +20,12 @@ %% An Erlang code preprocessor. --export([open/2,open/3,open/5,close/1,format_error/1]). +-export([open/1, open/2,open/3,open/5,close/1,format_error/1]). -export([scan_erl_form/1,parse_erl_form/1,macro_defs/1]). --export([parse_file/1, parse_file/3]). +-export([parse_file/1, parse_file/2, parse_file/3]). -export([default_encoding/0, encoding_to_string/1, read_encoding_from_binary/1, read_encoding_from_binary/2, - set_encoding/1, read_encoding/1, read_encoding/2]). + set_encoding/1, set_encoding/2, read_encoding/1, read_encoding/2]). -export([interpret_file_attribute/1]). -export([normalize_typed_record_fields/1,restore_typed_record_fields/1]). @@ -37,9 +37,11 @@ -type epp_handle() :: pid(). -type source_encoding() :: latin1 | utf8. +-define(DEFAULT_ENCODING, utf8). + %% Epp state record. -record(epp, {file, %Current file - location, %Current location + location=1, %Current location delta, %Offset from Location (-file) name="", %Current file name name2="", %-"-, modified by -file @@ -48,6 +50,7 @@ path=[], %Include-path macs = dict:new() :: dict:dict(),%Macros (don't care locations) uses = dict:new() :: dict:dict(),%Macro use structure + default_encoding = ?DEFAULT_ENCODING :: source_encoding(), pre_opened = false :: boolean() }). @@ -58,6 +61,7 @@ %%% distinction in the internal representation would simplify the code %%% a little. +%% open(Options) %% open(FileName, IncludePath) %% open(FileName, IncludePath, PreDefMacros) %% open(FileName, IoDevice, StartLocation, IncludePath, PreDefMacros) @@ -65,6 +69,7 @@ %% scan_erl_form(Epp) %% parse_erl_form(Epp) %% parse_file(Epp) +%% parse_file(FileName, Options) %% parse_file(FileName, IncludePath, PreDefMacros) %% macro_defs(Epp) @@ -87,14 +92,43 @@ open(Name, Path) -> ErrorDescriptor :: term(). open(Name, Path, Pdm) -> - Self = self(), - Epp = spawn(fun() -> server(Self, Name, Path, Pdm) end), - epp_request(Epp). + internal_open([{name, Name}, {includes, Path}, {macros, Pdm}], #epp{}). open(Name, File, StartLocation, Path, Pdm) -> - Self = self(), - Epp = spawn(fun() -> server(Self, Name, File, StartLocation,Path,Pdm) end), - epp_request(Epp). + internal_open([{name, Name}, {includes, Path}, {macros, Pdm}], + #epp{file=File, pre_opened=true, location=StartLocation}). + +-spec open(Options) -> + {'ok', Epp} | {'ok', Epp, Extra} | {'error', ErrorDescriptor} when + Options :: [{'default_encoding', DefEncoding :: source_encoding()} | + {'includes', IncludePath :: [DirectoryName :: file:name()]} | + {'macros', PredefMacros :: macros()} | + {'name',FileName :: file:name()} | + 'extra'], + Epp :: epp_handle(), + Extra :: [{'encoding', source_encoding() | 'none'}], + ErrorDescriptor :: term(). + +open(Options) -> + internal_open(Options, #epp{}). + +internal_open(Options, St) -> + case proplists:get_value(name, Options) of + undefined -> + erlang:error(badarg); + Name -> + Self = self(), + Epp = spawn(fun() -> server(Self, Name, Options, St) end), + case epp_request(Epp) of + {ok, Pid, Encoding} -> + case proplists:get_bool(extra, Options) of + true -> {ok, Pid, [{encoding, Encoding}]}; + false -> {ok, Pid} + end; + Other -> + Other + end + end. -spec close(Epp) -> 'ok' when Epp :: epp_handle(). @@ -170,9 +204,6 @@ format_error({'NYI',What}) -> io_lib:format("not yet implemented '~s'", [What]); format_error(E) -> file:format_error(E). -%% parse_file(FileName, IncludePath, [PreDefMacro]) -> -%% {ok,[Form]} | {error,OpenError} - -spec parse_file(FileName, IncludePath, PredefMacros) -> {'ok', [Form]} | {error, OpenError} when FileName :: file:name(), @@ -184,17 +215,40 @@ format_error(E) -> file:format_error(E). OpenError :: file:posix() | badarg | system_limit. parse_file(Ifile, Path, Predefs) -> - case open(Ifile, Path, Predefs) of + parse_file(Ifile, [{includes, Path}, {macros, Predefs}]). + +-spec parse_file(FileName, Options) -> + {'ok', [Form]} | {'ok', [Form], Extra} | {error, OpenError} when + FileName :: file:name(), + Options :: [{'includes', IncludePath :: [DirectoryName :: file:name()]} | + {'macros', PredefMacros :: macros()} | + {'default_encoding', DefEncoding :: source_encoding()} | + 'extra'], + Form :: erl_parse:abstract_form() | {'error', ErrorInfo} | {'eof',Line}, + Line :: erl_scan:line(), + ErrorInfo :: erl_scan:error_info() | erl_parse:error_info(), + Extra :: [{'encoding', source_encoding() | 'none'}], + OpenError :: file:posix() | badarg | system_limit. + +parse_file(Ifile, Options) -> + case internal_open([{name, Ifile} | Options], #epp{}) of {ok,Epp} -> Forms = parse_file(Epp), close(Epp), {ok,Forms}; + {ok,Epp,Extra} -> + Forms = parse_file(Epp), + close(Epp), + {ok,Forms,Extra}; {error,E} -> {error,E} end. -%% parse_file(Epp) -> -%% [Form] +-spec parse_file(Epp) -> [Form] when + Epp :: epp_handle(), + Form :: erl_parse:abstract_form() | {'error', ErrorInfo} | {'eof',Line}, + Line :: erl_scan:line(), + ErrorInfo :: erl_scan:error_info() | erl_parse:error_info(). parse_file(Epp) -> case parse_erl_form(Epp) of @@ -219,8 +273,6 @@ parse_file(Epp) -> [{eof,Location}] end. --define(DEFAULT_ENCODING, utf8). - -spec default_encoding() -> source_encoding(). default_encoding() -> @@ -258,9 +310,16 @@ read_encoding(Name, Options) -> File :: io:device(). % pid(); raw files don't work set_encoding(File) -> + set_encoding(File, ?DEFAULT_ENCODING). + +-spec set_encoding(File, Default) -> source_encoding() | none when + Default :: source_encoding(), + File :: io:device(). % pid(); raw files don't work + +set_encoding(File, Default) -> Encoding = read_encoding_from_file(File, true), Enc = case Encoding of - none -> default_encoding(); + none -> Default; Encoding -> Encoding end, ok = io:setopts(File, [{encoding, Enc}]), @@ -446,35 +505,37 @@ restore_typed_record_fields([{attribute,La,type,{{record,Record},Fields,[]}}| restore_typed_record_fields([Form|Forms]) -> [Form|restore_typed_record_fields(Forms)]. -%% server(StarterPid, FileName, Path, PreDefMacros) - -server(Pid, Name, Path, Pdm) -> +server(Pid, Name, Options, #epp{pre_opened=PreOpened}=St) -> process_flag(trap_exit, true), - case file:open(Name, [read]) of - {ok,File} -> - Location = 1, - init_server(Pid, Name, File, Location, Path, Pdm, false); - {error,E} -> - epp_reply(Pid, {error,E}) + case PreOpened of + false -> + case file:open(Name, [read]) of + {ok,File} -> + init_server(Pid, Name, Options, St#epp{file = File}); + {error,E} -> + epp_reply(Pid, {error,E}) + end; + true -> + init_server(Pid, Name, Options, St) end. -%% server(StarterPid, FileName, IoDevice, Location, Path, PreDefMacros) -server(Pid, Name, File, AtLocation, Path, Pdm) -> - process_flag(trap_exit, true), - init_server(Pid, Name, File, AtLocation, Path, Pdm, true). - -init_server(Pid, Name, File, AtLocation, Path, Pdm, Pre) -> +init_server(Pid, Name, Options, St0) -> + Pdm = proplists:get_value(macros, Options, []), Ms0 = predef_macros(Name), case user_predef(Pdm, Ms0) of {ok,Ms1} -> - _ = set_encoding(File), - epp_reply(Pid, {ok,self()}), + #epp{file = File, location = AtLocation} = St0, + DefEncoding = proplists:get_value(default_encoding, Options, + ?DEFAULT_ENCODING), + Encoding = set_encoding(File, DefEncoding), + epp_reply(Pid, {ok,self(),Encoding}), %% ensure directory of current source file is %% first in path - Path1 = [filename:dirname(Name) | Path], - St = #epp{file=File, location=AtLocation, delta=0, - name=Name, name2=Name, path=Path1, macs=Ms1, - pre_opened = Pre}, + Path = [filename:dirname(Name) | + proplists:get_value(includes, Options, [])], + St = St0#epp{delta=0, name=Name, name2=Name, + path=Path, macs=Ms1, + default_encoding=DefEncoding}, From = wait_request(St), enter_file_reply(From, Name, AtLocation, AtLocation), wait_req_scan(St); @@ -600,9 +661,11 @@ enter_file2(NewF, Pname, From, St0, AtLocation) -> %% the path) must be dropped, otherwise the path used within the current %% file will depend on the order of file inclusions in the parent files Path = [filename:dirname(Pname) | tl(St0#epp.path)], - _ = set_encoding(NewF), + DefEncoding = St0#epp.default_encoding, + _ = set_encoding(NewF, DefEncoding), #epp{file=NewF,location=Loc,name=Pname,name2=Pname,delta=0, - sstk=[St0|St0#epp.sstk],path=Path,macs=Ms}. + sstk=[St0|St0#epp.sstk],path=Path,macs=Ms, + default_encoding=DefEncoding}. enter_file_reply(From, Name, Location, AtLocation) -> Attr = loc_attr(AtLocation), diff --git a/lib/stdlib/test/epp_SUITE.erl b/lib/stdlib/test/epp_SUITE.erl index 0b4726c07a..b17e8bd186 100644 --- a/lib/stdlib/test/epp_SUITE.erl +++ b/lib/stdlib/test/epp_SUITE.erl @@ -26,7 +26,7 @@ pmod/1, not_circular/1, skip_header/1, otp_6277/1, otp_7702/1, otp_8130/1, overload_mac/1, otp_8388/1, otp_8470/1, otp_8503/1, otp_8562/1, otp_8665/1, otp_8911/1, otp_10302/1, otp_10820/1, - otp_11728/1]). + otp_11728/1, encoding/1]). -export([epp_parse_erl_form/2]). @@ -68,7 +68,8 @@ all() -> {group, variable}, otp_4870, otp_4871, otp_5362, pmod, not_circular, skip_header, otp_6277, otp_7702, otp_8130, overload_mac, otp_8388, otp_8470, otp_8503, otp_8562, - otp_8665, otp_8911, otp_10302, otp_10820, otp_11728]. + otp_8665, otp_8911, otp_10302, otp_10820, otp_11728, + encoding]. groups() -> [{upcase_mac, [], [upcase_mac_1, upcase_mac_2]}, @@ -123,10 +124,22 @@ include_local(Config) when is_list(Config) -> %%% regular epp:parse_file, the test case will time out, and then epp %%% server will go on growing until we dump core. epp_parse_file(File, Inc, Predef) -> - {ok, Epp} = epp:open(File, Inc, Predef), + List = do_epp_parse_file(fun() -> + epp:open(File, Inc, Predef) + end), + List = do_epp_parse_file(fun() -> + Opts = [{name, File}, + {includes, Inc}, + {macros, Predef}], + epp:open(Opts) + end), + {ok, List}. + +do_epp_parse_file(Open) -> + {ok, Epp} = Open(), List = collect_epp_forms(Epp), epp:close(Epp), - {ok, List}. + List. collect_epp_forms(Epp) -> Result = epp_parse_erl_form(Epp), @@ -1413,6 +1426,63 @@ otp_11728(Config) when is_list(Config) -> _ = file:delete(ErlFile), ok. +%% Check the new API for setting the default encoding. +encoding(Config) when is_list(Config) -> + Dir = ?config(priv_dir, Config), + ErlFile = filename:join(Dir, "encoding.erl"), + + %% Try a latin-1 file with no encoding given. + C1 = <<"-module(encoding). + %% ",246," + ">>, + ok = file:write_file(ErlFile, C1), + {ok,[{attribute,1,file,_}, + {attribute,1,module,encoding}, + {error,_}, + {error,{2,epp,cannot_parse}}, + {eof,2}]} = epp:parse_file(ErlFile, []), + {ok,[{attribute,1,file,_}, + {attribute,1,module,encoding}, + {eof,3}]} = + epp:parse_file(ErlFile, [{default_encoding,latin1}]), + {ok,[{attribute,1,file,_}, + {attribute,1,module,encoding}, + {eof,3}],[{encoding,none}]} = + epp:parse_file(ErlFile, [{default_encoding,latin1},extra]), + + %% Try a latin-1 file with encoding given in a comment. + C2 = <<"-module(encoding). + %% encoding: latin-1 + %% ",246," + ">>, + ok = file:write_file(ErlFile, C2), + {ok,[{attribute,1,file,_}, + {attribute,1,module,encoding}, + {eof,4}]} = + epp:parse_file(ErlFile, []), + {ok,[{attribute,1,file,_}, + {attribute,1,module,encoding}, + {eof,4}]} = + epp:parse_file(ErlFile, [{default_encoding,latin1}]), + {ok,[{attribute,1,file,_}, + {attribute,1,module,encoding}, + {eof,4}]} = + epp:parse_file(ErlFile, [{default_encoding,utf8}]), + {ok,[{attribute,1,file,_}, + {attribute,1,module,encoding}, + {eof,4}],[{encoding,latin1}]} = + epp:parse_file(ErlFile, [extra]), + {ok,[{attribute,1,file,_}, + {attribute,1,module,encoding}, + {eof,4}],[{encoding,latin1}]} = + epp:parse_file(ErlFile, [{default_encoding,latin1},extra]), + {ok,[{attribute,1,file,_}, + {attribute,1,module,encoding}, + {eof,4}],[{encoding,latin1}]} = + epp:parse_file(ErlFile, [{default_encoding,utf8},extra]), + ok. + + check(Config, Tests) -> eval_tests(Config, fun check_test/2, Tests). |