From f3cee0e9f409c5850709f11ba15cec22d7387401 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 3 Mar 2014 13:50:23 +0100 Subject: epp: Make it possible to specify a default encoding In the next commit, we will need a way to tell epp which the default encoding should be for files that have no encoding comment. We could add new open() and parse_file() functions with one extra argument for the encoding, but there are already too many variants. To avoid having to add an additional argument to epp:open() and epp:parse_file() each time new options are needed, introduce epp:open/1 and epp:parse_file/2 that takes a property list with options. Also support the new 'default_encoding' option for specifying the default encoding for source files. Thanks to Richard Carlsson for the idea and the implementation of the new functionality in epp.erl. --- lib/stdlib/doc/src/epp.xml | 56 ++++++++++++++-- lib/stdlib/src/epp.erl | 147 ++++++++++++++++++++++++++++++------------ lib/stdlib/test/epp_SUITE.erl | 78 ++++++++++++++++++++-- 3 files changed, 230 insertions(+), 51 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/doc/src/epp.xml b/lib/stdlib/doc/src/epp.xml index cf33530395..50baad04a9 100644 --- a/lib/stdlib/doc/src/epp.xml +++ b/lib/stdlib/doc/src/epp.xml @@ -63,12 +63,30 @@ + + + Open a file for preprocessing + +

Opens a file for preprocessing.

+

If extra is given in + Options, the return value will be + {ok, Epp, Extra} instead + of {ok, Epp}.

+
+
+ Open a file for preprocessing + +

Equivalent to epp:open([{name, FileName}, {includes, IncludePath}]).

+
+
+ Open a file for preprocessing -

Opens a file for preprocessing.

+

Equivalent to epp:open([{name, FileName}, {includes, IncludePath}, + {macros, PredefMacros}]).

@@ -89,12 +107,24 @@ - + Preprocess and parse an Erlang source file

Preprocesses and parses an Erlang source file. - Note that the tuple {eof, Line} returned at end-of-file is - included as a "form".

+ Note that the tuple {eof, Line} returned + at end-of-file is included as a "form".

+

If extra is given in + Options, the return value will be + {ok, [Form], Extra} instead + of {ok, [Form]}.

+
+
+ + + Preprocess and parse an Erlang source file + +

Equivalent to epp:parse_file(FileName, [{includes, IncludePath}, + {macros, PredefMacros}]).

@@ -111,7 +141,7 @@

Returns a string representation of an encoding. The string is recognized by read_encoding/1,2, read_encoding_from_binary/1,2, and - set_encoding/1 as a valid encoding.

+ set_encoding/1,2 as a valid encoding.

@@ -156,6 +186,22 @@ encoding was found.

+ + + Read and set the encoding of an IO device + +

Reads the encoding from + an IO device and sets the encoding of the device + accordingly. The position of the IO device referenced by + File is not affected. If no valid + encoding can be read from the IO device the encoding of the + IO device is set to the + encoding given by + Default.

+

Returns the read encoding, or none if no valid + encoding was found.

+
+
Format an error descriptor diff --git a/lib/stdlib/src/epp.erl b/lib/stdlib/src/epp.erl index 68e079b7e5..d212a55b47 100644 --- a/lib/stdlib/src/epp.erl +++ b/lib/stdlib/src/epp.erl @@ -20,12 +20,12 @@ %% An Erlang code preprocessor. --export([open/2,open/3,open/5,close/1,format_error/1]). +-export([open/1, open/2,open/3,open/5,close/1,format_error/1]). -export([scan_erl_form/1,parse_erl_form/1,macro_defs/1]). --export([parse_file/1, parse_file/3]). +-export([parse_file/1, parse_file/2, parse_file/3]). -export([default_encoding/0, encoding_to_string/1, read_encoding_from_binary/1, read_encoding_from_binary/2, - set_encoding/1, read_encoding/1, read_encoding/2]). + set_encoding/1, set_encoding/2, read_encoding/1, read_encoding/2]). -export([interpret_file_attribute/1]). -export([normalize_typed_record_fields/1,restore_typed_record_fields/1]). @@ -37,9 +37,11 @@ -type epp_handle() :: pid(). -type source_encoding() :: latin1 | utf8. +-define(DEFAULT_ENCODING, utf8). + %% Epp state record. -record(epp, {file, %Current file - location, %Current location + location=1, %Current location delta, %Offset from Location (-file) name="", %Current file name name2="", %-"-, modified by -file @@ -48,6 +50,7 @@ path=[], %Include-path macs = dict:new() :: dict:dict(),%Macros (don't care locations) uses = dict:new() :: dict:dict(),%Macro use structure + default_encoding = ?DEFAULT_ENCODING :: source_encoding(), pre_opened = false :: boolean() }). @@ -58,6 +61,7 @@ %%% distinction in the internal representation would simplify the code %%% a little. +%% open(Options) %% open(FileName, IncludePath) %% open(FileName, IncludePath, PreDefMacros) %% open(FileName, IoDevice, StartLocation, IncludePath, PreDefMacros) @@ -65,6 +69,7 @@ %% scan_erl_form(Epp) %% parse_erl_form(Epp) %% parse_file(Epp) +%% parse_file(FileName, Options) %% parse_file(FileName, IncludePath, PreDefMacros) %% macro_defs(Epp) @@ -87,14 +92,43 @@ open(Name, Path) -> ErrorDescriptor :: term(). open(Name, Path, Pdm) -> - Self = self(), - Epp = spawn(fun() -> server(Self, Name, Path, Pdm) end), - epp_request(Epp). + internal_open([{name, Name}, {includes, Path}, {macros, Pdm}], #epp{}). open(Name, File, StartLocation, Path, Pdm) -> - Self = self(), - Epp = spawn(fun() -> server(Self, Name, File, StartLocation,Path,Pdm) end), - epp_request(Epp). + internal_open([{name, Name}, {includes, Path}, {macros, Pdm}], + #epp{file=File, pre_opened=true, location=StartLocation}). + +-spec open(Options) -> + {'ok', Epp} | {'ok', Epp, Extra} | {'error', ErrorDescriptor} when + Options :: [{'default_encoding', DefEncoding :: source_encoding()} | + {'includes', IncludePath :: [DirectoryName :: file:name()]} | + {'macros', PredefMacros :: macros()} | + {'name',FileName :: file:name()} | + 'extra'], + Epp :: epp_handle(), + Extra :: [{'encoding', source_encoding() | 'none'}], + ErrorDescriptor :: term(). + +open(Options) -> + internal_open(Options, #epp{}). + +internal_open(Options, St) -> + case proplists:get_value(name, Options) of + undefined -> + erlang:error(badarg); + Name -> + Self = self(), + Epp = spawn(fun() -> server(Self, Name, Options, St) end), + case epp_request(Epp) of + {ok, Pid, Encoding} -> + case proplists:get_bool(extra, Options) of + true -> {ok, Pid, [{encoding, Encoding}]}; + false -> {ok, Pid} + end; + Other -> + Other + end + end. -spec close(Epp) -> 'ok' when Epp :: epp_handle(). @@ -170,9 +204,6 @@ format_error({'NYI',What}) -> io_lib:format("not yet implemented '~s'", [What]); format_error(E) -> file:format_error(E). -%% parse_file(FileName, IncludePath, [PreDefMacro]) -> -%% {ok,[Form]} | {error,OpenError} - -spec parse_file(FileName, IncludePath, PredefMacros) -> {'ok', [Form]} | {error, OpenError} when FileName :: file:name(), @@ -184,17 +215,40 @@ format_error(E) -> file:format_error(E). OpenError :: file:posix() | badarg | system_limit. parse_file(Ifile, Path, Predefs) -> - case open(Ifile, Path, Predefs) of + parse_file(Ifile, [{includes, Path}, {macros, Predefs}]). + +-spec parse_file(FileName, Options) -> + {'ok', [Form]} | {'ok', [Form], Extra} | {error, OpenError} when + FileName :: file:name(), + Options :: [{'includes', IncludePath :: [DirectoryName :: file:name()]} | + {'macros', PredefMacros :: macros()} | + {'default_encoding', DefEncoding :: source_encoding()} | + 'extra'], + Form :: erl_parse:abstract_form() | {'error', ErrorInfo} | {'eof',Line}, + Line :: erl_scan:line(), + ErrorInfo :: erl_scan:error_info() | erl_parse:error_info(), + Extra :: [{'encoding', source_encoding() | 'none'}], + OpenError :: file:posix() | badarg | system_limit. + +parse_file(Ifile, Options) -> + case internal_open([{name, Ifile} | Options], #epp{}) of {ok,Epp} -> Forms = parse_file(Epp), close(Epp), {ok,Forms}; + {ok,Epp,Extra} -> + Forms = parse_file(Epp), + close(Epp), + {ok,Forms,Extra}; {error,E} -> {error,E} end. -%% parse_file(Epp) -> -%% [Form] +-spec parse_file(Epp) -> [Form] when + Epp :: epp_handle(), + Form :: erl_parse:abstract_form() | {'error', ErrorInfo} | {'eof',Line}, + Line :: erl_scan:line(), + ErrorInfo :: erl_scan:error_info() | erl_parse:error_info(). parse_file(Epp) -> case parse_erl_form(Epp) of @@ -219,8 +273,6 @@ parse_file(Epp) -> [{eof,Location}] end. --define(DEFAULT_ENCODING, utf8). - -spec default_encoding() -> source_encoding(). default_encoding() -> @@ -258,9 +310,16 @@ read_encoding(Name, Options) -> File :: io:device(). % pid(); raw files don't work set_encoding(File) -> + set_encoding(File, ?DEFAULT_ENCODING). + +-spec set_encoding(File, Default) -> source_encoding() | none when + Default :: source_encoding(), + File :: io:device(). % pid(); raw files don't work + +set_encoding(File, Default) -> Encoding = read_encoding_from_file(File, true), Enc = case Encoding of - none -> default_encoding(); + none -> Default; Encoding -> Encoding end, ok = io:setopts(File, [{encoding, Enc}]), @@ -446,35 +505,37 @@ restore_typed_record_fields([{attribute,La,type,{{record,Record},Fields,[]}}| restore_typed_record_fields([Form|Forms]) -> [Form|restore_typed_record_fields(Forms)]. -%% server(StarterPid, FileName, Path, PreDefMacros) - -server(Pid, Name, Path, Pdm) -> +server(Pid, Name, Options, #epp{pre_opened=PreOpened}=St) -> process_flag(trap_exit, true), - case file:open(Name, [read]) of - {ok,File} -> - Location = 1, - init_server(Pid, Name, File, Location, Path, Pdm, false); - {error,E} -> - epp_reply(Pid, {error,E}) + case PreOpened of + false -> + case file:open(Name, [read]) of + {ok,File} -> + init_server(Pid, Name, Options, St#epp{file = File}); + {error,E} -> + epp_reply(Pid, {error,E}) + end; + true -> + init_server(Pid, Name, Options, St) end. -%% server(StarterPid, FileName, IoDevice, Location, Path, PreDefMacros) -server(Pid, Name, File, AtLocation, Path, Pdm) -> - process_flag(trap_exit, true), - init_server(Pid, Name, File, AtLocation, Path, Pdm, true). - -init_server(Pid, Name, File, AtLocation, Path, Pdm, Pre) -> +init_server(Pid, Name, Options, St0) -> + Pdm = proplists:get_value(macros, Options, []), Ms0 = predef_macros(Name), case user_predef(Pdm, Ms0) of {ok,Ms1} -> - _ = set_encoding(File), - epp_reply(Pid, {ok,self()}), + #epp{file = File, location = AtLocation} = St0, + DefEncoding = proplists:get_value(default_encoding, Options, + ?DEFAULT_ENCODING), + Encoding = set_encoding(File, DefEncoding), + epp_reply(Pid, {ok,self(),Encoding}), %% ensure directory of current source file is %% first in path - Path1 = [filename:dirname(Name) | Path], - St = #epp{file=File, location=AtLocation, delta=0, - name=Name, name2=Name, path=Path1, macs=Ms1, - pre_opened = Pre}, + Path = [filename:dirname(Name) | + proplists:get_value(includes, Options, [])], + St = St0#epp{delta=0, name=Name, name2=Name, + path=Path, macs=Ms1, + default_encoding=DefEncoding}, From = wait_request(St), enter_file_reply(From, Name, AtLocation, AtLocation), wait_req_scan(St); @@ -600,9 +661,11 @@ enter_file2(NewF, Pname, From, St0, AtLocation) -> %% the path) must be dropped, otherwise the path used within the current %% file will depend on the order of file inclusions in the parent files Path = [filename:dirname(Pname) | tl(St0#epp.path)], - _ = set_encoding(NewF), + DefEncoding = St0#epp.default_encoding, + _ = set_encoding(NewF, DefEncoding), #epp{file=NewF,location=Loc,name=Pname,name2=Pname,delta=0, - sstk=[St0|St0#epp.sstk],path=Path,macs=Ms}. + sstk=[St0|St0#epp.sstk],path=Path,macs=Ms, + default_encoding=DefEncoding}. enter_file_reply(From, Name, Location, AtLocation) -> Attr = loc_attr(AtLocation), diff --git a/lib/stdlib/test/epp_SUITE.erl b/lib/stdlib/test/epp_SUITE.erl index 0b4726c07a..b17e8bd186 100644 --- a/lib/stdlib/test/epp_SUITE.erl +++ b/lib/stdlib/test/epp_SUITE.erl @@ -26,7 +26,7 @@ pmod/1, not_circular/1, skip_header/1, otp_6277/1, otp_7702/1, otp_8130/1, overload_mac/1, otp_8388/1, otp_8470/1, otp_8503/1, otp_8562/1, otp_8665/1, otp_8911/1, otp_10302/1, otp_10820/1, - otp_11728/1]). + otp_11728/1, encoding/1]). -export([epp_parse_erl_form/2]). @@ -68,7 +68,8 @@ all() -> {group, variable}, otp_4870, otp_4871, otp_5362, pmod, not_circular, skip_header, otp_6277, otp_7702, otp_8130, overload_mac, otp_8388, otp_8470, otp_8503, otp_8562, - otp_8665, otp_8911, otp_10302, otp_10820, otp_11728]. + otp_8665, otp_8911, otp_10302, otp_10820, otp_11728, + encoding]. groups() -> [{upcase_mac, [], [upcase_mac_1, upcase_mac_2]}, @@ -123,10 +124,22 @@ include_local(Config) when is_list(Config) -> %%% regular epp:parse_file, the test case will time out, and then epp %%% server will go on growing until we dump core. epp_parse_file(File, Inc, Predef) -> - {ok, Epp} = epp:open(File, Inc, Predef), + List = do_epp_parse_file(fun() -> + epp:open(File, Inc, Predef) + end), + List = do_epp_parse_file(fun() -> + Opts = [{name, File}, + {includes, Inc}, + {macros, Predef}], + epp:open(Opts) + end), + {ok, List}. + +do_epp_parse_file(Open) -> + {ok, Epp} = Open(), List = collect_epp_forms(Epp), epp:close(Epp), - {ok, List}. + List. collect_epp_forms(Epp) -> Result = epp_parse_erl_form(Epp), @@ -1413,6 +1426,63 @@ otp_11728(Config) when is_list(Config) -> _ = file:delete(ErlFile), ok. +%% Check the new API for setting the default encoding. +encoding(Config) when is_list(Config) -> + Dir = ?config(priv_dir, Config), + ErlFile = filename:join(Dir, "encoding.erl"), + + %% Try a latin-1 file with no encoding given. + C1 = <<"-module(encoding). + %% ",246," + ">>, + ok = file:write_file(ErlFile, C1), + {ok,[{attribute,1,file,_}, + {attribute,1,module,encoding}, + {error,_}, + {error,{2,epp,cannot_parse}}, + {eof,2}]} = epp:parse_file(ErlFile, []), + {ok,[{attribute,1,file,_}, + {attribute,1,module,encoding}, + {eof,3}]} = + epp:parse_file(ErlFile, [{default_encoding,latin1}]), + {ok,[{attribute,1,file,_}, + {attribute,1,module,encoding}, + {eof,3}],[{encoding,none}]} = + epp:parse_file(ErlFile, [{default_encoding,latin1},extra]), + + %% Try a latin-1 file with encoding given in a comment. + C2 = <<"-module(encoding). + %% encoding: latin-1 + %% ",246," + ">>, + ok = file:write_file(ErlFile, C2), + {ok,[{attribute,1,file,_}, + {attribute,1,module,encoding}, + {eof,4}]} = + epp:parse_file(ErlFile, []), + {ok,[{attribute,1,file,_}, + {attribute,1,module,encoding}, + {eof,4}]} = + epp:parse_file(ErlFile, [{default_encoding,latin1}]), + {ok,[{attribute,1,file,_}, + {attribute,1,module,encoding}, + {eof,4}]} = + epp:parse_file(ErlFile, [{default_encoding,utf8}]), + {ok,[{attribute,1,file,_}, + {attribute,1,module,encoding}, + {eof,4}],[{encoding,latin1}]} = + epp:parse_file(ErlFile, [extra]), + {ok,[{attribute,1,file,_}, + {attribute,1,module,encoding}, + {eof,4}],[{encoding,latin1}]} = + epp:parse_file(ErlFile, [{default_encoding,latin1},extra]), + {ok,[{attribute,1,file,_}, + {attribute,1,module,encoding}, + {eof,4}],[{encoding,latin1}]} = + epp:parse_file(ErlFile, [{default_encoding,utf8},extra]), + ok. + + check(Config, Tests) -> eval_tests(Config, fun check_test/2, Tests). -- cgit v1.2.3