aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2014-03-03 13:50:23 +0100
committerBjörn Gustavsson <[email protected]>2014-03-18 17:47:51 +0100
commitf3cee0e9f409c5850709f11ba15cec22d7387401 (patch)
tree7a8f41eba7505f54d3a22c94fb625e04834c4372
parent237264bc018b0cc17afeac5d3f6030073f314f9d (diff)
downloadotp-f3cee0e9f409c5850709f11ba15cec22d7387401.tar.gz
otp-f3cee0e9f409c5850709f11ba15cec22d7387401.tar.bz2
otp-f3cee0e9f409c5850709f11ba15cec22d7387401.zip
epp: Make it possible to specify a default encoding
In the next commit, we will need a way to tell epp which the default encoding should be for files that have no encoding comment. We could add new open() and parse_file() functions with one extra argument for the encoding, but there are already too many variants. To avoid having to add an additional argument to epp:open() and epp:parse_file() each time new options are needed, introduce epp:open/1 and epp:parse_file/2 that takes a property list with options. Also support the new 'default_encoding' option for specifying the default encoding for source files. Thanks to Richard Carlsson for the idea and the implementation of the new functionality in epp.erl.
-rw-r--r--lib/stdlib/doc/src/epp.xml56
-rw-r--r--lib/stdlib/src/epp.erl147
-rw-r--r--lib/stdlib/test/epp_SUITE.erl78
3 files changed, 230 insertions, 51 deletions
diff --git a/lib/stdlib/doc/src/epp.xml b/lib/stdlib/doc/src/epp.xml
index cf33530395..50baad04a9 100644
--- a/lib/stdlib/doc/src/epp.xml
+++ b/lib/stdlib/doc/src/epp.xml
@@ -64,11 +64,29 @@
</datatypes>
<funcs>
<func>
+ <name name="open" arity="1"/>
+ <fsummary>Open a file for preprocessing</fsummary>
+ <desc>
+ <p>Opens a file for preprocessing.</p>
+ <p>If <c>extra</c> is given in
+ <c><anno>Options</anno></c>, the return value will be
+ <c>{ok, <anno>Epp</anno>, <anno>Extra</anno>}</c> instead
+ of <c>{ok, <anno>Epp</anno>}</c>.</p>
+ </desc>
+ </func>
+ <func>
<name name="open" arity="2"/>
+ <fsummary>Open a file for preprocessing</fsummary>
+ <desc>
+ <p>Equivalent to <c>epp:open([{name, FileName}, {includes, IncludePath}])</c>.</p>
+ </desc>
+ </func>
+ <func>
<name name="open" arity="3"/>
<fsummary>Open a file for preprocessing</fsummary>
<desc>
- <p>Opens a file for preprocessing.</p>
+ <p>Equivalent to <c>epp:open([{name, FileName}, {includes, IncludePath},
+ {macros, PredefMacros}])</c>.</p>
</desc>
</func>
<func>
@@ -89,12 +107,24 @@
</desc>
</func>
<func>
- <name name="parse_file" arity="3"/>
+ <name name="parse_file" arity="2"/>
<fsummary>Preprocess and parse an Erlang source file</fsummary>
<desc>
<p>Preprocesses and parses an Erlang source file.
- Note that the tuple <c>{eof, <anno>Line</anno>}</c> returned at end-of-file is
- included as a "form".</p>
+ Note that the tuple <c>{eof, <anno>Line</anno>}</c> returned
+ at end-of-file is included as a "form".</p>
+ <p>If <c>extra</c> is given in
+ <c><anno>Options</anno></c>, the return value will be
+ <c>{ok, [<anno>Form</anno>], <anno>Extra</anno>}</c> instead
+ of <c>{ok, [<anno>Form</anno>]}</c>.</p>
+ </desc>
+ </func>
+ <func>
+ <name name="parse_file" arity="3"/>
+ <fsummary>Preprocess and parse an Erlang source file</fsummary>
+ <desc>
+ <p>Equivalent to <c>epp:parse_file(FileName, [{includes, IncludePath},
+ {macros, PredefMacros}])</c>.</p>
</desc>
</func>
<func>
@@ -111,7 +141,7 @@
<p>Returns a string representation of an encoding. The string
is recognized by <c>read_encoding/1,2</c>,
<c>read_encoding_from_binary/1,2</c>, and
- <c>set_encoding/1</c> as a valid encoding.</p>
+ <c>set_encoding/1,2</c> as a valid encoding.</p>
</desc>
</func>
<func>
@@ -157,6 +187,22 @@
</desc>
</func>
<func>
+ <name name="set_encoding" arity="2"/>
+ <fsummary>Read and set the encoding of an IO device</fsummary>
+ <desc>
+ <p>Reads the <seealso marker="#encoding">encoding</seealso> from
+ an IO device and sets the encoding of the device
+ accordingly. The position of the IO device referenced by
+ <c><anno>File</anno></c> is not affected. If no valid
+ encoding can be read from the IO device the encoding of the
+ IO device is set to the
+ <seealso marker="#encoding">encoding</seealso> given by
+ <c><anno>Default</anno></c>.</p>
+ <p>Returns the read encoding, or <c>none</c> if no valid
+ encoding was found.</p>
+ </desc>
+ </func>
+ <func>
<name name="format_error" arity="1"/>
<fsummary>Format an error descriptor</fsummary>
<desc>
diff --git a/lib/stdlib/src/epp.erl b/lib/stdlib/src/epp.erl
index 68e079b7e5..d212a55b47 100644
--- a/lib/stdlib/src/epp.erl
+++ b/lib/stdlib/src/epp.erl
@@ -20,12 +20,12 @@
%% An Erlang code preprocessor.
--export([open/2,open/3,open/5,close/1,format_error/1]).
+-export([open/1, open/2,open/3,open/5,close/1,format_error/1]).
-export([scan_erl_form/1,parse_erl_form/1,macro_defs/1]).
--export([parse_file/1, parse_file/3]).
+-export([parse_file/1, parse_file/2, parse_file/3]).
-export([default_encoding/0, encoding_to_string/1,
read_encoding_from_binary/1, read_encoding_from_binary/2,
- set_encoding/1, read_encoding/1, read_encoding/2]).
+ set_encoding/1, set_encoding/2, read_encoding/1, read_encoding/2]).
-export([interpret_file_attribute/1]).
-export([normalize_typed_record_fields/1,restore_typed_record_fields/1]).
@@ -37,9 +37,11 @@
-type epp_handle() :: pid().
-type source_encoding() :: latin1 | utf8.
+-define(DEFAULT_ENCODING, utf8).
+
%% Epp state record.
-record(epp, {file, %Current file
- location, %Current location
+ location=1, %Current location
delta, %Offset from Location (-file)
name="", %Current file name
name2="", %-"-, modified by -file
@@ -48,6 +50,7 @@
path=[], %Include-path
macs = dict:new() :: dict:dict(),%Macros (don't care locations)
uses = dict:new() :: dict:dict(),%Macro use structure
+ default_encoding = ?DEFAULT_ENCODING :: source_encoding(),
pre_opened = false :: boolean()
}).
@@ -58,6 +61,7 @@
%%% distinction in the internal representation would simplify the code
%%% a little.
+%% open(Options)
%% open(FileName, IncludePath)
%% open(FileName, IncludePath, PreDefMacros)
%% open(FileName, IoDevice, StartLocation, IncludePath, PreDefMacros)
@@ -65,6 +69,7 @@
%% scan_erl_form(Epp)
%% parse_erl_form(Epp)
%% parse_file(Epp)
+%% parse_file(FileName, Options)
%% parse_file(FileName, IncludePath, PreDefMacros)
%% macro_defs(Epp)
@@ -87,14 +92,43 @@ open(Name, Path) ->
ErrorDescriptor :: term().
open(Name, Path, Pdm) ->
- Self = self(),
- Epp = spawn(fun() -> server(Self, Name, Path, Pdm) end),
- epp_request(Epp).
+ internal_open([{name, Name}, {includes, Path}, {macros, Pdm}], #epp{}).
open(Name, File, StartLocation, Path, Pdm) ->
- Self = self(),
- Epp = spawn(fun() -> server(Self, Name, File, StartLocation,Path,Pdm) end),
- epp_request(Epp).
+ internal_open([{name, Name}, {includes, Path}, {macros, Pdm}],
+ #epp{file=File, pre_opened=true, location=StartLocation}).
+
+-spec open(Options) ->
+ {'ok', Epp} | {'ok', Epp, Extra} | {'error', ErrorDescriptor} when
+ Options :: [{'default_encoding', DefEncoding :: source_encoding()} |
+ {'includes', IncludePath :: [DirectoryName :: file:name()]} |
+ {'macros', PredefMacros :: macros()} |
+ {'name',FileName :: file:name()} |
+ 'extra'],
+ Epp :: epp_handle(),
+ Extra :: [{'encoding', source_encoding() | 'none'}],
+ ErrorDescriptor :: term().
+
+open(Options) ->
+ internal_open(Options, #epp{}).
+
+internal_open(Options, St) ->
+ case proplists:get_value(name, Options) of
+ undefined ->
+ erlang:error(badarg);
+ Name ->
+ Self = self(),
+ Epp = spawn(fun() -> server(Self, Name, Options, St) end),
+ case epp_request(Epp) of
+ {ok, Pid, Encoding} ->
+ case proplists:get_bool(extra, Options) of
+ true -> {ok, Pid, [{encoding, Encoding}]};
+ false -> {ok, Pid}
+ end;
+ Other ->
+ Other
+ end
+ end.
-spec close(Epp) -> 'ok' when
Epp :: epp_handle().
@@ -170,9 +204,6 @@ format_error({'NYI',What}) ->
io_lib:format("not yet implemented '~s'", [What]);
format_error(E) -> file:format_error(E).
-%% parse_file(FileName, IncludePath, [PreDefMacro]) ->
-%% {ok,[Form]} | {error,OpenError}
-
-spec parse_file(FileName, IncludePath, PredefMacros) ->
{'ok', [Form]} | {error, OpenError} when
FileName :: file:name(),
@@ -184,17 +215,40 @@ format_error(E) -> file:format_error(E).
OpenError :: file:posix() | badarg | system_limit.
parse_file(Ifile, Path, Predefs) ->
- case open(Ifile, Path, Predefs) of
+ parse_file(Ifile, [{includes, Path}, {macros, Predefs}]).
+
+-spec parse_file(FileName, Options) ->
+ {'ok', [Form]} | {'ok', [Form], Extra} | {error, OpenError} when
+ FileName :: file:name(),
+ Options :: [{'includes', IncludePath :: [DirectoryName :: file:name()]} |
+ {'macros', PredefMacros :: macros()} |
+ {'default_encoding', DefEncoding :: source_encoding()} |
+ 'extra'],
+ Form :: erl_parse:abstract_form() | {'error', ErrorInfo} | {'eof',Line},
+ Line :: erl_scan:line(),
+ ErrorInfo :: erl_scan:error_info() | erl_parse:error_info(),
+ Extra :: [{'encoding', source_encoding() | 'none'}],
+ OpenError :: file:posix() | badarg | system_limit.
+
+parse_file(Ifile, Options) ->
+ case internal_open([{name, Ifile} | Options], #epp{}) of
{ok,Epp} ->
Forms = parse_file(Epp),
close(Epp),
{ok,Forms};
+ {ok,Epp,Extra} ->
+ Forms = parse_file(Epp),
+ close(Epp),
+ {ok,Forms,Extra};
{error,E} ->
{error,E}
end.
-%% parse_file(Epp) ->
-%% [Form]
+-spec parse_file(Epp) -> [Form] when
+ Epp :: epp_handle(),
+ Form :: erl_parse:abstract_form() | {'error', ErrorInfo} | {'eof',Line},
+ Line :: erl_scan:line(),
+ ErrorInfo :: erl_scan:error_info() | erl_parse:error_info().
parse_file(Epp) ->
case parse_erl_form(Epp) of
@@ -219,8 +273,6 @@ parse_file(Epp) ->
[{eof,Location}]
end.
--define(DEFAULT_ENCODING, utf8).
-
-spec default_encoding() -> source_encoding().
default_encoding() ->
@@ -258,9 +310,16 @@ read_encoding(Name, Options) ->
File :: io:device(). % pid(); raw files don't work
set_encoding(File) ->
+ set_encoding(File, ?DEFAULT_ENCODING).
+
+-spec set_encoding(File, Default) -> source_encoding() | none when
+ Default :: source_encoding(),
+ File :: io:device(). % pid(); raw files don't work
+
+set_encoding(File, Default) ->
Encoding = read_encoding_from_file(File, true),
Enc = case Encoding of
- none -> default_encoding();
+ none -> Default;
Encoding -> Encoding
end,
ok = io:setopts(File, [{encoding, Enc}]),
@@ -446,35 +505,37 @@ restore_typed_record_fields([{attribute,La,type,{{record,Record},Fields,[]}}|
restore_typed_record_fields([Form|Forms]) ->
[Form|restore_typed_record_fields(Forms)].
-%% server(StarterPid, FileName, Path, PreDefMacros)
-
-server(Pid, Name, Path, Pdm) ->
+server(Pid, Name, Options, #epp{pre_opened=PreOpened}=St) ->
process_flag(trap_exit, true),
- case file:open(Name, [read]) of
- {ok,File} ->
- Location = 1,
- init_server(Pid, Name, File, Location, Path, Pdm, false);
- {error,E} ->
- epp_reply(Pid, {error,E})
+ case PreOpened of
+ false ->
+ case file:open(Name, [read]) of
+ {ok,File} ->
+ init_server(Pid, Name, Options, St#epp{file = File});
+ {error,E} ->
+ epp_reply(Pid, {error,E})
+ end;
+ true ->
+ init_server(Pid, Name, Options, St)
end.
-%% server(StarterPid, FileName, IoDevice, Location, Path, PreDefMacros)
-server(Pid, Name, File, AtLocation, Path, Pdm) ->
- process_flag(trap_exit, true),
- init_server(Pid, Name, File, AtLocation, Path, Pdm, true).
-
-init_server(Pid, Name, File, AtLocation, Path, Pdm, Pre) ->
+init_server(Pid, Name, Options, St0) ->
+ Pdm = proplists:get_value(macros, Options, []),
Ms0 = predef_macros(Name),
case user_predef(Pdm, Ms0) of
{ok,Ms1} ->
- _ = set_encoding(File),
- epp_reply(Pid, {ok,self()}),
+ #epp{file = File, location = AtLocation} = St0,
+ DefEncoding = proplists:get_value(default_encoding, Options,
+ ?DEFAULT_ENCODING),
+ Encoding = set_encoding(File, DefEncoding),
+ epp_reply(Pid, {ok,self(),Encoding}),
%% ensure directory of current source file is
%% first in path
- Path1 = [filename:dirname(Name) | Path],
- St = #epp{file=File, location=AtLocation, delta=0,
- name=Name, name2=Name, path=Path1, macs=Ms1,
- pre_opened = Pre},
+ Path = [filename:dirname(Name) |
+ proplists:get_value(includes, Options, [])],
+ St = St0#epp{delta=0, name=Name, name2=Name,
+ path=Path, macs=Ms1,
+ default_encoding=DefEncoding},
From = wait_request(St),
enter_file_reply(From, Name, AtLocation, AtLocation),
wait_req_scan(St);
@@ -600,9 +661,11 @@ enter_file2(NewF, Pname, From, St0, AtLocation) ->
%% the path) must be dropped, otherwise the path used within the current
%% file will depend on the order of file inclusions in the parent files
Path = [filename:dirname(Pname) | tl(St0#epp.path)],
- _ = set_encoding(NewF),
+ DefEncoding = St0#epp.default_encoding,
+ _ = set_encoding(NewF, DefEncoding),
#epp{file=NewF,location=Loc,name=Pname,name2=Pname,delta=0,
- sstk=[St0|St0#epp.sstk],path=Path,macs=Ms}.
+ sstk=[St0|St0#epp.sstk],path=Path,macs=Ms,
+ default_encoding=DefEncoding}.
enter_file_reply(From, Name, Location, AtLocation) ->
Attr = loc_attr(AtLocation),
diff --git a/lib/stdlib/test/epp_SUITE.erl b/lib/stdlib/test/epp_SUITE.erl
index 0b4726c07a..b17e8bd186 100644
--- a/lib/stdlib/test/epp_SUITE.erl
+++ b/lib/stdlib/test/epp_SUITE.erl
@@ -26,7 +26,7 @@
pmod/1, not_circular/1, skip_header/1, otp_6277/1, otp_7702/1,
otp_8130/1, overload_mac/1, otp_8388/1, otp_8470/1, otp_8503/1,
otp_8562/1, otp_8665/1, otp_8911/1, otp_10302/1, otp_10820/1,
- otp_11728/1]).
+ otp_11728/1, encoding/1]).
-export([epp_parse_erl_form/2]).
@@ -68,7 +68,8 @@ all() ->
{group, variable}, otp_4870, otp_4871, otp_5362, pmod,
not_circular, skip_header, otp_6277, otp_7702, otp_8130,
overload_mac, otp_8388, otp_8470, otp_8503, otp_8562,
- otp_8665, otp_8911, otp_10302, otp_10820, otp_11728].
+ otp_8665, otp_8911, otp_10302, otp_10820, otp_11728,
+ encoding].
groups() ->
[{upcase_mac, [], [upcase_mac_1, upcase_mac_2]},
@@ -123,10 +124,22 @@ include_local(Config) when is_list(Config) ->
%%% regular epp:parse_file, the test case will time out, and then epp
%%% server will go on growing until we dump core.
epp_parse_file(File, Inc, Predef) ->
- {ok, Epp} = epp:open(File, Inc, Predef),
+ List = do_epp_parse_file(fun() ->
+ epp:open(File, Inc, Predef)
+ end),
+ List = do_epp_parse_file(fun() ->
+ Opts = [{name, File},
+ {includes, Inc},
+ {macros, Predef}],
+ epp:open(Opts)
+ end),
+ {ok, List}.
+
+do_epp_parse_file(Open) ->
+ {ok, Epp} = Open(),
List = collect_epp_forms(Epp),
epp:close(Epp),
- {ok, List}.
+ List.
collect_epp_forms(Epp) ->
Result = epp_parse_erl_form(Epp),
@@ -1413,6 +1426,63 @@ otp_11728(Config) when is_list(Config) ->
_ = file:delete(ErlFile),
ok.
+%% Check the new API for setting the default encoding.
+encoding(Config) when is_list(Config) ->
+ Dir = ?config(priv_dir, Config),
+ ErlFile = filename:join(Dir, "encoding.erl"),
+
+ %% Try a latin-1 file with no encoding given.
+ C1 = <<"-module(encoding).
+ %% ",246,"
+ ">>,
+ ok = file:write_file(ErlFile, C1),
+ {ok,[{attribute,1,file,_},
+ {attribute,1,module,encoding},
+ {error,_},
+ {error,{2,epp,cannot_parse}},
+ {eof,2}]} = epp:parse_file(ErlFile, []),
+ {ok,[{attribute,1,file,_},
+ {attribute,1,module,encoding},
+ {eof,3}]} =
+ epp:parse_file(ErlFile, [{default_encoding,latin1}]),
+ {ok,[{attribute,1,file,_},
+ {attribute,1,module,encoding},
+ {eof,3}],[{encoding,none}]} =
+ epp:parse_file(ErlFile, [{default_encoding,latin1},extra]),
+
+ %% Try a latin-1 file with encoding given in a comment.
+ C2 = <<"-module(encoding).
+ %% encoding: latin-1
+ %% ",246,"
+ ">>,
+ ok = file:write_file(ErlFile, C2),
+ {ok,[{attribute,1,file,_},
+ {attribute,1,module,encoding},
+ {eof,4}]} =
+ epp:parse_file(ErlFile, []),
+ {ok,[{attribute,1,file,_},
+ {attribute,1,module,encoding},
+ {eof,4}]} =
+ epp:parse_file(ErlFile, [{default_encoding,latin1}]),
+ {ok,[{attribute,1,file,_},
+ {attribute,1,module,encoding},
+ {eof,4}]} =
+ epp:parse_file(ErlFile, [{default_encoding,utf8}]),
+ {ok,[{attribute,1,file,_},
+ {attribute,1,module,encoding},
+ {eof,4}],[{encoding,latin1}]} =
+ epp:parse_file(ErlFile, [extra]),
+ {ok,[{attribute,1,file,_},
+ {attribute,1,module,encoding},
+ {eof,4}],[{encoding,latin1}]} =
+ epp:parse_file(ErlFile, [{default_encoding,latin1},extra]),
+ {ok,[{attribute,1,file,_},
+ {attribute,1,module,encoding},
+ {eof,4}],[{encoding,latin1}]} =
+ epp:parse_file(ErlFile, [{default_encoding,utf8},extra]),
+ ok.
+
+
check(Config, Tests) ->
eval_tests(Config, fun check_test/2, Tests).