aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Hans Bolinder <[email protected]> 2012-12-03 12:38:07 +0100
committer Hans Bolinder <[email protected]> 2013-01-02 10:15:17 +0100
commit 0dcd574b493daa864e22a8332d11be9945466cb6 (patch)
tree be495db057f1e2a2969e60fa3d8c0d1390b476a3
parent 7f00144160c8e0202cfe7368830bdd472bc73505 (diff)
downloadotp-0dcd574b493daa864e22a8332d11be9945466cb6.tar.gz
otp-0dcd574b493daa864e22a8332d11be9945466cb6.tar.bz2
otp-0dcd574b493daa864e22a8332d11be9945466cb6.zip
[syntax_tools] Introduce Unicode support for Erlang source files
Not complete: Unicode in wild attributes doesn't work yet, and there is no Unicode support for Igor stubs.
-rw-r--r-- lib/syntax_tools/src/epp_dodger.erl 4
-rw-r--r-- lib/syntax_tools/src/erl_comment_scan.erl 12
-rw-r--r-- lib/syntax_tools/src/erl_prettypr.erl 14
-rw-r--r-- lib/syntax_tools/src/erl_syntax.erl 48
-rw-r--r-- lib/syntax_tools/src/erl_tidy.erl 11
-rw-r--r-- lib/syntax_tools/src/igor.erl 55
6 files changed, 116 insertions, 28 deletions
diff --git a/lib/syntax_tools/src/epp_dodger.erl b/lib/syntax_tools/src/epp_dodger.erl
index b3ced34c14..70395848a1 100644
--- a/lib/syntax_tools/src/epp_dodger.erl
+++ b/lib/syntax_tools/src/epp_dodger.erl
@@ -186,6 +186,7 @@ quick_parse_file(File, Options) ->
parse_file(File, Parser, Options) ->
case file:open(File, [read]) of
{ok, Dev} ->
+ _ = epp:set_encoding(Dev),
try Parser(Dev, 1, Options)
after ok = file:close(Dev)
end;
@@ -400,7 +401,7 @@ quick_parse_form(Dev, L0, Options) ->
parse_form(Dev, L0, Parser, Options) ->
NoFail = proplists:get_bool(no_fail, Options),
Opt = #opt{clever = proplists:get_bool(clever, Options)},
- case io:scan_erl_form(Dev, "", L0) of
+ case io:scan_erl_form(Dev, "", L0, [unicode]) of
{ok, Ts, L1} ->
case catch {ok, Parser(Ts, Opt)} of
{'EXIT', Term} ->
@@ -419,6 +420,7 @@ parse_form(Dev, L0, Parser, Options) ->
{ok, F, L1}
end;
{error, _IoErr, _L1} = Err -> Err;
+ {error, _Reason} -> {eof, L0}; % This is probably an encoding problem
{eof, _L1} = Eof -> Eof
end.
diff --git a/lib/syntax_tools/src/erl_comment_scan.erl b/lib/syntax_tools/src/erl_comment_scan.erl
index b833e1c069..a70e7ba413 100644
--- a/lib/syntax_tools/src/erl_comment_scan.erl
+++ b/lib/syntax_tools/src/erl_comment_scan.erl
@@ -72,7 +72,17 @@ file(Name) ->
{ok, V} ->
case V of
{ok, B} ->
- string(binary_to_list(B));
+ Enc = case epp:read_encoding(Name) of
+ none -> epp:default_encoding();
+ Enc0 -> Enc0
+ end,
+ case catch unicode:characters_to_list(B, Enc) of
+ String when is_list(String) ->
+ string(String);
+ R ->
+ error_read_file(Name1),
+ exit(R)
+ end;
{error, E} ->
error_read_file(Name1),
exit({read, E})
diff --git a/lib/syntax_tools/src/erl_prettypr.erl b/lib/syntax_tools/src/erl_prettypr.erl
index f4bbf975c3..577dd21a77 100644
--- a/lib/syntax_tools/src/erl_prettypr.erl
+++ b/lib/syntax_tools/src/erl_prettypr.erl
@@ -60,7 +60,9 @@
hook = ?NOHOOK :: hook(),
paper = ?PAPER :: integer(),
ribbon = ?RIBBON :: integer(),
- user = ?NOUSER :: term()}).
+ user = ?NOUSER :: term(),
+ encoding = epp:default_encoding() :: epp:source_encoding()}).
+
-type context() :: #ctxt{}.
%% =====================================================================
@@ -231,6 +233,8 @@ format(Node) ->
%% <dt>{user, term()}</dt>
%% <dd>User-specific data for use in hook functions. The default
%% value is `undefined'.</dd>
+%% <dt>{encoding, epp:source_encoding()}</dt>
+%% <dd>Specifies the encoding of the generated file.</dd>
%% </dl>
%%
%% A hook function (cf. the {@link hook()} type) is passed the current
@@ -342,7 +346,9 @@ layout(Node, Options) ->
#ctxt{hook = proplists:get_value(hook, Options, ?NOHOOK),
paper = proplists:get_value(paper, Options, ?PAPER),
ribbon = proplists:get_value(ribbon, Options, ?RIBBON),
- user = proplists:get_value(user, Options)}).
+ user = proplists:get_value(user, Options),
+ encoding = proplists:get_value(encoding, Options,
+ epp:default_encoding())}).
lay(Node, Ctxt) ->
case erl_syntax:get_ann(Node) of
@@ -445,10 +451,10 @@ lay_2(Node, Ctxt) ->
text(tidy_float(erl_syntax:float_literal(Node)));
char ->
- text(erl_syntax:char_literal(Node));
+ text(erl_syntax:char_literal(Node, Ctxt#ctxt.encoding));
string ->
- lay_string(erl_syntax:string_literal(Node), Ctxt);
+ lay_string(erl_syntax:string_literal(Node, Ctxt#ctxt.encoding), Ctxt);
nil ->
text("[]");
diff --git a/lib/syntax_tools/src/erl_syntax.erl b/lib/syntax_tools/src/erl_syntax.erl
index 151f04b03b..93b9dc54dd 100644
--- a/lib/syntax_tools/src/erl_syntax.erl
+++ b/lib/syntax_tools/src/erl_syntax.erl
@@ -161,6 +161,7 @@
is_char/2,
char_value/1,
char_literal/1,
+ char_literal/2,
clause/2,
clause/3,
clause_body/1,
@@ -271,6 +272,7 @@
is_string/2,
string_value/1,
string_literal/1,
+ string_literal/2,
text/1,
text_string/1,
try_expr/2,
@@ -1628,6 +1630,7 @@ float_literal(Node) ->
%%
%% @see char_value/1
%% @see char_literal/1
+%% @see char_literal/2
%% @see is_char/2
%% type(Node) = char
@@ -1687,13 +1690,34 @@ char_value(Node) ->
%% =====================================================================
%% @doc Returns the literal string represented by a `char'
%% node. This includes the leading "`$'" character.
+%% Characters beyond 255 will be escaped.
%%
%% @see char/1
-spec char_literal(syntaxTree()) -> nonempty_string().
char_literal(Node) ->
- io_lib:write_char(char_value(Node)).
+ char_literal(Node, latin1).
+
+
+%% =====================================================================
+%% @doc Returns the literal string represented by a `char'
+%% node. This includes the leading "`$'" character.
+%% Depending on the encoding, a character beyond 255 will be escaped
+%% ('latin1') or copied as is ('utf8' or 'unicode').
+%%
+%% @see char/1
+
+-type encoding() :: 'utf8' | 'unicode' | 'latin1'.
+
+-spec char_literal(syntaxTree(), encoding()) -> nonempty_string().
+
+char_literal(Node, unicode) ->
+ io_lib:write_unicode_char(char_value(Node));
+char_literal(Node, utf8) ->
+ io_lib:write_unicode_char(char_value(Node));
+char_literal(Node, latin1) ->
+ io_lib:write_unicode_char_as_latin1(char_value(Node)).
%% =====================================================================
@@ -1708,6 +1732,7 @@ char_literal(Node) ->
%%
%% @see string_value/1
%% @see string_literal/1
+%% @see string_literal/2
%% @see is_string/2
%% @see char/1
@@ -1768,13 +1793,32 @@ string_value(Node) ->
%% =====================================================================
%% @doc Returns the literal string represented by a `string'
%% node. This includes surrounding double-quote characters.
+%% Characters beyond 255 will be escaped.
%%
%% @see string/1
-spec string_literal(syntaxTree()) -> nonempty_string().
string_literal(Node) ->
- io_lib:write_string(string_value(Node)).
+ string_literal(Node, latin1).
+
+
+%% =====================================================================
+%% @doc Returns the literal string represented by a `string'
+%% node. This includes surrounding double-quote characters.
+%% Depending on the encoding, characters beyond 255 will be escaped
+%% ('latin1') or copied as is ('utf8' or 'unicode').
+%%
+%% @see string/1
+
+-spec string_literal(syntaxTree(), encoding()) -> nonempty_string().
+
+string_literal(Node, utf8) ->
+ io_lib:write_unicode_string(string_value(Node));
+string_literal(Node, unicode) ->
+ io_lib:write_unicode_string(string_value(Node));
+string_literal(Node, latin1) ->
+ io_lib:write_unicode_string_as_latin1(string_value(Node)).
%% =====================================================================
diff --git a/lib/syntax_tools/src/erl_tidy.erl b/lib/syntax_tools/src/erl_tidy.erl
index 59cf6c0a92..e9a88caff3 100644
--- a/lib/syntax_tools/src/erl_tidy.erl
+++ b/lib/syntax_tools/src/erl_tidy.erl
@@ -375,6 +375,8 @@ write_module(Tree, Name, Opts) ->
end,
filename(filename:join(Dir, Name1))
end,
+ Encoding = [{encoding,Enc} || Enc <- [epp:read_encoding(Name)],
+ Enc =/= none],
case proplists:get_bool(backups, Opts) of
true ->
backup_file(File, Opts);
@@ -382,9 +384,9 @@ write_module(Tree, Name, Opts) ->
ok
end,
Printer = proplists:get_value(printer, Opts),
- FD = open_output_file(File),
+ FD = open_output_file(File, Encoding),
verbose("writing to file `~s'.", [File], Opts),
- V = (catch {ok, output(FD, Printer, Tree, Opts)}),
+ V = (catch {ok, output(FD, Printer, Tree, Opts++Encoding)}),
ok = file:close(FD),
case V of
{ok, _} ->
@@ -432,8 +434,9 @@ file_type(Name, Links) ->
throw(R)
end.
-open_output_file(FName) ->
- case catch file:open(FName, [write]) of
+open_output_file(FName, Options) ->
+io:format("Options ~p~n", [Options]),
+ case catch file:open(FName, [write]++Options) of
{ok, FD} ->
FD;
{error, R} ->
diff --git a/lib/syntax_tools/src/igor.erl b/lib/syntax_tools/src/igor.erl
index 37e561cbbe..8abc3f41cb 100644
--- a/lib/syntax_tools/src/igor.erl
+++ b/lib/syntax_tools/src/igor.erl
@@ -341,10 +341,12 @@ merge(Name, Files) ->
merge(Name, Files, Opts) ->
Opts1 = Opts ++ ?DEFAULT_MERGE_OPTS,
- {Tree, Stubs} = merge_files(Name, Files, Opts1),
+ {Sources, Enc} = merge_files1(Files, Opts1),
+ {Tree, Stubs} = merge_sources(Name, Sources, Opts1),
Dir = proplists:get_value(dir, Opts1, ""),
Filename = proplists:get_value(outfile, Opts1, Name),
- File = write_module(Tree, Filename, Dir, Opts1),
+ Encoding = [{encoding, Enc} || Enc =/= none],
+ File = write_module(Tree, Filename, Dir, Encoding ++ Opts1),
[File | maybe_create_stubs(Stubs, Opts1)].
@@ -459,16 +461,21 @@ merge_files(Name, Files, Options) ->
-spec merge_files(atom(), erl_syntax:forms(), [file:filename()], [option()]) ->
{erl_syntax:syntaxTree(), [stubDescriptor()]}.
-merge_files(_, _Trees, [], _) ->
+merge_files(Name, Trees, Files, Opts) ->
+ {Sources, _Encoding} = merge_files1(Files, Opts),
+ merge_sources(Name, Trees ++ Sources, Opts).
+
+merge_files1([], _) ->
report_error("no files to merge."),
exit(badarg);
-merge_files(Name, Trees, Files, Opts) ->
+merge_files1(Files, Opts) ->
Opts1 = Opts ++ [{includes, ?DEFAULT_INCLUDES},
{macros, ?DEFAULT_MACROS},
{preprocess, false},
comments],
- Sources = [read_module(F, Opts1) || F <- Files],
- merge_sources(Name, Trees ++ Sources, Opts1).
+ SourceEncodings = [read_module(F, Opts1) || F <- Files],
+ {Sources, [Encoding | _]} = lists:unzip(SourceEncodings),
+ {Sources, Encoding}.
%% =====================================================================
@@ -2512,7 +2519,11 @@ rename(Files, Renamings, Opts) ->
lists:flatmap(fun (F) -> rename_file(F, Dict, Opts1) end, Files).
rename_file(File, Dict, Opts) ->
- S = read_module(File, Opts),
+ {S, Enc} = read_module(File, Opts),
+ %% Try to avoid emitting *two* `coding:' comments:
+ Encoding = [{encoding, Enc} ||
+ Enc =/= none,
+ not proplists:get_bool(comments, Opts)],
M = get_module_info(S),
Name = M#module.name,
Name1 = case dict:find(Name, Dict) of
@@ -2526,10 +2537,10 @@ rename_file(File, Dict, Opts) ->
Opts1 = [no_headers,
{export, [Name]},
{static, [Name]},
- {redirect, dict:to_list(Dict1)}] ++ Opts,
+ {redirect, dict:to_list(Dict1)}] ++ Encoding ++ Opts,
{Tree, Stubs} = merge_sources(Name1, [S], Opts1),
Dir = filename:dirname(filename(File)),
- File1 = write_module(Tree, Name1, Dir, Opts),
+ File1 = write_module(Tree, Name1, Dir, Opts++Encoding),
%% We create the stub file in the same directory as the source file
%% and the target file.
@@ -2648,7 +2659,7 @@ error_text(D, Name) ->
{L, M, E} when is_integer(L), is_atom(M) ->
case catch M:format_error(E) of
S when is_list(S) ->
- io_lib:fwrite("`~w', line ~w: ~s.",
+ io_lib:fwrite("`~w', line ~w: ~ts.",
[Name, L, S]);
_ ->
error_text_1(D, Name)
@@ -2706,7 +2717,17 @@ open_output_file(FName) ->
exit(R)
end.
-%% read_module(Name, Options) -> syntaxTree()
+output_encoding(FD, Opts) ->
+ case proplists:get_value(encoding, Opts) of
+ undefined ->
+ ok = io:setopts(FD, [{encoding, epp:default_encoding()}]);
+ Encoding ->
+ ok = io:setopts(FD, [{encoding, Encoding}]),
+ EncS = epp:encoding_to_string(Encoding),
+ ok = io:fwrite(FD, <<"%% ~s\n">>, [EncS])
+ end.
+
+%% read_module(Name, Options) -> {syntaxTree(), epp:source_encoding()}
%%
%% This also tries to locate the real source file, if "Name" does not
%% point directly to a particular file.
@@ -2729,20 +2750,21 @@ read_module(Name, Options) ->
read_module_1(Name, Options) ->
verbose("reading module `~s'.", [filename(Name)], Options),
- Forms = read_module_2(Name, Options),
+ {Forms, Enc} = read_module_2(Name, Options),
case proplists:get_bool(comments, Options) of
false ->
- Forms;
+ {Forms, Enc};
true ->
Comments = erl_comment_scan:file(Name),
- erl_recomment:recomment_forms(Forms, Comments)
+ {erl_recomment:recomment_forms(Forms, Comments), Enc}
end.
read_module_2(Name, Options) ->
case read_module_3(Name, Options) of
{ok, Forms} ->
check_forms(Forms, Name),
- Forms;
+ Enc = epp:read_encoding(Name),
+ {Forms, Enc};
{error, _} = Error ->
error_read_file(Name),
exit(Error)
@@ -2772,7 +2794,7 @@ check_forms([F | Fs], File) ->
_ ->
"unknown error"
end,
- report_error("in file `~s' at line ~w:\n ~s",
+ report_error("in file `~s' at line ~w:\n ~ts",
[filename(File), erl_syntax:get_pos(F), S]),
exit(error);
_ ->
@@ -2847,6 +2869,7 @@ write_module(Tree, Name, Dir, Opts) ->
end,
Printer = proplists:get_value(printer, Opts),
FD = open_output_file(File),
+ ok = output_encoding(FD, Opts),
verbose("writing to file `~s'.", [File], Opts),
V = (catch {ok, output(FD, Printer, Tree, Opts)}),
ok = file:close(FD),