aboutsummaryrefslogtreecommitdiffstats
path: root/lib/kernel
diff options
context:
space:
mode:
authorHans Bolinder <[email protected]>2012-10-04 15:58:26 +0200
committerHans Bolinder <[email protected]>2013-01-02 10:15:17 +0100
commit300c5466a7c9cfe3ed22bba2a88ba21058406402 (patch)
treeb8c30800b17d5ae98255de2fd2818d8b5d4d6eba /lib/kernel
parent7a884a31cfcaaf23f7920ba1a006aa2855529030 (diff)
downloadotp-300c5466a7c9cfe3ed22bba2a88ba21058406402.tar.gz
otp-300c5466a7c9cfe3ed22bba2a88ba21058406402.tar.bz2
otp-300c5466a7c9cfe3ed22bba2a88ba21058406402.zip
[stdlib, kernel] Introduce Unicode support for Erlang source files
Expect modifications, additions and corrections. There is a kludge in file_io_server and erl_scan:continuation_location() that's not so pleasing.
Diffstat (limited to 'lib/kernel')
-rw-r--r--lib/kernel/doc/src/file.xml20
-rw-r--r--lib/kernel/src/file.erl2
-rw-r--r--lib/kernel/src/file_io_server.erl18
3 files changed, 36 insertions, 4 deletions
diff --git a/lib/kernel/doc/src/file.xml b/lib/kernel/doc/src/file.xml
index 89190d8668..d7bdcef53c 100644
--- a/lib/kernel/doc/src/file.xml
+++ b/lib/kernel/doc/src/file.xml
@@ -261,6 +261,9 @@
{person, "pelle", 30}.</code>
<pre>1> <input>file:consult("f.txt").</input>
{ok,[{person,"kalle",25},{person,"pelle",30}]}</pre>
+ <p>The encoding of of <c><anno>Filename</anno></c> can be set
+ by a comment as described in <seealso
+ marker="stdlib:epp#encoding">epp(3)</seealso>.</p>
</desc>
</func>
<func>
@@ -399,6 +402,9 @@
of the error.</p>
</item>
</taglist>
+ <p>The encoding of of <c><anno>Filename</anno></c> can be set
+ by a comment as described in <seealso
+ marker="stdlib:epp#encoding">epp(3)</seealso>.</p>
</desc>
</func>
<func>
@@ -865,6 +871,9 @@
the error.</p>
</item>
</taglist>
+ <p>The encoding of of <c><anno>Filename</anno></c> can be set
+ by a comment as described in <seealso
+ marker="stdlib:epp#encoding">epp(3)</seealso>.</p>
</desc>
</func>
<func>
@@ -906,6 +915,9 @@
of the error.</p>
</item>
</taglist>
+ <p>The encoding of of <c><anno>Filename</anno></c> can be set
+ by a comment as described in <seealso
+ marker="stdlib:epp#encoding">epp(3)</seealso>.</p>
</desc>
</func>
<func>
@@ -975,7 +987,10 @@
of the error.</p>
</item>
</taglist>
- </desc>
+ <p>The encoding of of <c><anno>Filename</anno></c> can be set
+ by a comment as described in <seealso
+ marker="stdlib:epp#encoding">epp(3)</seealso>.</p>
+ </desc>
</func>
<func>
<name name="path_script" arity="3"/>
@@ -1506,6 +1521,9 @@
of the error.</p>
</item>
</taglist>
+ <p>The encoding of of <c><anno>Filename</anno></c> can be set
+ by a comment as described in <seealso
+ marker="stdlib:epp#encoding">epp(3)</seealso>.</p>
</desc>
</func>
<func>
diff --git a/lib/kernel/src/file.erl b/lib/kernel/src/file.erl
index 4f7c984b11..de3eaad5a1 100644
--- a/lib/kernel/src/file.erl
+++ b/lib/kernel/src/file.erl
@@ -1315,6 +1315,7 @@ sendfile_send(Sock, Data, Old) ->
%%% Helpers
consult_stream(Fd) ->
+ _ = epp:set_encoding(Fd),
consult_stream(Fd, 1, []).
consult_stream(Fd, Line, Acc) ->
@@ -1328,6 +1329,7 @@ consult_stream(Fd, Line, Acc) ->
end.
eval_stream(Fd, Handling, Bs) ->
+ _ = epp:set_encoding(Fd),
eval_stream(Fd, Handling, 1, undefined, [], Bs).
eval_stream(Fd, H, Line, Last, E, Bs) ->
diff --git a/lib/kernel/src/file_io_server.erl b/lib/kernel/src/file_io_server.erl
index 0bff56cf46..acaffe1e41 100644
--- a/lib/kernel/src/file_io_server.erl
+++ b/lib/kernel/src/file_io_server.erl
@@ -40,6 +40,8 @@ format_error({_Line, ?MODULE, Reason}) ->
io_lib:format("~w", [Reason]);
format_error({_Line, Mod, Reason}) ->
Mod:format_error(Reason);
+format_error(invalid_unicode) ->
+ io_lib:format("cannot translate from UTF-8", []);
format_error(ErrorId) ->
erl_posix_msg:message(ErrorId).
@@ -549,7 +551,7 @@ get_chars_notempty(Mod, Func, XtraArg, S, OutEnc,
<<>> ->
get_chars_apply(Mod, Func, XtraArg, S, OutEnc, State, eof);
_ ->
- {stop,invalid_unicode,{error,invalid_unicode},State}
+ {stop,invalid_unicode,invalid_unicode_error(Mod, Func, XtraArg, S),State}
end;
{error,Reason}=Error ->
{stop,Reason,Error,State}
@@ -616,12 +618,22 @@ get_chars_apply(Mod, Func, XtraArg, S0, OutEnc,
end
catch
exit:ExReason ->
- {stop,ExReason,{error,err_func(Mod, Func, XtraArg)},State};
+ {stop,ExReason,invalid_unicode_error(Mod, Func, XtraArg, S0),State};
error:ErrReason ->
{stop,ErrReason,{error,err_func(Mod, Func, XtraArg)},State}
end.
-
+%% A hack that tries to inform the caller about the position where the
+%% error occured.
+invalid_unicode_error(Mod, Func, XtraArg, S) ->
+ try
+ {erl_scan,tokens,_Args} = XtraArg,
+ Location = erl_scan:continuation_location(S),
+ {error,{Location, ?MODULE, invalid_unicode},Location}
+ catch
+ _:_ ->
+ {error,err_func(Mod, Func, XtraArg)}
+ end.
%% Convert error code to make it look as before
err_func(io_lib, get_until, {_,F,_}) ->