aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib/src/erl_parse.yrl
diff options
context:
space:
mode:
authorHans Bolinder <[email protected]>2012-10-04 15:58:26 +0200
committerHans Bolinder <[email protected]>2013-01-02 10:15:17 +0100
commit300c5466a7c9cfe3ed22bba2a88ba21058406402 (patch)
treeb8c30800b17d5ae98255de2fd2818d8b5d4d6eba /lib/stdlib/src/erl_parse.yrl
parent7a884a31cfcaaf23f7920ba1a006aa2855529030 (diff)
downloadotp-300c5466a7c9cfe3ed22bba2a88ba21058406402.tar.gz
otp-300c5466a7c9cfe3ed22bba2a88ba21058406402.tar.bz2
otp-300c5466a7c9cfe3ed22bba2a88ba21058406402.zip
[stdlib, kernel] Introduce Unicode support for Erlang source files
Expect modifications, additions and corrections. There is a kludge in file_io_server and erl_scan:continuation_location() that's not so pleasing.
Diffstat (limited to 'lib/stdlib/src/erl_parse.yrl')
-rw-r--r--lib/stdlib/src/erl_parse.yrl128
1 files changed, 66 insertions, 62 deletions
diff --git a/lib/stdlib/src/erl_parse.yrl b/lib/stdlib/src/erl_parse.yrl
index 928c10f7f2..27a2ba80eb 100644
--- a/lib/stdlib/src/erl_parse.yrl
+++ b/lib/stdlib/src/erl_parse.yrl
@@ -2,7 +2,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1996-2011. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2012. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -923,73 +923,77 @@ normalise_list([]) ->
-spec abstract(Data) -> AbsTerm when
Data :: term(),
AbsTerm :: abstract_expr().
-abstract(T) when is_integer(T) -> {integer,0,T};
-abstract(T) when is_float(T) -> {float,0,T};
-abstract(T) when is_atom(T) -> {atom,0,T};
-abstract([]) -> {nil,0};
-abstract(B) when is_bitstring(B) ->
- {bin, 0, [abstract_byte(Byte, 0) || Byte <- bitstring_to_list(B)]};
-abstract([C|T]) when is_integer(C), 0 =< C, C < 256 ->
- abstract_string(T, [C]);
-abstract([H|T]) ->
- {cons,0,abstract(H),abstract(T)};
-abstract(Tuple) when is_tuple(Tuple) ->
- {tuple,0,abstract_list(tuple_to_list(Tuple))}.
-
-abstract_string([C|T], String) when is_integer(C), 0 =< C, C < 256 ->
- abstract_string(T, [C|String]);
-abstract_string([], String) ->
- {string, 0, lists:reverse(String)};
-abstract_string(T, String) ->
- not_string(String, abstract(T)).
-
-not_string([C|T], Result) ->
- not_string(T, {cons, 0, {integer, 0, C}, Result});
-not_string([], Result) ->
+abstract(T) ->
+ abstract(T, 0, epp:default_encoding()).
+
+%%% abstract/2 takes line and encoding options
+-spec abstract(Data, Options) -> AbsTerm when
+ Data :: term(),
+ Options :: Line | [Option],
+ Option :: {line, Line} | {encoding, Encoding},
+ Encoding :: latin1 | unicode | utf8,
+ Line :: erl_scan:line(),
+ AbsTerm :: abstract_expr().
+
+abstract(T, Line) when is_integer(Line) ->
+ abstract(T, Line, epp:default_encoding());
+abstract(T, Options) when is_list(Options) ->
+ Line = proplists:get_value(line, Options, 0),
+ Encoding = proplists:get_value(encoding, Options,epp:default_encoding()),
+ abstract(T, Line, Encoding).
+
+-define(UNICODE(C),
+ (C >= 0 andalso C < 16#D800 orelse
+ C > 16#DFFF andalso C < 16#FFFE orelse
+ C > 16#FFFF andalso C =< 16#10FFFF)).
+
+abstract(T, L, _E) when is_integer(T) -> {integer,L,T};
+abstract(T, L, _E) when is_float(T) -> {float,L,T};
+abstract(T, L, _E) when is_atom(T) -> {atom,L,T};
+abstract([], L, _E) -> {nil,L};
+abstract(B, L, _E) when is_bitstring(B) ->
+ {bin, L, [abstract_byte(Byte, L) || Byte <- bitstring_to_list(B)]};
+abstract([C|T], L, unicode=E) when ?UNICODE(C) ->
+ abstract_unicode_string(T, [C], L, E);
+abstract([C|T], L, utf8=E) when ?UNICODE(C) ->
+ abstract_unicode_string(T, [C], L, E);
+abstract([C|T], L, latin1=E) when is_integer(C), 0 =< C, C < 256 ->
+ abstract_string(T, [C], L, E);
+abstract([H|T], L, E) ->
+ {cons,L,abstract(H, L, E),abstract(T, L, E)};
+abstract(Tuple, L, E) when is_tuple(Tuple) ->
+ {tuple,L,abstract_list(tuple_to_list(Tuple), L, E)}.
+
+abstract_string([C|T], String, L, E) when is_integer(C), 0 =< C, C < 256 ->
+ abstract_string(T, [C|String], L, E);
+abstract_string([], String, L, _E) ->
+ {string, L, lists:reverse(String)};
+abstract_string(T, String, L, E) ->
+ not_string(String, abstract(T, L, E), L, E).
+
+abstract_unicode_string([C|T], String, L, E) when ?UNICODE(C) ->
+ abstract_unicode_string(T, [C|String], L, E);
+abstract_unicode_string([], String, L, _E) ->
+ {string, L, lists:reverse(String)};
+abstract_unicode_string(T, String, L, E) ->
+ not_string(String, abstract(T, L, E), L, E).
+
+not_string([C|T], Result, L, E) ->
+ not_string(T, {cons, L, {integer, L, C}, Result}, L, E);
+not_string([], Result, _L, _E) ->
Result.
-abstract_list([H|T]) ->
- [abstract(H)|abstract_list(T)];
-abstract_list([]) ->
+abstract_list([H|T], L, E) ->
+ [abstract(H, L, E)|abstract_list(T, L, E)];
+abstract_list([], _L, _E) ->
[].
-abstract_byte(Byte, Line) when is_integer(Byte) ->
- {bin_element, Line, {integer, Line, Byte}, default, default};
-abstract_byte(Bits, Line) ->
+abstract_byte(Byte, L) when is_integer(Byte) ->
+ {bin_element, L, {integer, L, Byte}, default, default};
+abstract_byte(Bits, L) ->
Sz = bit_size(Bits),
<<Val:Sz>> = Bits,
- {bin_element, Line, {integer, Line, Val}, {integer, Line, Sz}, default}.
-
-%%% abstract/2 keeps the line number
-abstract(T, Line) when is_integer(T) -> {integer,Line,T};
-abstract(T, Line) when is_float(T) -> {float,Line,T};
-abstract(T, Line) when is_atom(T) -> {atom,Line,T};
-abstract([], Line) -> {nil,Line};
-abstract(B, Line) when is_bitstring(B) ->
- {bin, Line, [abstract_byte(Byte, Line) || Byte <- bitstring_to_list(B)]};
-abstract([C|T], Line) when is_integer(C), 0 =< C, C < 256 ->
- abstract_string(T, [C], Line);
-abstract([H|T], Line) ->
- {cons,Line,abstract(H, Line),abstract(T, Line)};
-abstract(Tuple, Line) when is_tuple(Tuple) ->
- {tuple,Line,abstract_list(tuple_to_list(Tuple), Line)}.
-
-abstract_string([C|T], String, Line) when is_integer(C), 0 =< C, C < 256 ->
- abstract_string(T, [C|String], Line);
-abstract_string([], String, Line) ->
- {string, Line, lists:reverse(String)};
-abstract_string(T, String, Line) ->
- not_string(String, abstract(T, Line), Line).
-
-not_string([C|T], Result, Line) ->
- not_string(T, {cons, Line, {integer, Line, C}, Result}, Line);
-not_string([], Result, _Line) ->
- Result.
-
-abstract_list([H|T], Line) ->
- [abstract(H, Line)|abstract_list(T, Line)];
-abstract_list([], _Line) ->
- [].
+ {bin_element, L, {integer, L, Val}, {integer, L, Sz}, default}.
%% Generate a list of tokens representing the abstract term.