aboutsummaryrefslogtreecommitdiffstats
path: root/lib/edoc/src/edoc_scanner.erl
diff options
context:
space:
mode:
author Hans Bolinder <[email protected]> 2017-04-10 08:58:25 +0200
committer Hans Bolinder <[email protected]> 2017-04-10 08:58:25 +0200
commit 710abdb5d79554cde4e0ca051bfa16bdcfdccc57 (patch)
tree 8e99daa1f998f49b0bd78a1937d70c9ca07f59eb /lib/edoc/src/edoc_scanner.erl
parent 3a29920a05cedee8c1f7501ee7b1aa22e068efed (diff)
parent 73657a28e74f8ad12ddb4fea49272a74f5b823c3 (diff)
download otp-710abdb5d79554cde4e0ca051bfa16bdcfdccc57.tar.gz
otp-710abdb5d79554cde4e0ca051bfa16bdcfdccc57.tar.bz2
otp-710abdb5d79554cde4e0ca051bfa16bdcfdccc57.zip
Merge branch 'hasse/unicode_atoms/OTP-14285'
* hasse/unicode_atoms/OTP-14285:
  syntax_tools: Fix Syntax Tools regarding Unicode atoms
  debugger: Show Latin-1 code correctly
  edoc: Fix EDoc regarding Unicode atoms
  parsetools: Fix Yecc regarding Unicode atoms
  parsetools: Fix Leex regarding Unicode atoms
  stdlib: Fix Erlang shell regarding Unicode atoms
  stdlib: Fix Erlang pretty printer regarding Unicode atoms
  stdlib: Add function to io_lib to handle Unicode atoms
Diffstat (limited to 'lib/edoc/src/edoc_scanner.erl')
-rw-r--r-- lib/edoc/src/edoc_scanner.erl 30
1 files changed, 30 insertions, 0 deletions
diff --git a/lib/edoc/src/edoc_scanner.erl b/lib/edoc/src/edoc_scanner.erl
index f1d5e1d4b9..35d00c6c0e 100644
--- a/lib/edoc/src/edoc_scanner.erl
+++ b/lib/edoc/src/edoc_scanner.erl
@@ -86,6 +86,8 @@ scan1([C|Cs], Toks, Pos) when C >= 0, C =< $ -> % Skip blanks
scan1(Cs, Toks, Pos);
scan1([C|Cs], Toks, Pos) when C >= $a, C =< $z -> % Unquoted atom
scan_atom(C, Cs, Toks, Pos);
+scan1([C|Cs], Toks, Pos) when C >= $\337, C =< $\377, C /= $\367 ->
+ scan_atom(C, Cs, Toks, Pos);
scan1([C|Cs], Toks, Pos) when C >= $0, C =< $9 -> % Numbers
scan_number(C, Cs, Toks, Pos);
scan1([$-,C| Cs], Toks, Pos) when C >= $0, C =< $9 -> % Signed numbers
@@ -96,6 +98,8 @@ scan1([C|Cs], Toks, Pos) when C >= $A, C =< $Z -> % Variables
scan_variable(C, Cs, Toks, Pos);
scan1([$_|Cs], Toks, Pos) -> % Variables
scan_variable($_, Cs, Toks, Pos);
+scan1([C|Cs], Toks, Pos) when C >= $\300, C =< $\336, C /= $\327 ->
+ scan_variable(C, Cs, Toks, Pos);
scan1([$$|Cs], Toks, Pos) -> % Character constant
case scan_char_const(Cs, Toks, Pos) of
{ok, Result} ->
@@ -261,6 +265,15 @@ scan_char([], _Pos) ->
%% The following conforms to Standard Erlang escape sequences.
+-define(HEX(C), (C >= $0 andalso C =< $9 orelse     % true for a hex digit: 0-9,
+                 C >= $A andalso C =< $F orelse     % A-F,
+                 C >= $a andalso C =< $f)).         % or a-f; parenthesized so it composes safely
+
+-define(UNICODE(C),                                 % true for an accepted code point:
+        (C >= 0 andalso C < 16#D800 orelse          % below 16#D800,
+         C > 16#DFFF andalso C < 16#FFFE orelse     % above 16#DFFF but not 16#FFFE/16#FFFF,
+         C > 16#FFFF andalso C =< 16#10FFFF)).      % or above 16#FFFF up to 16#10FFFF
+
scan_escape([O1, O2, O3 | Cs], Pos) when % \<1-3> octal digits
O1 >= $0, O1 =< $3, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 ->
Val = (O1*8 + O2)*8 + O3 - 73*$0,
@@ -272,6 +285,11 @@ scan_escape([O1, O2 | Cs], Pos) when
scan_escape([O1 | Cs], Pos) when
O1 >= $0, O1 =< $7 ->
{O1 - $0,Cs,Pos};
+scan_escape([$x, ${ | Cs], Pos) ->                  % \x{H...}: digits are read by scan_hex/3
+    scan_hex(Cs, Pos, []);
+scan_escape([$x, H1, H2 | Cs], Pos) when ?HEX(H1), ?HEX(H2) -> % \xHH: exactly two hex digits
+    Val = erlang:list_to_integer([H1, H2], 16),     % also correct for A-F/a-f, unlike "- $0" arithmetic
+    {Val,Cs,Pos};
scan_escape([$^, C | Cs], Pos) -> % \^X -> CTL-X
if C >= $\100, C =< $\137 ->
{C - $\100,Cs,Pos};
@@ -285,6 +303,18 @@ scan_escape([C | Cs], Pos) ->
scan_escape([], _Pos) ->
{error, truncated_char}.
+scan_hex([C | Cs], Pos, HCs) when ?HEX(C) ->        % collect hex digits, most recent first
+    scan_hex(Cs, Pos, [C | HCs]);
+scan_hex([$} | Cs], Pos, [_ | _] = HCs) ->          % closing brace after at least one digit
+    case erlang:list_to_integer(lists:reverse(HCs), 16) of
+        Val when ?UNICODE(Val) ->                   % value passes the ?UNICODE range check
+            {Val,Cs,Pos};
+        _ ->                                        % value fails the ?UNICODE range check
+            {error, undefined_escape_sequence}
+    end;
+scan_hex(_Cs, _Pos, _HCs) ->                        % no digits before "}", a non-hex char, or end of input
+    {error, undefined_escape_sequence}.
+
%% Note that we return $\000 for undefined escapes.
escape_char($b) -> $\010; % \b = BS
escape_char($d) -> $\177; % \d = DEL