aboutsummaryrefslogtreecommitdiffstats
path: root/lib/edoc/src/edoc_scanner.erl
diff options
context:
space:
mode:
authorHans Bolinder <[email protected]>2017-03-29 15:48:27 +0200
committerHans Bolinder <[email protected]>2017-04-07 08:57:27 +0200
commit15a631cb2ec70860bc58492020904b1b16fed5c4 (patch)
tree7421baad6b9bfcb6670984721ec0ab49273bdbc3 /lib/edoc/src/edoc_scanner.erl
parent9c013d50cc5abf3b0a0dbb5fc2be97c825bc0261 (diff)
downloadotp-15a631cb2ec70860bc58492020904b1b16fed5c4.tar.gz
otp-15a631cb2ec70860bc58492020904b1b16fed5c4.tar.bz2
otp-15a631cb2ec70860bc58492020904b1b16fed5c4.zip
edoc: Fix EDoc regarding Unicode atoms
Also extended the scanner to handle the \x{...} syntax. Capitalizing now works with a few more characters.
Diffstat (limited to 'lib/edoc/src/edoc_scanner.erl')
-rw-r--r--lib/edoc/src/edoc_scanner.erl30
1 files changed, 30 insertions, 0 deletions
diff --git a/lib/edoc/src/edoc_scanner.erl b/lib/edoc/src/edoc_scanner.erl
index f1d5e1d4b9..35d00c6c0e 100644
--- a/lib/edoc/src/edoc_scanner.erl
+++ b/lib/edoc/src/edoc_scanner.erl
@@ -86,6 +86,8 @@ scan1([C|Cs], Toks, Pos) when C >= 0, C =< $ -> % Skip blanks
scan1(Cs, Toks, Pos);
scan1([C|Cs], Toks, Pos) when C >= $a, C =< $z -> % Unquoted atom
scan_atom(C, Cs, Toks, Pos);
+scan1([C|Cs], Toks, Pos) when C >= $\337, C =< $\377, C /= $\367 ->
+ scan_atom(C, Cs, Toks, Pos);
scan1([C|Cs], Toks, Pos) when C >= $0, C =< $9 -> % Numbers
scan_number(C, Cs, Toks, Pos);
scan1([$-,C| Cs], Toks, Pos) when C >= $0, C =< $9 -> % Signed numbers
@@ -96,6 +98,8 @@ scan1([C|Cs], Toks, Pos) when C >= $A, C =< $Z -> % Variables
scan_variable(C, Cs, Toks, Pos);
scan1([$_|Cs], Toks, Pos) -> % Variables
scan_variable($_, Cs, Toks, Pos);
+scan1([C|Cs], Toks, Pos) when C >= $\300, C =< $\336, C /= $\327 ->
+ scan_variable(C, Cs, Toks, Pos);
scan1([$$|Cs], Toks, Pos) -> % Character constant
case scan_char_const(Cs, Toks, Pos) of
{ok, Result} ->
@@ -261,6 +265,15 @@ scan_char([], _Pos) ->
%% The following conforms to Standard Erlang escape sequences.
+-define(HEX(C), C >= $0 andalso C =< $9 orelse
+ C >= $A andalso C =< $F orelse
+ C >= $a andalso C =< $f).
+
+-define(UNICODE(C),
+ (C >= 0 andalso C < 16#D800 orelse
+ C > 16#DFFF andalso C < 16#FFFE orelse
+ C > 16#FFFF andalso C =< 16#10FFFF)).
+
scan_escape([O1, O2, O3 | Cs], Pos) when % \<1-3> octal digits
O1 >= $0, O1 =< $3, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 ->
Val = (O1*8 + O2)*8 + O3 - 73*$0,
@@ -272,6 +285,11 @@ scan_escape([O1, O2 | Cs], Pos) when
scan_escape([O1 | Cs], Pos) when
O1 >= $0, O1 =< $7 ->
{O1 - $0,Cs,Pos};
+scan_escape([$x, ${ | Cs], Pos) ->
+ scan_hex(Cs, Pos, []);
+scan_escape([$x, H1, H2 | Cs], Pos) when ?HEX(H1), ?HEX(H2) ->
+ Val = (H1*16 + H2) - 17*$0,
+ {Val,Cs,Pos};
scan_escape([$^, C | Cs], Pos) -> % \^X -> CTL-X
if C >= $\100, C =< $\137 ->
{C - $\100,Cs,Pos};
@@ -285,6 +303,18 @@ scan_escape([C | Cs], Pos) ->
scan_escape([], _Pos) ->
{error, truncated_char}.
+scan_hex([C | Cs], Pos, HCs) when ?HEX(C) ->
+ scan_hex(Cs, Pos, [C | HCs]);
+scan_hex([$} | Cs], Pos, HCs) ->
+ case catch erlang:list_to_integer(lists:reverse(HCs), 16) of
+ Val when ?UNICODE(Val) ->
+ {Val,Cs,Pos};
+ _ ->
+ {error, undefined_escape_sequence}
+ end;
+scan_hex(_Cs, _Pos, _HCs) ->
+ {error, undefined_escape_sequence}.
+
%% Note that we return $\000 for undefined escapes.
escape_char($b) -> $\010; % \b = BS
escape_char($d) -> $\177; % \d = DEL