From 15a631cb2ec70860bc58492020904b1b16fed5c4 Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Wed, 29 Mar 2017 15:48:27 +0200 Subject: edoc: Fix EDoc regarding Unicode atoms Also extended the scanner to handle the \x{...} syntax. Capitalizing now works with a few more characters. --- lib/edoc/src/edoc_scanner.erl | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'lib/edoc/src/edoc_scanner.erl') diff --git a/lib/edoc/src/edoc_scanner.erl b/lib/edoc/src/edoc_scanner.erl index f1d5e1d4b9..35d00c6c0e 100644 --- a/lib/edoc/src/edoc_scanner.erl +++ b/lib/edoc/src/edoc_scanner.erl @@ -86,6 +86,8 @@ scan1([C|Cs], Toks, Pos) when C >= 0, C =< $ -> % Skip blanks scan1(Cs, Toks, Pos); scan1([C|Cs], Toks, Pos) when C >= $a, C =< $z -> % Unquoted atom scan_atom(C, Cs, Toks, Pos); +scan1([C|Cs], Toks, Pos) when C >= $\337, C =< $\377, C /= $\367 -> + scan_atom(C, Cs, Toks, Pos); scan1([C|Cs], Toks, Pos) when C >= $0, C =< $9 -> % Numbers scan_number(C, Cs, Toks, Pos); scan1([$-,C| Cs], Toks, Pos) when C >= $0, C =< $9 -> % Signed numbers @@ -96,6 +98,8 @@ scan1([C|Cs], Toks, Pos) when C >= $A, C =< $Z -> % Variables scan_variable(C, Cs, Toks, Pos); scan1([$_|Cs], Toks, Pos) -> % Variables scan_variable($_, Cs, Toks, Pos); +scan1([C|Cs], Toks, Pos) when C >= $\300, C =< $\336, C /= $\327 -> + scan_variable(C, Cs, Toks, Pos); scan1([$$|Cs], Toks, Pos) -> % Character constant case scan_char_const(Cs, Toks, Pos) of {ok, Result} -> @@ -261,6 +265,15 @@ scan_char([], _Pos) -> %% The following conforms to Standard Erlang escape sequences. +-define(HEX(C), C >= $0 andalso C =< $9 orelse + C >= $A andalso C =< $F orelse + C >= $a andalso C =< $f). + +-define(UNICODE(C), + (C >= 0 andalso C < 16#D800 orelse + C > 16#DFFF andalso C < 16#FFFE orelse + C > 16#FFFF andalso C =< 16#10FFFF)). + scan_escape([O1, O2, O3 | Cs], Pos) when % \<1-3> octal digits O1 >= $0, O1 =< $3, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 -> Val = (O1*8 + O2)*8 + O3 - 73*$0, @@ -272,6 +285,11 @@ scan_escape([O1, O2 | Cs], Pos) when scan_escape([O1 | Cs], Pos) when O1 >= $0, O1 =< $7 -> {O1 - $0,Cs,Pos}; +scan_escape([$x, ${ | Cs], Pos) -> + scan_hex(Cs, Pos, []); +scan_escape([$x, H1, H2 | Cs], Pos) when ?HEX(H1), ?HEX(H2) -> + Val = (H1*16 + H2) - 17*$0, + {Val,Cs,Pos}; scan_escape([$^, C | Cs], Pos) -> % \^X -> CTL-X if C >= $\100, C =< $\137 -> {C - $\100,Cs,Pos}; @@ -285,6 +303,18 @@ scan_escape([C | Cs], Pos) -> scan_escape([], _Pos) -> {error, truncated_char}. +scan_hex([C | Cs], Pos, HCs) when ?HEX(C) -> + scan_hex(Cs, Pos, [C | HCs]); +scan_hex([$} | Cs], Pos, HCs) -> + case catch erlang:list_to_integer(lists:reverse(HCs), 16) of + Val when ?UNICODE(Val) -> + {Val,Cs,Pos}; + _ -> + {error, undefined_escape_sequence} + end; +scan_hex(_Cs, _Pos, _HCs) -> + {error, undefined_escape_sequence}. + %% Note that we return $\000 for undefined escapes. escape_char($b) -> $\010; % \b = BS escape_char($d) -> $\177; % \d = DEL -- cgit v1.2.3