From 92c79e394041f76d8f676cafe9b6af44522497bd Mon Sep 17 00:00:00 2001
From: Hans Bolinder <hasse@erlang.org>
Date: Wed, 22 Mar 2017 09:41:57 +0100
Subject: stdlib: Add function to io_lib to handle Unicode atoms

---
 lib/stdlib/doc/src/io_lib.xml    | 14 ++++++++++++--
 lib/stdlib/src/io_lib.erl        | 24 +++++++++++++++++++-----
 lib/stdlib/src/io_lib_pretty.erl | 14 ++++++++++++--
 lib/stdlib/test/io_SUITE.erl     | 29 +++++++++++++++++++++++++++--
 4 files changed, 70 insertions(+), 11 deletions(-)

(limited to 'lib')

diff --git a/lib/stdlib/doc/src/io_lib.xml b/lib/stdlib/doc/src/io_lib.xml
index 931e50f6f2..5ae400da62 100644
--- a/lib/stdlib/doc/src/io_lib.xml
+++ b/lib/stdlib/doc/src/io_lib.xml
@@ -4,7 +4,7 @@
 <erlref>
   <header>
     <copyright>
-      <year>1996</year><year>2016</year>
+      <year>1996</year><year>2017</year>
       <holder>Ericsson AB. All Rights Reserved.</holder>
     </copyright>
     <legalnotice>
@@ -147,7 +147,7 @@
           format string (that is, <c>~ts</c> or <c>~tc</c>), the resulting list
           can contain characters beyond the ISO Latin-1 character range
           (that is, numbers &gt; 255). If so, the
-          result is not an ordinary Erlang <c>string()</c>, but can well be
+          result is still an ordinary Erlang <c>string()</c>, and can well be
           used in any context where Unicode data is allowed.</p>
       </desc>
     </func>
@@ -383,6 +383,16 @@
       </desc>
     </func>
 
+    <func>
+      <name name="write_atom_as_latin1" arity="1"/>
+      <fsummary>Write an atom.</fsummary>
+      <desc>
+        <p>Returns the list of characters needed to print atom
+          <c><anno>Atom</anno></c>. Non-Latin-1 characters
+          are escaped.</p>
+      </desc>
+    </func>
+
     <func>
       <name name="write_char" arity="1"/>
       <fsummary>Write a character.</fsummary>
diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl
index a91143a764..28e5007e5a 100644
--- a/lib/stdlib/src/io_lib.erl
+++ b/lib/stdlib/src/io_lib.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 1996-2016. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2017. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -68,8 +68,8 @@
 -export([write_atom/1,write_string/1,write_string/2,write_latin1_string/1,
          write_latin1_string/2, write_char/1, write_latin1_char/1]).
 
--export([write_string_as_latin1/1, write_string_as_latin1/2,
-         write_char_as_latin1/1]).
+-export([write_atom_as_latin1/1, write_string_as_latin1/1,
+         write_string_as_latin1/2, write_char_as_latin1/1]).
 
 -export([quote_atom/2, char_list/1, latin1_char_list/1,
 	 deep_char_list/1, deep_latin1_char_list/1,
@@ -344,6 +344,11 @@ write_binary_body(B, _D) ->
     <<X:L>> = B,
     [integer_to_list(X),$:,integer_to_list(L)].
 
+%%% There are two functions to write Unicode atoms:
+%%% - they both escape control characters < 160;
+%%% - write_atom() never escapes characters >= 160;
+%%% - write_atom_as_latin1() also escapes characters >= 255.
+
 %% write_atom(Atom) -> [Char]
 %%  Generate the list of characters needed to print an atom.
 
@@ -351,17 +356,26 @@ write_binary_body(B, _D) ->
       Atom :: atom().
 
 write_atom(Atom) ->
+    write_possibly_quoted_atom(Atom, fun write_string/2).
+
+-spec write_atom_as_latin1(Atom) -> latin1_string() when
+      Atom :: atom().
+
+write_atom_as_latin1(Atom) ->
+    write_possibly_quoted_atom(Atom, fun write_string_as_latin1/2).
+
+write_possibly_quoted_atom(Atom, PFun) ->
     Chars = atom_to_list(Atom),
     case quote_atom(Atom, Chars) of
 	true ->
-	    write_string(Chars, $');   %'
+            PFun(Chars, $');   %'
 	false ->
 	    Chars
     end.
 
 %% quote_atom(Atom, CharList)
 %%  Return 'true' if atom with chars in CharList needs to be quoted, else
-%%  return 'false'.
+%%  return 'false'. Notice that characters >= 160 are always quoted.
 
 -spec quote_atom(atom(), chars()) -> boolean().
 
diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl
index aabccfc5d9..ff368d02da 100644
--- a/lib/stdlib/src/io_lib_pretty.erl
+++ b/lib/stdlib/src/io_lib_pretty.erl
@@ -105,6 +105,8 @@ print(_, _, _, 0, _M, _RF, _Enc, _Str) -> "...";
 print(Term, Col, Ll, D, M, RecDefFun, Enc, Str) when Col =< 0 ->
     %% ensure Col is at least 1
     print(Term, 1, Ll, D, M, RecDefFun, Enc, Str);
+print(Atom, _Col, _Ll, _D, _M, _RF, Enc, _Str) when is_atom(Atom) ->
+    write_atom(Atom, Enc);
 print(Term, Col, Ll, D, M0, RecDefFun, Enc, Str) when is_tuple(Term);
                                                       is_list(Term);
                                                       is_map(Term);
@@ -407,6 +409,9 @@ print_length({}, _D, _RF, _Enc, _Str) ->
     {"{}", 2};
 print_length(#{}=M, _D, _RF, _Enc, _Str) when map_size(M) =:= 0 ->
     {"#{}", 3};
+print_length(Atom, _D, _RF, Enc, _Str) when is_atom(Atom) ->
+    S = write_atom(Atom, Enc),
+    {S, lists:flatlength(S)};
 print_length(List, D, RF, Enc, Str) when is_list(List) ->
     %% only flat lists are "printable"
     case Str andalso printable_list(List, D, Enc) of
@@ -500,7 +505,7 @@ print_length_tuple(Tuple, D, RF, Enc, Str) ->
 print_length_record(_Tuple, 1, _RF, _RDefs, _Enc, _Str) ->
     {"{...}", 5};
 print_length_record(Tuple, D, RF, RDefs, Enc, Str) ->
-    Name = [$# | io_lib:write_atom(element(1, Tuple))],
+    Name = [$# | write_atom(element(1, Tuple), Enc)],
     NameL = length(Name),
     Elements = tl(tuple_to_list(Tuple)),
     L = print_length_fields(RDefs, D - 1, Elements, RF, Enc, Str),
@@ -515,7 +520,7 @@ print_length_fields([Def | Defs], D, [E | Es], RF, Enc, Str) ->
      print_length_fields(Defs, D - 1, Es, RF, Enc, Str)].
 
 print_length_field(Def, D, E, RF, Enc, Str) ->
-    Name = io_lib:write_atom(Def),
+    Name = write_atom(Def, Enc),
     {S, L} = print_length(E, D, RF, Enc, Str),
     NameL = length(Name) + 3,
     {{field, Name, NameL, {S, L}}, NameL + L}.
@@ -664,6 +669,11 @@ printable_char(C,unicode) ->
     C > 16#DFFF andalso C < 16#FFFE orelse
     C > 16#FFFF andalso C =< 16#10FFFF.
 
+write_atom(A, latin1) ->
+    io_lib:write_atom_as_latin1(A);
+write_atom(A, _Uni) ->
+    io_lib:write_atom(A).
+
 write_string(S, latin1) ->
     io_lib:write_latin1_string(S, $"); %"
 write_string(S, _Uni) ->
diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl
index d546e8fad2..b2754e47ba 100644
--- a/lib/stdlib/test/io_SUITE.erl
+++ b/lib/stdlib/test/io_SUITE.erl
@@ -30,7 +30,8 @@
 	 io_lib_print_binary_depth_one/1, otp_10302/1, otp_10755/1,
          otp_10836/1, io_lib_width_too_small/1,
          io_with_huge_message_queue/1, format_string/1,
-	 maps/1, coverage/1, otp_14178_unicode_atoms/1, otp_14175/1]).
+	 maps/1, coverage/1, otp_14178_unicode_atoms/1, otp_14175/1,
+         otp_14285/1]).
 
 -export([pretty/2]).
 
@@ -61,7 +62,8 @@ all() ->
      printable_range, bad_printable_range,
      io_lib_print_binary_depth_one, otp_10302, otp_10755, otp_10836,
      io_lib_width_too_small, io_with_huge_message_queue,
-     format_string, maps, coverage, otp_14178_unicode_atoms, otp_14175].
+     format_string, maps, coverage, otp_14178_unicode_atoms, otp_14175,
+     otp_14285].
 
 %% Error cases for output.
 error_1(Config) when is_list(Config) ->
@@ -755,6 +757,8 @@ rfd(rrrrr, 3) ->
     [f1, f2, f3];
 rfd(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa, 0) ->
     [];
+rfd('\x{400}', 1) ->
+    ['\x{400}'];
 rfd(_, _) ->
     no.
 
@@ -1881,6 +1885,7 @@ otp_10302(Suite) when is_list(Suite) ->
 pretty(Term, Depth) when is_integer(Depth) ->
     Opts = [{column, 1}, {line_length, 20},
             {depth, Depth}, {max_chars, 60},
+            {record_print_fun, fun rfd/2},
             {encoding, unicode}],
     pretty(Term, Opts);
 pretty(Term, Opts) when is_list(Opts) ->
@@ -2324,3 +2329,23 @@ text1([T|Ts]) ->
     [erl_anno:text(Anno) | text1(Ts)].
 
 -endif. % EXACT
+
+otp_14285(_Config) ->
+    UOpts = [{record_print_fun, fun rfd/2},
+             {encoding, unicode}],
+    LOpts = [{record_print_fun, fun rfd/2},
+             {encoding, latin1}],
+
+    RT = {'\x{400}','\x{400}'},
+    "#'\x{400}'{'\x{400}' = '\x{400}'}" = pretty(RT, UOpts),
+    "#'\\x{400}'{'\\x{400}' = '\\x{400}'}" = pretty(RT, LOpts),
+
+    Chars = lists:seq(0, 512),
+    [] = [C ||
+             C <- Chars,
+             S <- io_lib:write_atom_as_latin1(list_to_atom([C])),
+             not is_latin1(S)],
+    L1 = [S || C <- Chars, S <- io_lib:write_atom(list_to_atom([C])),
+               not is_latin1(S)],
+    L1 = lists:seq(256, 512),
+    ok.
-- 
cgit v1.2.3


From 421012d3e8062c9522e8b425cd936009ba83c887 Mon Sep 17 00:00:00 2001
From: Hans Bolinder <hasse@erlang.org>
Date: Tue, 21 Mar 2017 15:32:11 +0100
Subject: stdlib: Fix Erlang pretty printer regarding Unicode atoms

---
 lib/stdlib/src/erl_pp.erl        | 116 ++++++++++++++++++++++++---------------
 lib/stdlib/test/erl_pp_SUITE.erl |  41 ++++++++++----
 2 files changed, 104 insertions(+), 53 deletions(-)

(limited to 'lib')

diff --git a/lib/stdlib/src/erl_pp.erl b/lib/stdlib/src/erl_pp.erl
index 6068afb293..ee5e7a11bf 100644
--- a/lib/stdlib/src/erl_pp.erl
+++ b/lib/stdlib/src/erl_pp.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 1996-2016. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2017. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -44,7 +44,7 @@
                 | {encoding, latin1 | unicode | utf8}).
 -type(options() :: hook_function() | [option()]).
 
--record(pp, {string_fun, char_fun}).
+-record(pp, {value_fun, string_fun, char_fun}).
 
 -record(options, {hook, encoding, opts}).
 
@@ -214,11 +214,15 @@ state(_Hook) ->
     state().
 
 state() ->
-    #pp{string_fun = fun io_lib:write_string_as_latin1/1,
+    Options = [{encoding,latin1}],
+    #pp{value_fun  = fun(V) -> io_lib_pretty:print(V, Options) end,
+        string_fun = fun io_lib:write_string_as_latin1/1,
         char_fun   = fun io_lib:write_char_as_latin1/1}.
 
 unicode_state() ->
-    #pp{string_fun = fun io_lib:write_string/1,
+    Options = [{encoding,unicode}],
+    #pp{value_fun  = fun(V) -> io_lib_pretty:print(V, Options) end,
+        string_fun = fun io_lib:write_string/1,
         char_fun   = fun io_lib:write_char/1}.
 
 encoding(Options) ->
@@ -253,31 +257,30 @@ lattribute({attribute,_Line,Name,Arg}, Opts) ->
 
 lattribute(module, {M,Vs}, _Opts) ->
     A = a0(),
-    attr("module",[{var,A,pname(M)},
-                   foldr(fun(V, C) -> {cons,A,{var,A,V},C}
-                         end, {nil,A}, Vs)]);
+    attr(module,[{var,A,pname(M)},
+                 foldr(fun(V, C) -> {cons,A,{var,A,V},C}
+                       end, {nil,A}, Vs)]);
 lattribute(module, M, _Opts) ->
-    attr("module", [{var,a0(),pname(M)}]);
+    attr(module, [{var,a0(),pname(M)}]);
 lattribute(export, Falist, _Opts) ->
-    call({var,a0(),"-export"}, [falist(Falist)], 0, options(none));
+    attrib(export, falist(Falist));
 lattribute(import, Name, _Opts) when is_list(Name) ->
-    attr("import", [{var,a0(),pname(Name)}]);
+    attr(import, [{var,a0(),pname(Name)}]);
 lattribute(import, {From,Falist}, _Opts) ->
-    attr("import",[{var,a0(),pname(From)},falist(Falist)]);
+    attrib(import, [leaf(pname(From)),falist(Falist)]);
 lattribute(export_type, Talist, _Opts) ->
-    call({var,a0(),"-export_type"}, [falist(Talist)], 0, options(none));
+    attrib(export_type, falist(Talist));
 lattribute(optional_callbacks, Falist, Opts) ->
-    ArgL = try falist(Falist)
-           catch _:_ -> abstract(Falist, Opts)
-           end,
-    call({var,a0(),"-optional_callbacks"}, [ArgL], 0, options(none));
+    try attrib(optional_callbacks, falist(Falist))
+    catch _:_ -> attr(optional_callbacks, [abstract(Falist, Opts)])
+    end;
 lattribute(file, {Name,Line}, _Opts) ->
-    attr("file", [{string,a0(),Name},{integer,a0(),Line}]);
+    attr(file, [{string,a0(),Name},{integer,a0(),Line}]);
 lattribute(record, {Name,Is}, Opts) ->
-    Nl = leaf(format("-record(~w,", [Name])),
+    Nl = [leaf("-record("),{atom,Name},$,],
     [{first,Nl,record_fields(Is, Opts)},$)];
 lattribute(Name, Arg, Options) ->
-    attr(write(Name), [abstract(Arg, Options)]).
+    attr(Name, [abstract(Arg, Options)]).
 
 abstract(Arg, #options{encoding = Encoding}) ->
     erl_parse:abstract(Arg, [{encoding,Encoding}]).
@@ -340,7 +343,7 @@ ltype({user_type,Line,T,Ts}, _) ->
 ltype({remote_type,Line,[M,F,Ts]}, _) ->
     simple_type({remote,Line,M,F}, Ts);
 ltype({atom,_,T}, _) ->
-    leaf(write(T));
+    {atom,T};
 ltype(E, P) ->
     lexpr(E, P, options(none)).
 
@@ -382,12 +385,12 @@ tuple_type(Ts, F) ->
 specattr(SpecKind, {FuncSpec,TypeSpecs}) ->
     Func = case FuncSpec of
                {F,_A} ->
-                   format("~w", [F]);
+                   {atom,F};
                {M,F,_A} ->
-                   format("~w:~w", [M, F])
+                   [{atom,M},$:,{atom,F}]
            end,
     {first,leaf(lists:concat(["-", SpecKind, " "])),
-     {list,[{first,leaf(Func),spec_clauses(TypeSpecs)}]}}.
+     {list,[{first,Func,spec_clauses(TypeSpecs)}]}}.
 
 spec_clauses(TypeSpecs) ->
     {prefer_nl,[$;],[sig_type(T) || T <- TypeSpecs]}.
@@ -429,7 +432,10 @@ ltypes(Ts, F, Prec) ->
     [F(T, Prec) || T <- Ts].
 
 attr(Name, Args) ->
-    call({var,a0(),format("-~s", [Name])}, Args, 0, options(none)).
+    {first,[$-,{atom,Name}],args(Args, options(none))}.
+
+attrib(Name, Args) ->
+    {first,[$-,{atom,Name}],[{seq,$(,$),[$,],Args}]}.
 
 pname(['' | As]) ->
     [$. | pname(As)];
@@ -441,10 +447,13 @@ pname(A) when is_atom(A) ->
     write(A).
 
 falist([]) ->
-    {nil,a0()};
-falist([{Name,Arity}|Falist]) ->
-    A = a0(),
-    {cons,A,{var,A,format("~w/~w", [Name,Arity])},falist(Falist)}.
+    [leaf("[]")];
+falist(Falist) ->
+    L = [begin
+             {Name,Arity} = Fa,
+             [{atom,Name},leaf(format("/~w", [Arity]))]
+         end || Fa <- Falist],
+    [{seq,$[,$],$,,L}].
 
 lfunction({function,_Line,Name,_Arity,Cs}, Opts) ->
     Cll = nl_clauses(fun (C, H) -> func_clause(Name, C, H) end, $;, Opts, Cs),
@@ -489,7 +498,7 @@ lexpr({var,_,V}, _, _) -> leaf(format("~ts", [V]));
 lexpr({char,_,C}, _, _) -> {char,C};
 lexpr({integer,_,N}, _, _) -> leaf(write(N));
 lexpr({float,_,F}, _, _) -> leaf(write(F));
-lexpr({atom,_,A}, _, _) -> leaf(write(A));
+lexpr({atom,_,A}, _, _) -> {atom,A};
 lexpr({string,_,S}, _, _) -> {string,S};
 lexpr({nil,_}, _, _) -> '[]';
 lexpr({cons,_,H,T}, _, Opts) ->
@@ -519,7 +528,7 @@ lexpr({record, _, Name, Fs}, Prec, Opts) ->
 lexpr({record_field, _, Rec, Name, F}, Prec, Opts) ->
     {L,P,R} = inop_prec('#'),
     Rl = lexpr(Rec, L, Opts),
-    Nl = leaf(format("#~w.", [Name])),
+    Nl = [$#,{atom,Name},$.],
     El = [Rl,Nl,lexpr(F, R, Opts)],
     maybe_paren(P, Prec, El);
 lexpr({record, _, Rec, Name, Fs}, Prec, Opts) ->
@@ -538,12 +547,12 @@ lexpr({record_field, _, Rec, F}, Prec, Opts) ->
     maybe_paren(P, Prec, El);
 lexpr({map, _, Fs}, Prec, Opts) ->
     {P,_R} = preop_prec('#'),
-    El = {first,leaf("#"),map_fields(Fs, Opts)},
+    El = {first,$#,map_fields(Fs, Opts)},
     maybe_paren(P, Prec, El);
 lexpr({map, _, Map, Fs}, Prec, Opts) ->
     {L,P,_R} = inop_prec('#'),
     Rl = lexpr(Map, L, Opts),
-    El = {first,[Rl,leaf("#")],map_fields(Fs, Opts)},
+    El = {first,[Rl,$#],map_fields(Fs, Opts)},
     maybe_paren(P, Prec, El);
 lexpr({block,_,Es}, _, Opts) ->
     {list,[{step,'begin',body(Es, Opts)},'end']};
@@ -563,13 +572,16 @@ lexpr({'receive',_,Cs,To,ToOpt}, _, Opts) ->
            {step,'after',Al},
            'end']};
 lexpr({'fun',_,{function,F,A}}, _Prec, _Opts) ->
-    leaf(format("fun ~w/~w", [F,A]));
-lexpr({'fun',_,{function,F,A},Extra}, _Prec, _Opts) ->
-    {force_nl,fun_info(Extra),leaf(format("fun ~w/~w", [F,A]))};
-lexpr({'fun',_,{function,M,F,A}}, _Prec, _Opts)
+    [leaf("fun "),{atom,F},leaf(format("/~w", [A]))];
+lexpr({'fun',L,{function,_,_}=Func,Extra}, Prec, Opts) ->
+    {force_nl,fun_info(Extra),lexpr({'fun',L,Func}, Prec, Opts)};
+lexpr({'fun',L,{function,M,F,A}}, Prec, Opts)
   when is_atom(M), is_atom(F), is_integer(A) ->
     %% For backward compatibility with pre-R15 abstract format.
-    leaf(format("fun ~w:~w/~w", [M,F,A]));
+    Mod = erl_parse:abstract(M),
+    Fun = erl_parse:abstract(F),
+    Arity = erl_parse:abstract(A),
+    lexpr({'fun',L,{function,Mod,Fun,Arity}}, Prec, Opts);
 lexpr({'fun',_,{function,M,F,A}}, _Prec, Opts) ->
     %% New format in R15.
     NameItem = lexpr(M, Opts),
@@ -660,7 +672,7 @@ lexpr({bin,_,Fs}, _, Opts) ->
     bit_grp(Fs, Opts);
 %% Special case for straight values.
 lexpr({value,_,Val}, _,_) ->
-    leaf(write(Val));
+    {value,Val};
 %% Now do the hook.
 lexpr(Other, _Precedence, #options{hook = none}) ->
     leaf(format("INVALID-FORM:~w:",[Other]));
@@ -676,7 +688,7 @@ call(Name, Args, Prec, Opts) ->
     maybe_paren(P, Prec, Item).
 
 fun_info(Extra) ->
-    leaf(format("% fun-info: ~w", [Extra])).
+    [leaf("% fun-info: "),{value,Extra}].
 
 %% BITS:
 
@@ -717,7 +729,7 @@ bit_elem_type(T) ->
 %% end of BITS
 
 record_name(Name) ->
-    leaf(format("#~w", [Name])).
+    [$#,{atom,Name}].
 
 record_fields(Fs, Opts) ->
     tuple(Fs, fun record_field/2, Opts).
@@ -919,8 +931,10 @@ frmt(Item, I, PP) ->
 %%% - {force_nl,ExtraInfo,I}: fun-info (a comment) forces linebreak before I.
 %%% - {prefer_nl,Sep,IPs}: forces linebreak between Is unlesss negative
 %%%   indentation.
+%%% - {atom,A}: an atom
 %%% - {char,C}: a character
 %%% - {string,S}: a string.
+%%% - {value,T}: a term.
 %%% - {hook,...}, {ehook,...}: hook expressions.
 %%%
 %%% list, first, seq, force_nl, and prefer_nl all accept IPs, where each
@@ -981,6 +995,10 @@ f({prefer_nl,Sep,LItems}, I0, ST, WT, PP) ->
         true ->
             {insert_newlines(CharsSize2L, I0, ST),nsz(lists:last(Sizes), I0)}
     end;
+f({value,V}, I, ST, WT, PP) ->
+    f(write_a_value(V, PP), I, ST, WT, PP);
+f({atom,A}, I, ST, WT, PP) ->
+    f(write_an_atom(A, PP), I, ST, WT, PP);
 f({char,C}, I, ST, WT, PP) ->
     f(write_a_char(C, PP), I, ST, WT, PP);
 f({string,S}, I, ST, WT, PP) ->
@@ -1119,6 +1137,12 @@ has_nl([C|Cs]) ->
 has_nl([]) ->
     false.
 
+write_a_value(V, PP) ->
+    flat_leaf(write_value(V, PP)).
+
+write_an_atom(A, PP) ->
+    flat_leaf(write_atom(A, PP)).
+
 write_a_char(C, PP) ->
     flat_leaf(write_char(C, PP)).
 
@@ -1135,7 +1159,7 @@ write_a_string([], _N, _Len, _PP) ->
 write_a_string(S, N, Len, PP) ->
     SS = string:sub_string(S, 1, N),
     Sl = write_string(SS, PP),
-    case (length(Sl) > Len) and (N > ?MIN_SUBSTRING) of
+    case (chars_size(Sl) > Len) and (N > ?MIN_SUBSTRING) of
         true ->
             write_a_string(S, N-1, Len, PP);
         false ->
@@ -1147,11 +1171,17 @@ flat_leaf(S) ->
     L = lists:flatten(S),
     {leaf,length(L),L}.
 
+write_value(V, PP) ->
+    (PP#pp.value_fun)(V).
+
+write_atom(A, PP) ->
+    (PP#pp.value_fun)(A).
+
 write_string(S, PP) ->
-    lists:flatten((PP#pp.string_fun)(S)).
+    (PP#pp.string_fun)(S).
 
 write_char(C, PP) ->
-    lists:flatten((PP#pp.char_fun)(C)).
+    (PP#pp.char_fun)(C).
 
 %%
 %% Utilities
diff --git a/lib/stdlib/test/erl_pp_SUITE.erl b/lib/stdlib/test/erl_pp_SUITE.erl
index 1a028204b4..808ba9b4c1 100644
--- a/lib/stdlib/test/erl_pp_SUITE.erl
+++ b/lib/stdlib/test/erl_pp_SUITE.erl
@@ -51,7 +51,7 @@
 	  otp_6321/1, otp_6911/1, otp_6914/1, otp_8150/1, otp_8238/1,
 	  otp_8473/1, otp_8522/1, otp_8567/1, otp_8664/1, otp_9147/1,
           otp_10302/1, otp_10820/1, otp_11100/1, otp_11861/1, pr_1014/1,
-          otp_13662/1]).
+          otp_13662/1, otp_14285/1]).
 
 %% Internal export.
 -export([ehook/6]).
@@ -80,7 +80,8 @@ groups() ->
      {tickets, [],
       [otp_6321, otp_6911, otp_6914, otp_8150, otp_8238,
        otp_8473, otp_8522, otp_8567, otp_8664, otp_9147,
-       otp_10302, otp_10820, otp_11100, otp_11861, pr_1014, otp_13662]}].
+       otp_10302, otp_10820, otp_11100, otp_11861, pr_1014, otp_13662,
+       otp_14285]}].
 
 init_per_suite(Config) ->
     Config.
@@ -627,11 +628,6 @@ do_hook(HookFun) ->
     true =
         "{some,value}" =:= lists:flatten(erl_pp:expr({value,A0,{some,value}})),
 
-    %% Silly...
-    true =
-        "if true -> 0 end" =:=
-              flat_expr({'if',0,[{clause,0,[],[],[{atom,0,0}]}]}),
-
     %% More compatibility: before R6
     OldIf = {'if',A0,[{clause,A0,[],[{atom,A0,true}],[{atom,A0,b}]}]},
     NewIf = {'if',A0,[{clause,A0,[],[[{atom,A0,true}]],[{atom,A0,b}]}]},
@@ -1069,9 +1065,6 @@ otp_11100(Config) when is_list(Config) ->
     %% doesn't make a difference (pp:bit_elem_type/1 is an example).
 
     A1 = erl_anno:new(1),
-    %% Cannot trigger the use of the hook function with export/import.
-    "-export([{fy,a}/b]).\n" =
-        pf({attribute,A1,export,[{{fy,a},b}]}),
     "-type foo() :: integer(INVALID-FORM:{foo,bar}:).\n" =
         pf({attribute,A1,type,{foo,{type,A1,integer,[{foo,bar}]},[]}}),
     pf({attribute,A1,type,
@@ -1146,6 +1139,34 @@ otp_13662(Config) ->
           ],
     compile(Config, Ts).
 
+otp_14285(_Config) ->
+    pp_forms(<<"-export([t/0, '\\x{400}\\''/0]).">>),
+    pp_forms(<<"-import(lists, [append/2]).">>),
+    pp_forms(<<"-optional_callbacks([]).">>),
+    pp_forms(<<"-optional_callbacks(['\\x{400}\\''/1]).">>),
+    pp_forms(<<"-'\\x{400}\\''('\\x{400}\\'').">>),
+    pp_forms(<<"-type '\\x{400}\\''() :: '\\x{400}\\''.">>),
+    pp_forms(<<"-record('\\x{400}\\'', {'\\x{400}\\''}).">>),
+    pp_forms(<<"-callback '\\x{400}\\''(_) -> '\\x{400}\\''.">>),
+    pp_forms(<<"t() -> '\\x{400}\\''('\\x{400}\\'').">>),
+    pp_forms(<<"'\\x{400}\\''(_) -> '\\x{400}\\''.">>),
+    pp_forms(<<"-spec '\\x{400}'() -> "
+               "#'\\x{400}'{'\\x{400}' :: '\\x{400}'}.">>),
+    pp_forms(<<"'\\x{400}\\''() ->"
+               "R = #'\\x{400}\\''{},"
+               "#'\\x{400}\\''{'\\x{400}\\'' ="
+               "{'\\x{400}\\'',"
+               "fun '\\x{400}\\''/0,"
+               "R#'\\x{400}\\''.'\\x{400}\\'',"
+               "#'\\x{400}\\''.'\\x{400}\\''}}.">>),
+
+    %% Special...
+    true =
+        "{some,'\\x{400}\\''}" =:=
+        lists:flatten(erl_pp:expr({value,erl_anno:new(0),{some,'\x{400}\''}},
+                                  [{encoding,latin1}])),
+    ok.
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 compile(Config, Tests) ->
-- 
cgit v1.2.3


From 46e08e6ac477d1acccb360ad5d616c96dcdfe850 Mon Sep 17 00:00:00 2001
From: Hans Bolinder <hasse@erlang.org>
Date: Tue, 21 Mar 2017 16:34:32 +0100
Subject: stdlib: Fix Erlang shell regarding Unicode atoms

---
 lib/stdlib/doc/src/shell.xml    |  4 ++--
 lib/stdlib/src/shell.erl        | 17 +++++++++++++----
 lib/stdlib/test/shell_SUITE.erl | 23 ++++++++++++++++++++---
 3 files changed, 35 insertions(+), 9 deletions(-)

(limited to 'lib')

diff --git a/lib/stdlib/doc/src/shell.xml b/lib/stdlib/doc/src/shell.xml
index f52bc39deb..ab62c2fcdd 100644
--- a/lib/stdlib/doc/src/shell.xml
+++ b/lib/stdlib/doc/src/shell.xml
@@ -4,7 +4,7 @@
 <erlref>
   <header>
     <copyright>
-      <year>1996</year><year>2016</year>
+      <year>1996</year><year>2017</year>
       <holder>Ericsson AB. All Rights Reserved.</holder>
     </copyright>
     <legalnotice>
@@ -854,7 +854,7 @@ q                 - quit erlang
       <c>{history, N}</c>, where <c>N</c> is the current command number. The
       function is to return a list of characters or an atom. This
       constraint is because of the Erlang I/O protocol. Unicode characters
-      beyond code point 255 are allowed in the list. Notice
+      beyond code point 255 are allowed in the list and the atom. Notice
       that in restricted mode the call <c>Mod:Func(L)</c> must be
       allowed or the default shell prompt function is called.</p>
   </section>
diff --git a/lib/stdlib/src/shell.erl b/lib/stdlib/src/shell.erl
index 28f37ef8bf..394f4f2fa4 100644
--- a/lib/stdlib/src/shell.erl
+++ b/lib/stdlib/src/shell.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 1996-2016. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2017. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -349,10 +349,16 @@ default_prompt(N) ->
     %% Don't bother flattening the list irrespective of what the
     %% I/O-protocol states.
     case is_alive() of
-	true  -> io_lib:format(<<"(~s)~w> ">>, [node(), N]);
+	true  -> io_lib:format(<<"(~ts)~w> ">>, [node_string(), N]);
 	false -> io_lib:format(<<"~w> ">>, [N])
     end.
 
+node_string() ->
+    case encoding() of
+        latin1 -> io_lib:write_atom_as_latin1(node());
+        _ ->      io_lib:write_atom(node())
+    end.
+
 %% expand_hist(Expressions, CommandNumber)
 %%  Preprocess the expression list replacing all history list commands
 %%  with their expansions.
@@ -967,10 +973,11 @@ local_func(f, [{var,_,Name}], Bs, _Shell, _RT, _Lf, _Ef) ->
     {value,ok,erl_eval:del_binding(Name, Bs)};
 local_func(f, [_Other], _Bs, _Shell, _RT, _Lf, _Ef) ->
     erlang:raise(error, function_clause, [{shell,f,1}]);
-local_func(rd, [{atom,_,RecName},RecDef0], Bs, _Shell, RT, _Lf, _Ef) ->
+local_func(rd, [{atom,_,RecName0},RecDef0], Bs, _Shell, RT, _Lf, _Ef) ->
     RecDef = expand_value(RecDef0),
     RDs = lists:flatten(erl_pp:expr(RecDef)),
-    Attr = lists:concat(["-record('", RecName, "',", RDs, ")."]),
+    RecName = io_lib:write_atom_as_latin1(RecName0),
+    Attr = lists:concat(["-record(", RecName, ",", RDs, ")."]),
     {ok, Tokens, _} = erl_scan:string(Attr),
     case erl_parse:parse_form(Tokens) of
         {ok,AttrForm} ->
@@ -1417,9 +1424,11 @@ columns() ->
         {ok,N} -> N;
         _ -> 80
     end.
+
 encoding() ->
     [{encoding, Encoding}] = enc(),
     Encoding.
+
 enc() ->
     case lists:keyfind(encoding, 1, io:getopts()) of
 	false -> [{encoding,latin1}]; % should never happen
diff --git a/lib/stdlib/test/shell_SUITE.erl b/lib/stdlib/test/shell_SUITE.erl
index 4864bc3d72..56002dda25 100644
--- a/lib/stdlib/test/shell_SUITE.erl
+++ b/lib/stdlib/test/shell_SUITE.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 2004-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2004-2017. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -30,7 +30,8 @@
 	 progex_bit_syntax/1, progex_records/1, 
 	 progex_lc/1, progex_funs/1,
 	 otp_5990/1, otp_6166/1, otp_6554/1,
-	 otp_7184/1, otp_7232/1, otp_8393/1, otp_10302/1, otp_13719/1]).
+	 otp_7184/1, otp_7232/1, otp_8393/1, otp_10302/1, otp_13719/1,
+         otp_14285/1]).
 
 -export([ start_restricted_from_shell/1, 
 	  start_restricted_on_command_line/1,restricted_local/1]).
@@ -91,7 +92,7 @@ groups() ->
        progex_funs]},
      {tickets, [],
       [otp_5990, otp_6166, otp_6554, otp_7184,
-       otp_7232, otp_8393, otp_10302, otp_13719]}].
+       otp_7232, otp_8393, otp_10302, otp_13719, otp_14285]}].
 
 init_per_suite(Config) ->
     Config.
@@ -2824,6 +2825,22 @@ otp_13719(Config) when is_list(Config) ->
     file:delete(File),
     ok.
 
+otp_14285(Config) ->
+    {ok,Node} = start_node(shell_suite_helper_4,
+			   "-pa "++proplists:get_value(priv_dir,Config)++
+			   " +pc unicode"),
+    Test1 =
+        <<"begin
+               io:setopts([{encoding,utf8}]),
+               [1024] = atom_to_list('\\x{400}'),
+               rd('\\x{400}', {'\\x{400}' = '\\x{400}'}),
+               ok = rl('\\x{400}')
+           end.">>,
+    "-record('\x{400}',{'\x{400}' = '\x{400}'}).\nok.\n" =
+        t({Node,Test1}),
+    test_server:stop_node(Node),
+    ok.
+
 scan(B) ->
     F = fun(Ts) -> 
                 case erl_parse:parse_term(Ts) of
-- 
cgit v1.2.3


From 25271fa55aacf0b367ad74532c952352344ed97d Mon Sep 17 00:00:00 2001
From: Hans Bolinder <hasse@erlang.org>
Date: Thu, 2 Feb 2017 10:16:07 +0100
Subject: parsetools: Fix Leex regarding Unicode atoms

Notice that macro names are unquoted atoms.
---
 lib/parsetools/doc/src/leex.xml    |  5 ++-
 lib/parsetools/src/leex.erl        | 92 +++++++++++++++++++++++---------------
 lib/parsetools/test/leex_SUITE.erl | 45 +++++++++++++++++--
 3 files changed, 100 insertions(+), 42 deletions(-)

(limited to 'lib')

diff --git a/lib/parsetools/doc/src/leex.xml b/lib/parsetools/doc/src/leex.xml
index 29d546105f..1227625287 100644
--- a/lib/parsetools/doc/src/leex.xml
+++ b/lib/parsetools/doc/src/leex.xml
@@ -4,7 +4,7 @@
 <erlref>
   <header>
     <copyright>
-      <year>2009</year><year>2016</year>
+      <year>2009</year><year>2017</year>
       <holder>Ericsson AB. All Rights Reserved.</holder>
     </copyright>
     <legalnotice>
@@ -446,7 +446,8 @@ D = [0-9]
       </item>
     </taglist>
  
-    <p>The following examples define Erlang data types:</p>
+    <p>The following examples define simplified versions of a few
+      Erlang data types:</p>
     <code> 
 Atoms [a-z][0-9a-zA-Z_]*
 
diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl
index e0f37ae9df..e2e7d7359f 100644
--- a/lib/parsetools/src/leex.erl
+++ b/lib/parsetools/src/leex.erl
@@ -1548,22 +1548,23 @@ out_action_code(File, XrlFile, {_A,Code,_Vars,Name,Args,ArgsChars}) ->
     L = erl_scan:line(hd(Code)),
     output_file_directive(File, XrlFile, L-2),
     io:fwrite(File, "~s(~s) ->~n", [Name, ArgsChars]),
-    io:fwrite(File, "    ~s\n", [pp_tokens(Code, L)]).
+    io:fwrite(File, "    ~ts\n", [pp_tokens(Code, L, File)]).
 
-%% pp_tokens(Tokens, Line) -> [char()].
+%% pp_tokens(Tokens, Line, File) -> [char()].
 %%  Prints the tokens keeping the line breaks of the original code.
 
-pp_tokens(Tokens, Line0) -> pp_tokens(Tokens, Line0, none).
+pp_tokens(Tokens, Line0, File) -> pp_tokens(Tokens, Line0, File, none).
     
-pp_tokens([], _Line0, _) -> [];
-pp_tokens([T | Ts], Line0, Prev) ->
+pp_tokens([], _Line0, _, _) -> [];
+pp_tokens([T | Ts], Line0, File, Prev) ->
     Line = erl_scan:line(T),
-    [pp_sep(Line, Line0, Prev, T), pp_symbol(T) | pp_tokens(Ts, Line, T)].
+    [pp_sep(Line, Line0, Prev, T),
+     pp_symbol(T, File) | pp_tokens(Ts, Line, File, T)].
 
-pp_symbol({var,_,Var}) -> atom_to_list(Var);
-pp_symbol({_,_,Symbol}) -> io_lib:fwrite("~p", [Symbol]);
-pp_symbol({dot, _}) -> ".";
-pp_symbol({Symbol, _}) -> atom_to_list(Symbol).
+pp_symbol({var,_,Var}, _) -> atom_to_list(Var);
+pp_symbol({_,_,Symbol}, File) -> format_symbol(Symbol, File);
+pp_symbol({dot, _}, _) -> ".";
+pp_symbol({Symbol, _}, _) -> atom_to_list(Symbol).
 
 pp_sep(Line, Line0, Prev, T) when Line > Line0 -> 
     ["\n    " | pp_sep(Line - 1, Line0, Prev, T)];
@@ -1622,17 +1623,17 @@ out_dfa_edges(File, DFA) ->
                                   end, orddict:new(), Pt),
                     foreach(fun (T) ->
                                     Crs = orddict:fetch(T, Tdict),
-                                    Edgelab = dfa_edgelabel(Crs),
+                                    Edgelab = dfa_edgelabel(Crs, File),
                                     io:fwrite(File, "  ~b -> ~b [label=\"~ts\"];~n",
                                               [S,T,Edgelab])
                             end, sort(orddict:fetch_keys(Tdict)))
             end, DFA).
 
-dfa_edgelabel([C]) when is_integer(C) -> quote(C);
-dfa_edgelabel(Cranges) ->
+dfa_edgelabel([C], File) when is_integer(C) -> quote(C, File);
+dfa_edgelabel(Cranges, File) ->
     %% io:fwrite("el: ~p\n", [Cranges]),
-    "[" ++ map(fun ({A,B}) -> [quote(A), "-", quote(B)];
-                   (C)     -> [quote(C)]
+    "[" ++ map(fun ({A,B}) -> [quote(A, File), "-", quote(B, File)];
+                   (C)     -> [quote(C, File)]
                end, Cranges) ++ "]".
 
 set_encoding(#leex{encoding = none}, File) ->
@@ -1651,33 +1652,50 @@ output_file_directive(File, Filename, Line) ->
 
 format_filename(Filename0, File) ->
     Filename = filename:flatten(Filename0),
+    case enc(File) of
+        unicode -> io_lib:write_string(Filename);
+        latin1  -> io_lib:write_string_as_latin1(Filename)
+    end.
+
+format_symbol(Symbol, File) ->
+    Format = case enc(File) of
+                 latin1  -> "~p";
+                 unicode -> "~tp"
+             end,
+    io_lib:fwrite(Format, [Symbol]).
+
+enc(File) ->
     case lists:keyfind(encoding, 1, io:getopts(File)) of
-        {encoding, unicode} -> io_lib:write_string(Filename);
-        _ ->                   io_lib:write_string_as_latin1(Filename)
+	false -> latin1; % should never happen
+	{encoding, Enc} -> Enc
     end.
 
-quote($^)  -> "\\^";
-quote($.)  -> "\\.";
-quote($$)  -> "\\$";
-quote($-)  -> "\\-";
-quote($[)  -> "\\[";
-quote($])  -> "\\]";
-quote($\s) -> "\\\\s";
-quote($\") -> "\\\"";
-quote($\b) -> "\\\\b";
-quote($\f) -> "\\\\f";
-quote($\n) -> "\\\\n";
-quote($\r) -> "\\\\r";
-quote($\t) -> "\\\\t";
-quote($\e) -> "\\\\e";
-quote($\v) -> "\\\\v";
-quote($\d) -> "\\\\d";
-quote($\\) -> "\\\\";
-quote(C) when is_integer(C) ->
+quote($^, _File)  -> "\\^";
+quote($., _File)  -> "\\.";
+quote($$, _File)  -> "\\$";
+quote($-, _File)  -> "\\-";
+quote($[, _File)  -> "\\[";
+quote($], _File)  -> "\\]";
+quote($\s, _File) -> "\\\\s";
+quote($\", _File) -> "\\\"";
+quote($\b, _File) -> "\\\\b";
+quote($\f, _File) -> "\\\\f";
+quote($\n, _File) -> "\\\\n";
+quote($\r, _File) -> "\\\\r";
+quote($\t, _File) -> "\\\\t";
+quote($\e, _File) -> "\\\\e";
+quote($\v, _File) -> "\\\\v";
+quote($\d, _File) -> "\\\\d";
+quote($\\, _File) -> "\\\\";
+quote(C, File) when is_integer(C) ->
     %% Must remove the $ and get the \'s right.
-    case io_lib:write_char(C) of
+    S = case enc(File) of
+            unicode -> io_lib:write_char(C);
+            latin1  -> io_lib:write_char_as_latin1(C)
+        end,
+    case S of
         [$$,$\\|Cs] -> "\\\\" ++ Cs;
         [$$|Cs] -> Cs
     end;
-quote(maxchar) ->
+quote(maxchar, _File) ->
     "MAXCHAR".
diff --git a/lib/parsetools/test/leex_SUITE.erl b/lib/parsetools/test/leex_SUITE.erl
index 54602848ec..3f5d9fee3e 100644
--- a/lib/parsetools/test/leex_SUITE.erl
+++ b/lib/parsetools/test/leex_SUITE.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %% 
-%% Copyright Ericsson AB 2010-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2010-2017. All Rights Reserved.
 %% 
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -45,7 +45,7 @@
 	 
 	 pt/1, man/1, ex/1, ex2/1, not_yet/1,
 	 line_wrap/1,
-	 otp_10302/1, otp_11286/1, unicode/1, otp_13916/1]).
+	 otp_10302/1, otp_11286/1, unicode/1, otp_13916/1, otp_14285/1]).
 
 % Default timetrap timeout (set in init_per_testcase).
 -define(default_timeout, ?t:minutes(1)).
@@ -67,7 +67,7 @@ all() ->
 groups() -> 
     [{checks, [], [file, compile, syntax]},
      {examples, [], [pt, man, ex, ex2, not_yet, unicode]},
-     {tickets, [], [otp_10302, otp_11286, otp_13916]},
+     {tickets, [], [otp_10302, otp_11286, otp_13916, otp_14285]},
      {bugs, [], [line_wrap]}].
 
 init_per_suite(Config) ->
@@ -1131,6 +1131,45 @@ otp_13916(Config) when is_list(Config) ->
     ?line run(Config, Ts),
     ok.
 
+otp_14285(Config) ->
+    Dir = ?privdir,
+    Filename = filename:join(Dir, "file.xrl"),
+
+    Ts = [{otp_14285_1,
+           <<"%% encoding: latin-1\n"
+             "Definitions.\n"
+             "A = a\n"
+             "Z = z\n"
+             "L = [{A}-{Z}]\n"
+             "U = [\\x{400}]\n"
+             "Rules.\n"
+             "{L}+ : {token,l}.\n"
+             "{U}+ : {token,'\\x{400}'}.\n"
+             "Erlang code.\n"
+             "-export([t/0]).\n"
+             "t() ->\n"
+             "    {ok,['\\x{400}'],1} = string(\"\\x{400}\"), ok.\n">>,
+           default,
+           ok},
+          {otp_14285_2,
+           <<"%% encoding: UTF-8\n"
+             "Definitions.\n"
+             "A = a\n"
+             "Z = z\n"
+             "L = [{A}-{Z}]\n"
+             "U = [\x{400}]\n"
+             "Rules.\n"
+             "{L}+ : {token,l}.\n"
+             "{U}+ : {token,'\x{400}'}.\n"
+             "Erlang code.\n"
+             "-export([t/0]).\n"
+             "t() ->\n"
+             "    {ok,['\x{400}'],1} = string(\"\x{400}\"), ok.\n">>,
+           default,
+           ok}],
+    run(Config, Ts),
+    ok.
+
 start_node(Name, Args) ->
     [_,Host] = string:tokens(atom_to_list(node()), "@"),
     ct:log("Trying to start ~w@~s~n", [Name,Host]),
-- 
cgit v1.2.3


From 9c013d50cc5abf3b0a0dbb5fc2be97c825bc0261 Mon Sep 17 00:00:00 2001
From: Hans Bolinder <hasse@erlang.org>
Date: Thu, 23 Mar 2017 16:36:17 +0100
Subject: parsetools: Fix Yecc regarding Unicode atoms

Terminals and non-terminals still need to be quoted, which is a
side-effect of using the Erlang scanner.
---
 lib/parsetools/src/yecc.erl        | 116 +++++++++++++++++++------------------
 lib/parsetools/test/yecc_SUITE.erl |  88 +++++++++++++++++++++++++++-
 2 files changed, 147 insertions(+), 57 deletions(-)

(limited to 'lib')

diff --git a/lib/parsetools/src/yecc.erl b/lib/parsetools/src/yecc.erl
index 05446c1a85..48559ec402 100644
--- a/lib/parsetools/src/yecc.erl
+++ b/lib/parsetools/src/yecc.erl
@@ -154,13 +154,13 @@ compile(Input0, Output0,
 format_error(bad_declaration) ->
     io_lib:fwrite("unknown or bad declaration, ignored", []);
 format_error({bad_expect, SymName}) ->
-    io_lib:fwrite("argument ~s of Expect is not an integer", 
+    io_lib:fwrite("argument ~ts of Expect is not an integer", 
                   [format_symbol(SymName)]);
 format_error({bad_rootsymbol, SymName}) ->
-    io_lib:fwrite("rootsymbol ~s is not a nonterminal", 
+    io_lib:fwrite("rootsymbol ~ts is not a nonterminal", 
                   [format_symbol(SymName)]);
 format_error({bad_states, SymName}) ->
-    io_lib:fwrite("argument ~s of States is not an integer", 
+    io_lib:fwrite("argument ~ts of States is not an integer", 
                   [format_symbol(SymName)]);
 format_error({conflict, Conflict}) ->
     format_conflict(Conflict);
@@ -169,19 +169,19 @@ format_error({conflicts, SR, RR}) ->
 format_error({duplicate_declaration, Tag}) ->
     io_lib:fwrite("duplicate declaration of ~s", [atom_to_list(Tag)]);
 format_error({duplicate_nonterminal, Nonterminal}) ->
-    io_lib:fwrite("duplicate non-terminals ~s", 
+    io_lib:fwrite("duplicate non-terminals ~ts", 
                   [format_symbol(Nonterminal)]);
 format_error({duplicate_precedence, Op}) ->
-    io_lib:fwrite("duplicate precedence operator ~s", 
+    io_lib:fwrite("duplicate precedence operator ~ts", 
                   [format_symbol(Op)]);
 format_error({duplicate_terminal, Terminal}) ->
-    io_lib:fwrite("duplicate terminal ~s", 
+    io_lib:fwrite("duplicate terminal ~ts", 
                   [format_symbol(Terminal)]);
 format_error({endsymbol_is_nonterminal, Symbol}) ->
-    io_lib:fwrite("endsymbol ~s is a nonterminal", 
+    io_lib:fwrite("endsymbol ~ts is a nonterminal", 
                   [format_symbol(Symbol)]);
 format_error({endsymbol_is_terminal, Symbol}) ->
-    io_lib:fwrite("endsymbol ~s is a terminal", 
+    io_lib:fwrite("endsymbol ~ts is a terminal", 
                   [format_symbol(Symbol)]);
 format_error({error, Module, Error}) ->
     Module:format_error(Error);
@@ -192,7 +192,7 @@ format_error(illegal_empty) ->
 format_error({internal_error, Error}) ->
     io_lib:fwrite("internal yecc error: ~w", [Error]);
 format_error({missing_syntax_rule, Nonterminal}) ->
-    io_lib:fwrite("no syntax rule for non-terminal symbol ~s",
+    io_lib:fwrite("no syntax rule for non-terminal symbol ~ts",
                   [format_symbol(Nonterminal)]);
 format_error({n_states, Exp, N}) ->
     io_lib:fwrite("expected ~w states, but got ~p states", [Exp, N]);
@@ -201,31 +201,31 @@ format_error(no_grammar_rules) ->
 format_error(nonterminals_missing) ->
     io_lib:fwrite("Nonterminals is missing", []);
 format_error({precedence_op_is_endsymbol, SymName}) ->
-    io_lib:fwrite("precedence operator ~s is endsymbol",
+    io_lib:fwrite("precedence operator ~ts is endsymbol",
                   [format_symbol(SymName)]);
 format_error({precedence_op_is_unknown, SymName}) ->
-    io_lib:fwrite("unknown precedence operator ~s",
+    io_lib:fwrite("unknown precedence operator ~ts",
                   [format_symbol(SymName)]);
 format_error({reserved, N}) ->
     io_lib:fwrite("the use of ~w should be avoided", [N]);
 format_error({symbol_terminal_and_nonterminal, SymName}) ->
-    io_lib:fwrite("symbol ~s is both a terminal and nonterminal",
+    io_lib:fwrite("symbol ~ts is both a terminal and nonterminal",
                   [format_symbol(SymName)]);
 format_error(rootsymbol_missing) ->
     io_lib:fwrite("Rootsymbol is missing", []);
 format_error(terminals_missing) ->
     io_lib:fwrite("Terminals is missing", []);
 format_error({undefined_nonterminal, Symbol}) ->
-    io_lib:fwrite("undefined nonterminal: ~s", [format_symbol(Symbol)]);
+    io_lib:fwrite("undefined nonterminal: ~ts", [format_symbol(Symbol)]);
 format_error({undefined_pseudo_variable, Atom}) ->
     io_lib:fwrite("undefined pseudo variable ~w", [Atom]);
 format_error({undefined_symbol, SymName}) ->
-    io_lib:fwrite("undefined rhs symbol ~s", [format_symbol(SymName)]);
+    io_lib:fwrite("undefined rhs symbol ~ts", [format_symbol(SymName)]);
 format_error({unused_nonterminal, Nonterminal}) ->
-    io_lib:fwrite("non-terminal symbol ~s not used", 
+    io_lib:fwrite("non-terminal symbol ~ts not used", 
                   [format_symbol(Nonterminal)]);
 format_error({unused_terminal, Terminal}) ->
-    io_lib:fwrite("terminal symbol ~s not used", 
+    io_lib:fwrite("terminal symbol ~ts not used", 
                   [format_symbol(Terminal)]);
 format_error({bad_symbol, String}) ->
     io_lib:fwrite("bad symbol ~ts", [String]);
@@ -1809,9 +1809,9 @@ report_conflict(Conflict, St, ActionName, How) ->
             Formated = format_symbol(ActionName),
             case How of 
                 prec ->
-                    io:fwrite(<<"Resolved in favor of ~s.\n\n">>, [Formated]);
+                    io:fwrite(<<"Resolved in favor of ~ts.\n\n">>, [Formated]);
                 default ->
-                    io:fwrite(<<"Conflict resolved in favor of ~s.\n\n">>, 
+                    io:fwrite(<<"Conflict resolved in favor of ~ts.\n\n">>, 
                               [Formated])
             end;
         true ->
@@ -1856,7 +1856,7 @@ format_conflict({Symbol, N, _, {one_level_up,
                                 {L1, RuleN1, {P1, Ass1}}, 
                                 {L2, RuleN2, {P2, Ass2}}}}) ->
     S1 = io_lib:fwrite(<<"Conflicting precedences of symbols when "
-                         "scanning ~s in state ~w:\n">>, 
+                         "scanning ~ts in state ~w:\n">>, 
                        [format_symbol(Symbol), N]),
     S2 = io_lib:fwrite(<<"   ~s ~w (rule ~w at line ~w)\n"
                           "      vs.\n">>,
@@ -1866,26 +1866,26 @@ format_conflict({Symbol, N, _, {one_level_up,
     [S1, S2, S3];
 format_conflict({Symbol, N, Reduce, Confl}) ->
     S1 = io_lib:fwrite(<<"Parse action conflict scanning symbol "
-                         "~s in state ~w:\n">>, [format_symbol(Symbol), N]),
+                         "~ts in state ~w:\n">>, [format_symbol(Symbol), N]),
     S2 = case Reduce of
              {[HR | TR], RuleNmbr, RuleLine} ->
-                 io_lib:fwrite(<<"   Reduce to ~s from ~s (rule ~w at "
+                 io_lib:fwrite(<<"   Reduce to ~ts from ~ts (rule ~w at "
                                  "line ~w)\n      vs.\n">>,
                                [format_symbol(HR), format_symbols(TR), 
                                 RuleNmbr, RuleLine])
          end,
     S3 = case Confl of 
              {reduce, [HR2|TR2], RuleNmbr2, RuleLine2} ->
-                 io_lib:fwrite(<<"   reduce to ~s from ~s "
+                 io_lib:fwrite(<<"   reduce to ~ts from ~ts "
                                  "(rule ~w at line ~w).">>,
                                [format_symbol(HR2), format_symbols(TR2), 
                                 RuleNmbr2, RuleLine2]);
              {shift, NewState, Sym} ->
                  io_lib:fwrite(<<"   shift to state ~w, adding right "
-                                 "sisters to ~s.">>,
+                                 "sisters to ~ts.">>,
                                [NewState, format_symbol(Sym)]);
              {accept, Rootsymbol} ->
-                 io_lib:fwrite(<<"   reduce to rootsymbol ~s.">>,
+                 io_lib:fwrite(<<"   reduce to rootsymbol ~ts.">>,
                                [format_symbol(Rootsymbol)])
          end,
     [S1, S2, S3].
@@ -1926,8 +1926,9 @@ format_conflict({Symbol, N, Reduce, Confl}) ->
 
 -define(CODE_VERSION, "1.4").
 -define(YECC_BUG(M, A), 
-        iolist_to_binary([" erlang:error({yecc_bug,\"",?CODE_VERSION,"\",",
-                          io_lib:fwrite(M, A), "}).\n\n"])).
+        unicode:characters_to_binary(
+          [" erlang:error({yecc_bug,\"",?CODE_VERSION,"\",",
+           io_lib:fwrite(M, A), "}).\n\n"])).
 
 %% Returns number of newlines in included files.
 output_prelude(Outport, Inport, St0) when St0#yecc.includefile =:= [] ->
@@ -1980,7 +1981,7 @@ output_header(St0) ->
 output_goto(St, [{_Nonterminal, []} | Go], StateInfo) ->
     output_goto(St, Go, StateInfo);
 output_goto(St0, [{Nonterminal, List} | Go], StateInfo) ->
-    F = function_name(yeccgoto, Nonterminal),
+    F = function_name(St0, yeccgoto, Nonterminal),
     St05 = fwrite(St0, <<"-dialyzer({nowarn_function, ~w/7}).\n">>, [F]),
     St10 = output_goto1(St05, List, F, StateInfo, true),
     St = output_goto_fini(F, Nonterminal, St10),
@@ -2018,7 +2019,8 @@ output_goto_fini(F, NT, #yecc{includefile_version = {1,1}}=St0) ->
     St = fwrite(St10, <<"~w(State, _Cat, _Ss, _Stack, _T, _Ts, _Tzr) ->\n">>,
                 [F]),
     fwrite(St, 
-           ?YECC_BUG(<<"{~w, State, missing_in_goto_table}">>, [NT]),
+           ?YECC_BUG(<<"{~ts, State, missing_in_goto_table}">>,
+                     [quoted_atom(St0, NT)]),
            []);
 output_goto_fini(_F, _NT, St) ->
     fwrite(St, <<".\n\n">>, []).
@@ -2027,7 +2029,7 @@ output_goto_fini(_F, _NT, St) ->
 find_user_code(ParseActions, St) ->
     [#user_code{state = State, 
                 terminal = Terminal, 
-                funname = inlined_function_name(State, Terminal), 
+                funname = inlined_function_name(St, State, Terminal),
                 action = Action} || 
         {State, La_actions} <- ParseActions,
         {Action, Terminals, RuleNmbr, NmbrOfDaughters} 
@@ -2148,14 +2150,14 @@ output_action(St, State, Terminal, #reduce{}=Action, IsFirst, SI) ->
     output_reduce(St, State, Terminal, Action, IsFirst, SI);
 output_action(St0, State, Terminal, #shift{state = NewState}, IsFirst, _SI) ->
     St10 = delim(St0, IsFirst),
-    St = fwrite(St10, <<"yeccpars2_~w(S, ~s, Ss, Stack, T, Ts, Tzr) ->\n">>,
-                [State, quoted_atom(Terminal)]),
+    St = fwrite(St10, <<"yeccpars2_~w(S, ~ts, Ss, Stack, T, Ts, Tzr) ->\n">>,
+                [State, quoted_atom(St10, Terminal)]),
     output_call_to_includefile(NewState, St);
 output_action(St0, State, Terminal, accept, IsFirst, _SI) ->
     St10 = delim(St0, IsFirst),
     St = fwrite(St10, 
-                <<"yeccpars2_~w(_S, ~s, _Ss, Stack, _T, _Ts, _Tzr) ->\n">>,
-                [State, quoted_atom(Terminal)]),
+                <<"yeccpars2_~w(_S, ~ts, _Ss, Stack, _T, _Ts, _Tzr) ->\n">>,
+                [State, quoted_atom(St10, Terminal)]),
     fwrite(St, <<" {ok, hd(Stack)}">>, []);
 output_action(St, _State, _Terminal, nonassoc, _IsFirst, _SI) ->
     St.
@@ -2174,19 +2176,19 @@ output_state_actions_fini(State, IsFirst, St0) ->
     St = fwrite(St10, <<"yeccpars2_~w(_, _, _, _, T, _, _) ->\n">>, [State]),
     fwrite(St, <<" yeccerror(T).\n\n">>, []).
 
-output_reduce(St0, State, Terminal0, 
+output_reduce(St0, State, Terminal,
               #reduce{rule_nmbr = RuleNmbr, 
                       head = Head, 
                       nmbr_of_daughters = NmbrOfDaughters},
               IsFirst, StateInfo) ->
     St10 = delim(St0, IsFirst),
-    Terminal = if 
-                   is_atom(Terminal0) -> quoted_atom(Terminal0);
-                   true -> Terminal0
-               end,
+    QuotedTerminal = if 
+                         is_atom(Terminal) -> quoted_atom(St10, Terminal);
+                         true -> Terminal
+                     end,
     St20 = fwrite(St10,
-                  <<"yeccpars2_~w(_S, ~s, Ss, Stack, T, Ts, Tzr) ->\n">>,
-                  [State, Terminal]),
+                  <<"yeccpars2_~w(_S, ~ts, Ss, Stack, T, Ts, Tzr) ->\n">>,
+                  [State, QuotedTerminal]),
     St30 = 
         if
             NmbrOfDaughters < 2 ->
@@ -2205,7 +2207,7 @@ output_reduce(St0, State, Terminal0,
                _ ->
                    NewStack = "NewStack",
                    fwrite(St30, <<" NewStack = ~w(Stack),\n">>, 
-                          [inlined_function_name(State, Terminal0)])
+                          [inlined_function_name(St30, State, Terminal)])
                end,
     if 
         NmbrOfDaughters =:= 0 ->
@@ -2221,13 +2223,13 @@ output_reduce(St0, State, Terminal0,
             St = fwrite(St40, <<"~s">>, [C]),
             %% Short-circuit call to yeccpars2:
             fwrite(St,
-                   <<" yeccpars2_~w(~s, ~s, [~w | Ss], ~s, T, Ts, Tzr)">>,
-                   [Repr, NextS, Terminal, State, NewStack]);
+                   <<" yeccpars2_~w(~s, ~ts, [~w | Ss], ~s, T, Ts, Tzr)">>,
+                   [Repr, NextS, QuotedTerminal, State, NewStack]);
         true ->
             fwrite(St40, 
-                   <<" ~w(hd(~s), ~s, ~s, ~s, T, Ts, Tzr)">>,
-                   [function_name(yeccgoto, Head), Ns,
-                    Terminal, Ns, NewStack])
+                   <<" ~w(hd(~s), ~ts, ~s, ~s, T, Ts, Tzr)">>,
+                   [function_name(St40, yeccgoto, Head), Ns,
+                    QuotedTerminal, Ns, NewStack])
     end.
 
 delim(St, true) ->
@@ -2235,8 +2237,10 @@ delim(St, true) ->
 delim(St, false) ->
     fwrite(St, <<";\n">>, []).
 
-quoted_atom(Atom) ->
-    io_lib:fwrite(<<"~w">>, [Atom]).
+quoted_atom(#yecc{encoding = latin1}, Atom) when is_atom(Atom) ->
+    io_lib:write_atom_as_latin1(Atom);
+quoted_atom(_St, Atomic) ->
+    io_lib:write(Atomic).
     
 output_inlined(St, UserCodeActions, Infile) ->
     foldl(fun(#user_code{funname = InlinedFunctionName, 
@@ -2288,14 +2292,16 @@ output_inlined(St0, FunctionName, Reduce, Infile) ->
     fwrite(St, <<" [begin\n  ~ts\n  end | ~s].\n\n">>,
            [pp_tokens(Tokens, Line0, St#yecc.encoding), Stack]).
 
-inlined_function_name(State, "Cat") ->
-    inlined_function_name(State, "");
-inlined_function_name(State, Terminal) ->
-    list_to_atom(concat([yeccpars2_, State, '_', Terminal])).
+inlined_function_name(St, State, Terminal) ->
+    End = case Terminal of
+              "Cat" -> [];
+              _ -> [quoted_atom(St, Terminal)]
+          end,
+    list_to_atom(concat([yeccpars2_, State, '_'] ++ End)).
 
--compile({nowarn_unused_function,function_name/2}).
-function_name(Name, Suf) ->
-    list_to_atom(concat([Name, '_' | quoted_atom(Suf)])).
+-compile({nowarn_unused_function,function_name/3}).
+function_name(St, Name, Suf) ->
+    list_to_atom(concat([Name, '_'] ++ [quoted_atom(St, Suf)])).
 
 rule(RulePointer, St) ->
     #rule{n = N, anno = Anno, symbols = Symbols} =
diff --git a/lib/parsetools/test/yecc_SUITE.erl b/lib/parsetools/test/yecc_SUITE.erl
index 2c37278d4b..a7166b91ed 100644
--- a/lib/parsetools/test/yecc_SUITE.erl
+++ b/lib/parsetools/test/yecc_SUITE.erl
@@ -50,7 +50,7 @@
 	 otp_5369/1, otp_6362/1, otp_7945/1, otp_8483/1, otp_8486/1,
 	 
 	 otp_7292/1, otp_7969/1, otp_8919/1, otp_10302/1, otp_11269/1,
-         otp_11286/1]).
+         otp_11286/1, otp_14285/1]).
 
 % Default timetrap timeout (set in init_per_testcase).
 -define(default_timeout, ?t:minutes(1)).
@@ -78,7 +78,7 @@ groups() ->
      {bugs, [],
       [otp_5369, otp_6362, otp_7945, otp_8483, otp_8486]},
      {improvements, [], [otp_7292, otp_7969, otp_8919, otp_10302,
-                         otp_11269, otp_11286]}].
+                         otp_11269, otp_11286, otp_14285]}].
 
 init_per_suite(Config) ->
     Config.
@@ -2048,6 +2048,90 @@ otp_11286(Config) when is_list(Config) ->
     true = test_server:stop_node(Node),
     ok.
 
+otp_14285(Config) ->
+    Dir = ?privdir,
+    YeccPre = filename:join(Dir, "yeccpre.hrl"),
+    ?line ok = file:write_file(YeccPre,
+                               [<<"-export([t/0]).\n">>,my_yeccpre()]),
+
+    T0 = <<"
+        Nonterminals '\\x{400}'. 
+        Terminals t.
+        Rootsymbol '\\x{400}'.
+       '\\x{400}' -> t : '$1'.
+       Erlang code.
+       t() ->
+           L = [{t, 1}],
+           {ok, R} = parse(L),
+           {t, 1} = R,
+           ok.">>,
+    Ts0 = [{otp_14285_1,
+           [<<"%% coding: Latin-1\n">>,T0],YeccPre,ok},
+         {otp_14285_2,
+           [<<"%% coding: coding: UTF-8\n">>,T0],YeccPre,ok}],
+    run(Config, Ts0),
+    file:delete(YeccPre),
+
+    T1 = <<"
+        Nonterminals '1\\x{400}' list 'unused\\x{400}'.
+        Terminals '2\\x{400}'.
+        Rootsymbol '1\\x{400}'.
+
+        '1\\x{400}' -> list : '$1'.
+
+        list -> '2\\x{400}' : '$1'.
+        list -> list '2\\x{400}' : {foo,'\\x{400}'}.
+
+        Erlang code.
+
+        -export([t/0]).
+
+        t() ->
+            L = [{'2\\x{400}', 1}, {'2\\x{400}',2}],
+            {ok, R} = parse(L),
+            {foo,A} = R,
+            '\\x{400}' = A,
+            [1024] = atom_to_list(A),
+            ok.">>,
+
+    Ts1 = [{otp_14285_3,
+            [<<"%% coding: Latin-1\n">>,T1],default,ok},
+           {otp_14285_4,
+            [<<"%% coding: UTF-8\n">>,T1],default,ok}],
+    run(Config, Ts1),
+
+    T2 = <<"
+        Nonterminals E.
+        Terminals '-' '+' '=' id.
+        Rootsymbol E.
+        Endsymbol '\\x{400}'.
+
+        E -> E '=' E : {op, '=', '$1', '$3'}.
+        E -> E '+' E  : {op, '+', '$1', '$3'}.
+        E -> '-' E : {op, '-', '$2'}.
+        E -> id : '$1'.
+
+        Nonassoc 100 '='.
+        Right 200 '+' '-'.
+
+        Erlang code.
+
+        -export([t/0]).
+
+        t() ->
+            {ok,{op,'=',{id,1},{op,'-',{op,'+',{id,4},{id,6}}}}} = 
+                parse([{id,1},{'=',2},{'-',3},{id,4},{'+',5},{id,6},
+                      {'\\x{400}',1}]),
+            ok.">>,
+
+    Ts2 = [{otp_14285_5,
+            [<<"%% coding: Latin-1\n">>,T2],default,ok},
+           {otp_14285_6,
+            [<<"%% coding: UTF-8\n">>,T2],default,ok}],
+    run(Config, Ts2),
+
+    ok.
+
 start_node(Name, Args) ->
     [_,Host] = string:tokens(atom_to_list(node()), "@"),
     ct:log("Trying to start ~w@~s~n", [Name,Host]),
-- 
cgit v1.2.3


From 15a631cb2ec70860bc58492020904b1b16fed5c4 Mon Sep 17 00:00:00 2001
From: Hans Bolinder <hasse@erlang.org>
Date: Wed, 29 Mar 2017 15:48:27 +0200
Subject: edoc: Fix EDoc regarding Unicode atoms

Also extended the scanner to handle the \x{...} syntax.

Capitalizing now works with a few more characters.
---
 lib/edoc/src/edoc.erl                      |   2 +-
 lib/edoc/src/edoc_doclet.erl               |   2 +-
 lib/edoc/src/edoc_extract.erl              |  12 +-
 lib/edoc/src/edoc_layout.erl               | 299 +++++++++++++++--------------
 lib/edoc/src/edoc_report.erl               |   2 +-
 lib/edoc/src/edoc_scanner.erl              |  30 +++
 lib/edoc/src/edoc_specs.erl                |  13 +-
 lib/edoc/test/edoc_SUITE.erl               |  20 +-
 lib/edoc/test/edoc_SUITE_data/un_atom1.erl |  41 ++++
 lib/edoc/test/edoc_SUITE_data/un_atom2.erl |  40 ++++
 10 files changed, 311 insertions(+), 150 deletions(-)
 create mode 100644 lib/edoc/test/edoc_SUITE_data/un_atom1.erl
 create mode 100644 lib/edoc/test/edoc_SUITE_data/un_atom2.erl

(limited to 'lib')

diff --git a/lib/edoc/src/edoc.erl b/lib/edoc/src/edoc.erl
index 7276a57268..b641118c5d 100644
--- a/lib/edoc/src/edoc.erl
+++ b/lib/edoc/src/edoc.erl
@@ -197,7 +197,7 @@ application(App, Dir, Options) when is_atom(App) ->
 			     ?OVERVIEW_FILE),
     Opts = Options ++ [{source_path, [Src]},
 		       subpackages,
-		       {title, io_lib:fwrite("The ~s application", [App])},
+		       {title, io_lib:fwrite("The ~ts application", [App])},
 		       {overview, Overview},
 		       {dir, filename:join(Dir, ?EDOC_DIR)},
 		       {includes, [filename:join(Dir, "include")]}],
diff --git a/lib/edoc/src/edoc_doclet.erl b/lib/edoc/src/edoc_doclet.erl
index 006b07574b..6e17ec0af0 100644
--- a/lib/edoc/src/edoc_doclet.erl
+++ b/lib/edoc/src/edoc_doclet.erl
@@ -152,7 +152,7 @@ title(App, Options) ->
 			if App == ?NO_APP ->
 				"Overview";
 			   true ->
-				io_lib:fwrite("Application: ~s", [App])
+				io_lib:fwrite("Application: ~ts", [App])
 			end).
 
 
diff --git a/lib/edoc/src/edoc_extract.erl b/lib/edoc/src/edoc_extract.erl
index 68edad1a3e..390851e9ef 100644
--- a/lib/edoc/src/edoc_extract.erl
+++ b/lib/edoc/src/edoc_extract.erl
@@ -488,8 +488,15 @@ find_names([P | Ps], Ns) ->
 	    find_names([P1 | Ps], Ns);
 	record_expr ->
 	    A = erl_syntax:record_expr_type(P),
-	    N = list_to_atom(capitalize(erl_syntax:atom_name(A))),
-	    find_names(Ps, [N | Ns]);
+            AtomName = erl_syntax:atom_name(A),
+            Atom = list_to_atom(AtomName),
+            case AtomName =:= lists:flatten(io_lib:write_atom(Atom)) of
+                true ->
+                    N = list_to_atom(capitalize(AtomName)),
+                    find_names(Ps, [N | Ns]);
+                false ->
+                    find_names(Ps, Ns)
+            end;
 	infix_expr ->
 	    %% this can only be a '++' operation
 	    P1 = erl_syntax:infix_expr_right(P),
@@ -540,6 +547,7 @@ tidy_name_1(Cs) -> [$_ | Cs].
 %% Change initial character from lowercase to uppercase.
 
 capitalize([C | Cs]) when C >= $a, C =< $z -> [C - 32 | Cs];
+capitalize([C | Cs]) when C >= $\340, C =< $\376, C /= $\367 -> [C - 32 | Cs];
 capitalize(Cs) -> Cs.
 
 %% Collects the tags belonging to each entry, checks them, expands
diff --git a/lib/edoc/src/edoc_layout.erl b/lib/edoc/src/edoc_layout.erl
index 5ef210980c..eafab0588e 100644
--- a/lib/edoc/src/edoc_layout.erl
+++ b/lib/edoc/src/edoc_layout.erl
@@ -109,14 +109,20 @@ module(Element, Options) ->
                stylesheet,
                index_columns,
                sort_functions,
+               encoding,
                pretty_printer}).
 
 init_opts(Element, Options) ->
+    Encoding = case get_attrval(encoding, Element) of
+                   "latin1" -> latin1;
+                   _ -> utf8
+               end,
     R = #opts{root = get_attrval(root, Element),
 	      index_columns = proplists:get_value(index_columns,
 						  Options, 1),
 	      sort_functions = proplists:get_value(sort_functions,
 						   Options, true),
+              encoding = Encoding,
               pretty_printer = proplists:get_value(pretty_printer,
                                                    Options, '')
 	     },
@@ -183,8 +189,9 @@ layout_module(#xmlElement{name = module, content = Es}=E, Opts) ->
     Desc = get_content(description, Es),
     ShortDesc = get_content(briefDescription, Desc),
     FullDesc = get_content(fullDescription, Desc),
-    Functions = [{function_name(E), E} || E <- get_content(functions, Es)],
-    Types = [{type_name(E), E} || E <- get_content(typedecls, Es)],
+    Functions = [{function_name(E, Opts), E} ||
+                    E <- get_content(functions, Es)],
+    Types = [{type_name(E, Opts), E} || E <- get_content(typedecls, Es)],
     SortedFs = if Opts#opts.sort_functions -> lists:sort(Functions);
                   true -> Functions
                end,
@@ -198,7 +205,7 @@ layout_module(#xmlElement{name = module, content = Es}=E, Opts) ->
 	    ++ [?NL]
 	    ++ version(Es)
 	    ++ since(Es)
-	    ++ behaviours(Es, Name)
+	    ++ behaviours(Es, Name, Opts)
 	    ++ authors(Es)
 	    ++ references(Es)
 	    ++ sees(Es)
@@ -215,7 +222,7 @@ layout_module(#xmlElement{name = module, content = Es}=E, Opts) ->
 	    ++ [hr, ?NL]
 	    ++ navigation("bottom")
 	    ++ footer()),
-    Encoding = get_attrval(encoding, E),
+    Encoding = Opts#opts.encoding,
     xhtml(Title, stylesheet(Opts), Body, Encoding).
 
 module_params(Es) ->
@@ -367,7 +374,7 @@ function(Name, E=#xmlElement{content = Es}, Opts) ->
 	    case typespec(get_content(typespec, Es), Opts) of
 		[] ->
 		    signature(get_content(args, Es),
-			      get_attrval(name, E));
+			      atom(get_attrval(name, E), Opts));
 		Spec -> Spec
 	    end},
 	   ?NL]
@@ -387,8 +394,8 @@ function(Name, E=#xmlElement{content = Es}, Opts) ->
      ++ sees(Es)
      ++ todos(Es)).
 
-function_name(E) ->
-    atom(get_attrval(name, E)) ++ "/" ++ get_attrval(arity, E).
+function_name(E, Opts) ->
+    atom(get_attrval(name, E), Opts) ++ "/" ++ get_attrval(arity, E).
 
 function_header(Name, E, Private) ->
     case is_exported(E) of
@@ -449,7 +456,7 @@ throws(Es, Opts) ->
 	[] -> [];
 	Es1 ->
             %% Doesn't use format_type; keep it short!
-	    [{p, (["throws ", {tt, t_utype(get_elem(type, Es1))}]
+	    [{p, (["throws ", {tt, t_utype(get_elem(type, Es1), Opts)}]
 		  ++ local_defs(get_elem(localdef, Es1), Opts))},
 	     ?NL]
     end.
@@ -458,7 +465,7 @@ throws(Es, Opts) ->
 
 typespec([], _Opts) -> [];
 typespec(Es, Opts) ->
-    Name = t_name(get_elem(erlangName, Es)),
+    Name = t_name(get_elem(erlangName, Es), Opts),
     Defs = get_elem(localdef, Es),
     [Type] = get_elem(type, Es),
     format_spec(Name, Type, Defs, Opts) ++ local_defs(Defs, Opts).
@@ -479,12 +486,12 @@ typedecl(Name, E=#xmlElement{content = Es}, Opts) ->
      ++ [{p, typedef(get_content(typedef, Es), Opts)}, ?NL]
      ++ fulldesc(Es)).
 
-type_name(#xmlElement{content = Es}) ->
-    t_name(get_elem(erlangName, get_content(typedef, Es))).
+type_name(#xmlElement{content = Es}, Opts) ->
+    t_name(get_elem(erlangName, get_content(typedef, Es)), Opts).
 
 typedef(Es, Opts) ->
-    Name = ([t_name(get_elem(erlangName, Es)), "("]
-            ++ seq(fun t_utype_elem/1, get_content(argtypes, Es), [")"])),
+    Name = ([t_name(get_elem(erlangName, Es), Opts), "("]
+            ++ seq(t_utype_elem_fun(Opts), get_content(argtypes, Es), [")"])),
     (case get_elem(type, Es) of
  	 [] -> [{b, ["abstract datatype"]}, ": ", {tt, Name}];
          Type -> format_type(Name, Name, Type, [], Opts)
@@ -505,7 +512,9 @@ local_defs(Es0, Last, Opts) ->
 localdef(E = #xmlElement{content = Es}, Last, Opts) ->
     Name = case get_elem(typevar, Es) of
                [] ->
-                   label_anchor(N0 = t_abstype(get_content(abstype, Es)), E);
+                   label_anchor(N0 = t_abstype(get_content(abstype, Es),
+                                               Opts),
+                                E);
                [V] ->
                    N0 = t_var(V)
            end,
@@ -516,97 +525,99 @@ localdef(E = #xmlElement{content = Es}, Last, Opts) ->
 %% (fast) Erlang pretty printer).
 format_spec(Name, Type, Defs, #opts{pretty_printer = erl_pp}=Opts) ->
     try
-        L = t_clause(Name, Type),
-        O = pp_clause(Name, Type),
-        {R, ".\n"} = etypef(L, O),
+        L = t_clause(Name, Type, Opts),
+        O = pp_clause(Name, Type, Opts),
+        {R, ".\n"} = etypef(L, O, Opts),
         [{pre, R}]
     catch _:_ ->
         %% Should not happen.
         format_spec(Name, Type, Defs, Opts#opts{pretty_printer=''})
     end;
-format_spec(Sep, Type, Defs, _Opts) ->
+format_spec(Sep, Type, Defs, Opts) ->
     %% Very limited formatting.
     Br = if Defs =:= [] -> br; true -> [] end,
-    [{tt, t_clause(Sep, Type)}, Br].
+    [{tt, t_clause(Sep, Type, Opts)}, Br].
 
-t_clause(Name, Type) ->
+t_clause(Name, Type, Opts) ->
     #xmlElement{content = [#xmlElement{name = 'fun', content = C}]} = Type,
-    [Name] ++ t_fun(C).
+    [Name] ++ t_fun(C, Opts).
 
-pp_clause(Pre, Type) ->
+pp_clause(Pre, Type, Opts) ->
     Types = ot_utype([Type]),
-    Atom = lists:duplicate(iolist_size(Pre), $a),
+    Atom = lists:duplicate(string:length(Pre), $a),
     Attr = {attribute,0,spec,{{list_to_atom(Atom),0},[Types]}},
-    L1 = erl_pp:attribute(erl_parse:new_anno(Attr)),
+    L1 = erl_pp:attribute(erl_parse:new_anno(Attr),
+                          [{encoding, Opts#opts.encoding}]),
     "-spec " ++ L2 = lists:flatten(L1),
     L3 = Pre ++ lists:nthtail(length(Atom), L2),
-    re:replace(L3, "\n      ", "\n", [{return,list},global]).
+    re:replace(L3, "\n      ", "\n", [{return,list},global,unicode]).
 
 format_type(Prefix, Name, Type, Last, #opts{pretty_printer = erl_pp}=Opts) ->
     try
-        L = t_utype(Type),
-        O = pp_type(Name, Type),
-        {R, ".\n"} = etypef(L, O),
+        L = t_utype(Type, Opts),
+        O = pp_type(Name, Type, Opts),
+        {R, ".\n"} = etypef(L, O, Opts),
         [{pre, Prefix ++ [" = "] ++ R ++ Last}]
     catch _:_ ->
         %% Example: "t() = record(a)."
         format_type(Prefix, Name, Type, Last, Opts#opts{pretty_printer =''})
     end;
-format_type(Prefix, _Name, Type, Last, _Opts) ->
-    [{tt, Prefix ++ [" = "] ++ t_utype(Type) ++ Last}].
+format_type(Prefix, _Name, Type, Last, Opts) ->
+    [{tt, Prefix ++ [" = "] ++ t_utype(Type, Opts) ++ Last}].
 
-pp_type(Prefix, Type) ->
-    Atom = list_to_atom(lists:duplicate(iolist_size(Prefix), $a)),
+pp_type(Prefix, Type, Opts) ->
+    Atom = list_to_atom(lists:duplicate(string:length(Prefix), $a)),
     Attr = {attribute,0,type,{Atom,ot_utype(Type),[]}},
-    L1 = erl_pp:attribute(erl_parse:new_anno(Attr)),
+    L1 = erl_pp:attribute(erl_parse:new_anno(Attr),
+                          [{encoding, Opts#opts.encoding}]),
     {L2,N} = case lists:dropwhile(fun(C) -> C =/= $: end, lists:flatten(L1)) of
                  ":: " ++ L3 -> {L3,9}; % compensation for extra "()" and ":"
                  "::\n" ++ L3 -> {"\n"++L3,6}
              end,
     Ss = lists:duplicate(N, $\s),
-    re:replace(L2, "\n"++Ss, "\n", [{return,list},global]).
+    re:replace(L2, "\n"++Ss, "\n", [{return,list},global,unicode]).
 
-etypef(L, O0) ->
-    {R, O} = etypef(L, [], O0, []),
+etypef(L, O0, Opts) ->
+    {R, O} = etypef(L, [], O0, [], Opts),
     {lists:reverse(R), O}.
 
-etypef([C | L], St, [C | O], R) ->
-    etypef(L, St, O, [[C] | R]);
-etypef(" "++L, St, O, R) ->
-    etypef(L, St, O, R);
-etypef("", [Cs | St], O, R) ->
-    etypef(Cs, St, O, R);
-etypef("", [], O, R) ->
+etypef([C | L], St, [C | O], R, Opts) ->
+    etypef(L, St, O, [[C] | R], Opts);
+etypef(" "++L, St, O, R, Opts) ->
+    etypef(L, St, O, R, Opts);
+etypef("", [Cs | St], O, R, Opts) ->
+    etypef(Cs, St, O, R, Opts);
+etypef("", [], O, R, _Opts) ->
     {R, O};
-etypef(L, St, " "++O, R) ->
-    etypef(L, St, O, [" " | R]);
-etypef(L, St, "\n"++O, R) ->
+etypef(L, St, " "++O, R, Opts) ->
+    etypef(L, St, O, [" " | R], Opts);
+etypef(L, St, "\n"++O, R, Opts) ->
     Ss = lists:takewhile(fun(C) -> C =:= $\s end, O),
-    etypef(L, St, lists:nthtail(length(Ss), O), ["\n"++Ss | R]);
-etypef([{a, HRef, S0} | L], St, O0, R) ->
-    {S, O} = etypef(S0, app_fix(O0)),
-    etypef(L, St, O, [{a, HRef, S} | R]);
-etypef("="++L, St, "::"++O, R) ->
+    etypef(L, St, lists:nthtail(length(Ss), O), ["\n"++Ss | R], Opts);
+etypef([{a, HRef, S0} | L], St, O0, R, Opts) ->
+    {S, O} = etypef(S0, app_fix(O0, Opts), Opts),
+    etypef(L, St, O, [{a, HRef, S} | R], Opts);
+etypef("="++L, St, "::"++O, R, Opts) ->
     %% EDoc uses "=" for record field types; Erlang types use "::".
     %% Maybe there should be an option for this, possibly affecting
     %% other similar discrepancies.
-    etypef(L, St, O, ["=" | R]);
-etypef([Cs | L], St, O, R) ->
-    etypef(Cs, [L | St], O, R).
+    etypef(L, St, O, ["=" | R], Opts);
+etypef([Cs | L], St, O, R, Opts) ->
+    etypef(Cs, [L | St], O, R, Opts).
 
-app_fix(L) ->
+app_fix(L, Opts) ->
     try
-        {"//" ++ R1,L2} = app_fix(L, 1),
+        {"//" ++ R1,L2} = app_fix1(L, 1),
         [App, Mod] = string:tokens(R1, "/"),
-        "//" ++ atom(App) ++ "/" ++ atom(Mod) ++ L2
+        "//" ++ atom(App, Opts) ++ "/" ++ atom(Mod, Opts) ++ L2
     catch _:_ -> L
     end.
 
-app_fix(L, I) -> % a bit slow
+app_fix1(L, I) -> % a bit slow
     {L1, L2} = lists:split(I, L),
     case erl_scan:tokens([], L1 ++ ". ", 1) of
         {done, {ok,[{atom,_,Atom}|_],_}, _} -> {atom_to_list(Atom), L2};
-        _ -> app_fix(L, I+1)
+        _ -> app_fix1(L, I+1)
     end.
 
 fulldesc(Es) ->
@@ -703,7 +714,7 @@ deprecated(Es, S) ->
 	     ?NL]
     end.
 
-behaviours(Es, Name) ->
+behaviours(Es, Name, Opts) ->
     CBs = get_content(callbacks, Es),
     OCBs = get_content(optional_callbacks, Es),
     (case get_elem(behaviour, Es) of
@@ -717,17 +728,18 @@ behaviours(Es, Name) ->
      if CBs =:= [], OCBs =:= [] ->
              [];
 	 true ->
+             CBFun = fun(E) -> callback(E, Opts) end,
              Req = if CBs =:= [] ->
                        [];
                        true ->
                            [br, " Required callback functions: "]
-                           ++ seq(fun callback/1, CBs, ["."])
+                           ++ seq(CBFun, CBs, ["."])
                    end,
              Opt = if OCBs =:= [] ->
                        [];
                        true ->
                            [br, " Optional callback functions: "]
-                           ++ seq(fun callback/1, OCBs, ["."])
+                           ++ seq(CBFun, OCBs, ["."])
                    end,
 	     [{p, ([{b, ["This module defines the ", {tt, [Name]},
 			 " behaviour."]}]
@@ -738,10 +750,10 @@ behaviours(Es, Name) ->
 behaviour(E=#xmlElement{content = Es}) ->
     see(E, [{tt, Es}]).
 
-callback(E=#xmlElement{}) ->
+callback(E=#xmlElement{}, Opts) ->
     Name = get_attrval(name, E),
     Arity = get_attrval(arity, E),
-    [{tt, [Name, "/", Arity]}].
+    [{tt, [atom(Name, Opts), "/", Arity]}].
 
 authors(Es) ->
     case get_elem(author, Es) of
@@ -751,7 +763,9 @@ authors(Es) ->
 	     ?NL]
     end.
 
-atom(String) ->
+atom(String, #opts{encoding = latin1}) ->
+    io_lib:write_atom_as_latin1(list_to_atom(String));
+atom(String, #opts{encoding = utf8}) ->
     io_lib:write_atom(list_to_atom(String)).
 
 %% <!ATTLIST author
@@ -799,70 +813,73 @@ todos(Es) ->
 	     ?NL]
     end.
 
-t_name([E]) ->
+t_name([E], Opts) ->
     N = get_attrval(name, E),
     case get_attrval(module, E) of
-	"" -> atom(N);
+	"" -> atom(N, Opts);
 	M ->
-	    S = atom(M) ++ ":" ++ atom(N),
+	    S = atom(M, Opts) ++ ":" ++ atom(N, Opts),
 	    case get_attrval(app, E) of
 		"" -> S;
-		A -> "//" ++ atom(A) ++ "/" ++ S
+		A -> "//" ++ atom(A, Opts) ++ "/" ++ S
 	    end
     end.
 
-t_utype([E]) ->
-    t_utype_elem(E).
+t_utype([E], Opts) ->
+    t_utype_elem(E, Opts).
+
+t_utype_elem_fun(Opts) ->
+    fun(E) -> t_utype_elem(E, Opts) end.
 
-t_utype_elem(E=#xmlElement{content = Es}) ->
+t_utype_elem(E=#xmlElement{content = Es}, Opts) ->
     case get_attrval(name, E) of
-	"" -> t_type(Es);
+	"" -> t_type(Es, Opts);
 	Name ->
-	    T = t_type(Es),
+	    T = t_type(Es, Opts),
 	    case T of
 		[Name] -> T;    % avoid generating "Foo::Foo"
 		T -> [Name] ++ ["::"] ++ T
 	    end
     end.
 
-t_type([E=#xmlElement{name = typevar}]) ->
+t_type([E=#xmlElement{name = typevar}], _Opts) ->
     t_var(E);
-t_type([E=#xmlElement{name = atom}]) ->
-    t_atom(E);
-t_type([E=#xmlElement{name = integer}]) ->
+t_type([E=#xmlElement{name = atom}], Opts) ->
+    t_atom(E, Opts);
+t_type([E=#xmlElement{name = integer}], _Opts) ->
     t_integer(E);
-t_type([E=#xmlElement{name = range}]) ->
+t_type([E=#xmlElement{name = range}], _Opts) ->
     t_range(E);
-t_type([E=#xmlElement{name = binary}]) ->
+t_type([E=#xmlElement{name = binary}], _Opts) ->
     t_binary(E);
-t_type([E=#xmlElement{name = float}]) ->
+t_type([E=#xmlElement{name = float}], _Opts) ->
     t_float(E);
-t_type([#xmlElement{name = nil}]) ->
+t_type([#xmlElement{name = nil}], _Opts) ->
     t_nil();
-t_type([#xmlElement{name = paren, content = Es}]) ->
-    t_paren(Es);
-t_type([#xmlElement{name = list, content = Es}]) ->
-    t_list(Es);
-t_type([#xmlElement{name = nonempty_list, content = Es}]) ->
-    t_nonempty_list(Es);
-t_type([#xmlElement{name = map, content = Es}]) ->
-    t_map(Es);
-t_type([#xmlElement{name = tuple, content = Es}]) ->
-    t_tuple(Es);
-t_type([#xmlElement{name = 'fun', content = Es}]) ->
-    ["fun("] ++ t_fun(Es) ++ [")"];
-t_type([E = #xmlElement{name = record, content = Es}]) ->
-    t_record(E, Es);
-t_type([E = #xmlElement{name = abstype, content = Es}]) ->
-    t_abstype(E, Es);
-t_type([#xmlElement{name = union, content = Es}]) ->
-    t_union(Es).
+t_type([#xmlElement{name = paren, content = Es}], Opts) ->
+    t_paren(Es, Opts);
+t_type([#xmlElement{name = list, content = Es}], Opts) ->
+    t_list(Es, Opts);
+t_type([#xmlElement{name = nonempty_list, content = Es}], Opts) ->
+    t_nonempty_list(Es, Opts);
+t_type([#xmlElement{name = map, content = Es}], Opts) ->
+    t_map(Es, Opts);
+t_type([#xmlElement{name = tuple, content = Es}], Opts) ->
+    t_tuple(Es, Opts);
+t_type([#xmlElement{name = 'fun', content = Es}], Opts) ->
+    ["fun("] ++ t_fun(Es, Opts) ++ [")"];
+t_type([E = #xmlElement{name = record, content = Es}], Opts) ->
+    t_record(E, Es, Opts);
+t_type([E = #xmlElement{name = abstype, content = Es}], Opts) ->
+    t_abstype(E, Es, Opts);
+t_type([#xmlElement{name = union, content = Es}], Opts) ->
+    t_union(Es, Opts).
 
 t_var(E) ->
     [get_attrval(name, E)].
 
-t_atom(E) ->
-    [get_attrval(value, E)].
+t_atom(E, Opts) ->
+    [atom(get_attrval(value, E), Opts)].
 
 t_integer(E) ->
     [get_attrval(value, E)].
@@ -879,62 +896,64 @@ t_float(E) ->
 t_nil() ->
     ["[]"].
 
-t_paren(Es) ->
-    ["("] ++ t_utype(get_elem(type, Es)) ++ [")"].
+t_paren(Es, Opts) ->
+    ["("] ++ t_utype(get_elem(type, Es), Opts) ++ [")"].
 
-t_list(Es) ->
-    ["["] ++ t_utype(get_elem(type, Es)) ++ ["]"].
+t_list(Es, Opts) ->
+    ["["] ++ t_utype(get_elem(type, Es), Opts) ++ ["]"].
 
-t_nonempty_list(Es) ->
-    ["["] ++ t_utype(get_elem(type, Es)) ++ [", ...]"].
+t_nonempty_list(Es, Opts) ->
+    ["["] ++ t_utype(get_elem(type, Es), Opts) ++ [", ...]"].
 
-t_tuple(Es) ->
-    ["{"] ++ seq(fun t_utype_elem/1, Es, ["}"]).
+t_tuple(Es, Opts) ->
+    ["{"] ++ seq(t_utype_elem_fun(Opts), Es, ["}"]).
 
-t_fun(Es) ->
-    ["("] ++ seq(fun t_utype_elem/1, get_content(argtypes, Es),
-		 [") -> "] ++ t_utype(get_elem(type, Es))).
+t_fun(Es, Opts) ->
+    ["("] ++ seq(t_utype_elem_fun(Opts), get_content(argtypes, Es),
+		 [") -> "] ++ t_utype(get_elem(type, Es), Opts)).
 
-t_map(Es) ->
+t_map(Es, Opts) ->
     Fs = get_elem(map_field, Es),
-    ["#{"] ++ seq(fun t_map_field/1, Fs, ["}"]).
+    ["#{"] ++ seq(fun(E) -> t_map_field(E, Opts) end, Fs, ["}"]).
 
-t_map_field(#xmlElement{content = [K,V]}=E) ->
-    KElem = t_utype_elem(K),
-    VElem = t_utype_elem(V),
+t_map_field(#xmlElement{content = [K,V]}=E, Opts) ->
+    KElem = t_utype_elem(K, Opts),
+    VElem = t_utype_elem(V, Opts),
     AS = case get_attrval(assoc_type, E) of
              "assoc" -> " => ";
              "exact" -> " := "
          end,
     KElem ++ [AS] ++ VElem.
 
-t_record(E, Es) ->
-    Name = ["#"] ++ t_type(get_elem(atom, Es)),
+t_record(E, Es, Opts) ->
+    Name = ["#"] ++ t_type(get_elem(atom, Es), Opts),
     case get_elem(field, Es) of
         [] ->
             see(E, [Name, "{}"]);
         Fs ->
-            see(E, Name) ++ ["{"] ++ seq(fun t_field/1, Fs, ["}"])
+            see(E, Name) ++ ["{"] ++ seq(fun(F) -> t_field(F, Opts) end,
+                                         Fs, ["}"])
     end.
 
-t_field(#xmlElement{content = Es}) ->
-    t_type(get_elem(atom, Es)) ++ [" = "] ++ t_utype(get_elem(type, Es)).
+t_field(#xmlElement{content = Es}, Opts) ->
+    (t_type(get_elem(atom, Es), Opts) ++ [" = "] ++
+     t_utype(get_elem(type, Es), Opts)).
 
-t_abstype(E, Es) ->
-    Name = t_name(get_elem(erlangName, Es)),
+t_abstype(E, Es, Opts) ->
+    Name = t_name(get_elem(erlangName, Es), Opts),
     case get_elem(type, Es) of
         [] ->
             see(E, [Name, "()"]);
         Ts ->
-            see(E, [Name]) ++ ["("] ++ seq(fun t_utype_elem/1, Ts, [")"])
+            see(E, [Name]) ++ ["("] ++ seq(t_utype_elem_fun(Opts), Ts, [")"])
     end.
 
-t_abstype(Es) ->
-    ([t_name(get_elem(erlangName, Es)), "("]
-     ++ seq(fun t_utype_elem/1, get_elem(type, Es), [")"])).
+t_abstype(Es, Opts) ->
+    ([t_name(get_elem(erlangName, Es), Opts), "("]
+     ++ seq(t_utype_elem_fun(Opts), get_elem(type, Es), [")"])).
 
-t_union(Es) ->
-    seq(fun t_utype_elem/1, Es, " | ", []).
+t_union(Es, Opts) ->
+    seq(t_utype_elem_fun(Opts), Es, " | ", []).
 
 seq(F, Es) ->
     seq(F, Es, []).
@@ -989,8 +1008,8 @@ local_label(R) ->
 
 xhtml(Title, CSS, Body, Encoding) ->
     EncString = case Encoding of
-                    "latin1" -> "ISO-8859-1";
-                    _ -> "UTF-8"
+                    latin1 -> "ISO-8859-1";
+                    utf8 -> "UTF-8"
                 end,
     [{html, [?NL,
 	     {head, [?NL,
@@ -1009,11 +1028,11 @@ xhtml(Title, CSS, Body, Encoding) ->
 %% ---------------------------------------------------------------------
 
 type(E) ->
-    type(E, []).
+    Opts = init_opts(E, []),
+    type(E, [], Opts).
 
-type(E, Ds) ->
-    Opts = [],
-    xmerl:export_simple_content(t_utype_elem(E) ++ local_defs(Ds, Opts),
+type(E, Ds, Opts) ->
+    xmerl:export_simple_content(t_utype_elem(E, Opts) ++ local_defs(Ds, Opts),
 				?HTML_EXPORT).
 
 overview(E=#xmlElement{name = overview, content = Es}, Options) ->
@@ -1036,7 +1055,7 @@ overview(E=#xmlElement{name = overview, content = Es}, Options) ->
 	    ++ [?NL, hr]
 	    ++ navigation("bottom")
 	    ++ footer()),
-    Encoding = get_attrval(encoding, E),
+    Encoding = Opts#opts.encoding,
     XML = xhtml(Title, stylesheet(Opts), Body, Encoding),
     xmerl:export_simple(XML, ?HTML_EXPORT, []).
 
@@ -1094,8 +1113,8 @@ ot_var(E) ->
     {var,0,list_to_atom(get_attrval(name, E))}.
 
 ot_atom(E) ->
-    {ok, [{atom,A,Name}], _} = erl_scan:string(get_attrval(value, E), 0),
-    {atom,erl_anno:line(A),Name}.
+    Name = list_to_atom(get_attrval(value, E)),
+    {atom,erl_anno:new(0),Name}.
 
 ot_integer(E) ->
     {integer,0,list_to_integer(get_attrval(value, E))}.
diff --git a/lib/edoc/src/edoc_report.erl b/lib/edoc/src/edoc_report.erl
index ed778c8112..76557ef483 100644
--- a/lib/edoc/src/edoc_report.erl
+++ b/lib/edoc/src/edoc_report.erl
@@ -94,7 +94,7 @@ where({File, footer}) ->
 where({File, header}) ->
     io_lib:fwrite("~ts, in header file: ", [File]);
 where({File, {F, A}}) ->
-    io_lib:fwrite("~ts, function ~s/~w: ", [File, F, A]);
+    io_lib:fwrite("~ts, function ~ts/~w: ", [File, F, A]);
 where([]) ->
     io_lib:fwrite("~s: ", [?APPLICATION]);
 where(File) when is_list(File) ->
diff --git a/lib/edoc/src/edoc_scanner.erl b/lib/edoc/src/edoc_scanner.erl
index f1d5e1d4b9..35d00c6c0e 100644
--- a/lib/edoc/src/edoc_scanner.erl
+++ b/lib/edoc/src/edoc_scanner.erl
@@ -86,6 +86,8 @@ scan1([C|Cs], Toks, Pos) when C >= 0, C =< $  -> 	% Skip blanks
     scan1(Cs, Toks, Pos);
 scan1([C|Cs], Toks, Pos) when C >= $a, C =< $z ->	% Unquoted atom
     scan_atom(C, Cs, Toks, Pos);
+scan1([C|Cs], Toks, Pos) when C >= $\337, C =< $\377, C /= $\367 ->
+    scan_atom(C, Cs, Toks, Pos);
 scan1([C|Cs], Toks, Pos) when C >= $0, C =< $9 ->	% Numbers
     scan_number(C, Cs, Toks, Pos);
 scan1([$-,C| Cs], Toks, Pos) when C >= $0, C =< $9 ->	% Signed numbers
@@ -96,6 +98,8 @@ scan1([C|Cs], Toks, Pos) when C >= $A, C =< $Z ->	% Variables
     scan_variable(C, Cs, Toks, Pos);
 scan1([$_|Cs], Toks, Pos) ->				% Variables
     scan_variable($_, Cs, Toks, Pos);
+scan1([C|Cs], Toks, Pos) when C >= $\300, C =< $\336, C /= $\327 ->
+    scan_variable(C, Cs, Toks, Pos);
 scan1([$$|Cs], Toks, Pos) ->			% Character constant
     case scan_char_const(Cs, Toks, Pos) of
 	{ok, Result} ->
@@ -261,6 +265,15 @@ scan_char([], _Pos) ->
 
 %% The following conforms to Standard Erlang escape sequences.
 
+-define(HEX(C), C >= $0 andalso C =< $9 orelse
+                C >= $A andalso C =< $F orelse
+                C >= $a andalso C =< $f).
+
+-define(UNICODE(C),
+         (C >= 0 andalso C < 16#D800 orelse
+          C > 16#DFFF andalso C < 16#FFFE orelse
+          C > 16#FFFF andalso C =< 16#10FFFF)).
+
 scan_escape([O1, O2, O3 | Cs], Pos) when        % \<1-3> octal digits
   O1 >= $0, O1 =< $3, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 ->
     Val = (O1*8 + O2)*8 + O3 - 73*$0,
@@ -272,6 +285,11 @@ scan_escape([O1, O2 | Cs], Pos) when
 scan_escape([O1 | Cs], Pos) when
   O1 >= $0, O1 =< $7 ->
     {O1 - $0,Cs,Pos};
+scan_escape([$x, ${ | Cs], Pos) ->
+    scan_hex(Cs, Pos, []);
+scan_escape([$x, H1, H2 | Cs], Pos) when ?HEX(H1), ?HEX(H2) ->
+    Val = (H1*16 + H2) - 17*$0,
+    {Val,Cs,Pos};
 scan_escape([$^, C | Cs], Pos) ->    % \^X -> CTL-X
     if C >= $\100, C =< $\137 ->
 	    {C - $\100,Cs,Pos};
@@ -285,6 +303,18 @@ scan_escape([C | Cs], Pos) ->
 scan_escape([], _Pos) ->
     {error, truncated_char}.
 
+scan_hex([C | Cs], Pos, HCs) when ?HEX(C) ->
+    scan_hex(Cs, Pos, [C | HCs]);
+scan_hex([$} | Cs], Pos, HCs) ->
+    case catch erlang:list_to_integer(lists:reverse(HCs), 16) of
+        Val when ?UNICODE(Val) ->
+            {Val,Cs,Pos};
+        _ ->
+            {error, undefined_escape_sequence}
+    end;
+scan_hex(_Cs, _Pos, _HCs) ->
+    {error, undefined_escape_sequence}.
+
 %% Note that we return $\000 for undefined escapes.
 escape_char($b) -> $\010;		% \b = BS
 escape_char($d) -> $\177;		% \d = DEL
diff --git a/lib/edoc/src/edoc_specs.erl b/lib/edoc/src/edoc_specs.erl
index c15dfd328f..fb04bfce0e 100644
--- a/lib/edoc/src/edoc_specs.erl
+++ b/lib/edoc/src/edoc_specs.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 1996-2016. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2017. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -274,12 +274,19 @@ parms([A | As], [D | Ds]) ->
 
 param(#t_paren{type = Type}, Default) ->
     param(Type, Default);
-param(#t_record{name = #t_atom{val = Name}}, _Default) ->
-    list_to_atom(capitalize(atom_to_list(Name)));
+param(#t_record{name = #t_atom{val = Name}}=T, Default) ->
+    AtomList = atom_to_list(Name),
+    case AtomList =:= lists:flatten(io_lib:write_atom(Name)) of
+        true ->
+            list_to_atom(capitalize(AtomList));
+        false ->
+            arg_name(?t_ann(T), Default)
+    end;
 param(T, Default) ->
     arg_name(?t_ann(T), Default).
 
 capitalize([C | Cs]) when C >= $a, C =< $z -> [C - 32 | Cs];
+capitalize([C | Cs]) when C >= $\340, C =< $\376, C /= $\367 -> [C - 32 | Cs];
 capitalize(Cs) -> Cs.
 
 %% Like edoc_types:arg_name/1
diff --git a/lib/edoc/test/edoc_SUITE.erl b/lib/edoc/test/edoc_SUITE.erl
index 4d846ad63d..29ca9d1203 100644
--- a/lib/edoc/test/edoc_SUITE.erl
+++ b/lib/edoc/test/edoc_SUITE.erl
@@ -23,12 +23,13 @@
 	 init_per_group/2,end_per_group/2]).
 
 %% Test cases
--export([app/1,appup/1,build_std/1,build_map_module/1,otp_12008/1, build_app/1]).
+-export([app/1,appup/1,build_std/1,build_map_module/1,otp_12008/1,
+         build_app/1, otp_14285/1]).
 
 suite() -> [{ct_hooks,[ts_install_cth]}].
 
 all() -> 
-    [app,appup,build_std,build_map_module,otp_12008, build_app].
+    [app,appup,build_std,build_map_module,otp_12008, build_app, otp_14285].
 
 groups() -> 
     [].
@@ -113,3 +114,18 @@ build_app(Config) ->
 	true = filelib:is_regular(filename:join(OutDir, "a.html")),
 	true = filelib:is_regular(filename:join(OutDir, "b.html")),
 	ok.
+
+otp_14285(Config) ->
+    DataDir  = ?config(data_dir, Config),
+    PrivDir  = ?config(priv_dir, Config),
+    Un1 = filename:join(DataDir, "un_atom1.erl"),
+    Un2 = filename:join(DataDir, "un_atom2.erl"),
+    %% epp_dodger
+    Opts1 = [{dir, PrivDir}],
+    ok = edoc:files([Un1], Opts1),
+    ok = edoc:files([Un2], Opts1),
+    %% epp
+    Opts2 = [{preprocess, true}, {dir, PrivDir}],
+    ok = edoc:files([Un1], Opts2),
+    ok = edoc:files([Un2], Opts2),
+    ok.
diff --git a/lib/edoc/test/edoc_SUITE_data/un_atom1.erl b/lib/edoc/test/edoc_SUITE_data/un_atom1.erl
new file mode 100644
index 0000000000..20ca50d5d2
--- /dev/null
+++ b/lib/edoc/test/edoc_SUITE_data/un_atom1.erl
@@ -0,0 +1,41 @@
+%% coding:latin-1
+
+-module(un_atom1).
+
+-export(['\x{aaa}memory'/0, 'func-\x{400}'/1, func/1, �func/1]).
+
+-record('rec-\x{400}', {'field-\x{400}'}).
+
+-type cs() :: $\x{a}
+            | $\x{aa}
+            | $\x{aaa}
+            | $\xaa.
+
+-callback 'callback-\x{400}'() -> 'apa'.
+
+-type 'type-\x{400}'() :: 'atom-\x{400}'
+                        | cs()
+                        | #'rec-\x{400}'{'field-\x{400}' :: 'type-\x{400}'()}.
+
+-spec '\x{aaa}memory'() -> 'type-\x{400}'().
+
+'\x{aaa}memory'() ->
+    apa:foo().
+
+%% @deprecated Please use {@link m:f/1}.
+-spec 'func-\x{400}'(#'rec-\x{400}'{}) -> #'rec-\x{400}'{}.
+
+'func-\x{400}'(_T) ->
+    foo:bar(#'rec-\x{400}'{}).
+
+-record(rec, {}).
+
+-spec func(#rec{}) -> #rec{}.
+
+func(#rec{}) -> #rec{}.
+
+-record(�rec, {}).
+
+-spec �func(#�rec{}) -> #�rec{}.
+
+�func(#�rec{}) -> #�rec{}.
diff --git a/lib/edoc/test/edoc_SUITE_data/un_atom2.erl b/lib/edoc/test/edoc_SUITE_data/un_atom2.erl
new file mode 100644
index 0000000000..66c83e30e0
--- /dev/null
+++ b/lib/edoc/test/edoc_SUITE_data/un_atom2.erl
@@ -0,0 +1,40 @@
+%% coding:utf-8
+-module(un_atom2).
+
+-export(['\x{aaa}memory'/0, 'func-\x{400}'/1, func/1, äfunc/1]).
+
+-record('rec-\x{400}', {'field-\x{400}'}).
+
+-type cs() :: $\x{a}
+            | $\x{aa}
+            | $\x{aaa}
+            | $\xaa.
+
+-callback 'callback-\x{400}'() -> 'apa'.
+
+-type 'type-\x{400}'() :: 'atom-\x{400}'
+                        | cs()
+                        | #'rec-\x{400}'{'field-\x{400}' :: 'type-\x{400}'()}.
+
+-spec '\x{aaa}memory'() -> 'type-\x{400}'().
+
+'\x{aaa}memory'() ->
+    apa:foo().
+
+%% @deprecated Please use {@link m:f/1}.
+-spec 'func-\x{400}'(#'rec-\x{400}'{}) -> #'rec-\x{400}'{}.
+
+'func-\x{400}'(_T) ->
+    foo:bar(#'rec-\x{400}'{}).
+
+-record(rec, {}).
+
+-spec func(#rec{}) -> #rec{}.
+
+func(#rec{}) -> #rec{}.
+
+-record(ärec, {}).
+
+-spec äfunc(#ärec{}) -> #ärec{}.
+
+äfunc(#ärec{}) -> #ärec{}.
-- 
cgit v1.2.3


From 55f4e2b93c92607a5a1c798541f101bae3e9247d Mon Sep 17 00:00:00 2001
From: Hans Bolinder <hasse@erlang.org>
Date: Thu, 30 Mar 2017 15:30:54 +0200
Subject: debugger: Show Latin-1 code correctly

---
 lib/debugger/src/dbg_wx_code.erl     | 4 ++--
 lib/debugger/src/dbg_wx_src_view.erl | 4 ++--
 lib/debugger/src/int.erl             | 6 ++++--
 3 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/lib/debugger/src/dbg_wx_code.erl b/lib/debugger/src/dbg_wx_code.erl
index 473963500a..bca8a0d241 100644
--- a/lib/debugger/src/dbg_wx_code.erl
+++ b/lib/debugger/src/dbg_wx_code.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %% 
-%% Copyright Ericsson AB 2008-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
 %% 
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -190,6 +190,6 @@ find(Ed, Str, Case, Next) ->
    
 keyWords() ->
     L = ["after","begin","case","try","cond","catch","andalso","orelse",
-	 "end","fun","if","let","of","query","receive","when","bnot","not",
+	 "end","fun","if","let","of","receive","when","bnot","not",
 	 "div","rem","band","and","bor","bxor","bsl","bsr","or","xor"],
     lists:flatten([K ++ " " || K <- L] ++ [0]).
diff --git a/lib/debugger/src/dbg_wx_src_view.erl b/lib/debugger/src/dbg_wx_src_view.erl
index 207c407fbc..ee8eb72407 100644
--- a/lib/debugger/src/dbg_wx_src_view.erl
+++ b/lib/debugger/src/dbg_wx_src_view.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %% 
-%% Copyright Ericsson AB 2008-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
 %% 
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -62,6 +62,6 @@ code_area(Parent, Sizer) ->
 
 keyWords() ->
     L = ["after","begin","case","try","cond","catch","andalso","orelse",
-	 "end","fun","if","let","of","query","receive","when","bnot","not",
+	 "end","fun","if","let","of","receive","when","bnot","not",
 	 "div","rem","band","and","bor","bxor","bsl","bsr","or","xor"],
     lists:flatten([K ++ " " || K <- L] ++ [0]).
diff --git a/lib/debugger/src/int.erl b/lib/debugger/src/int.erl
index e5bade9abe..fdf5957182 100644
--- a/lib/debugger/src/int.erl
+++ b/lib/debugger/src/int.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %% 
-%% Copyright Ericsson AB 1998-2016. All Rights Reserved.
+%% Copyright Ericsson AB 1998-2017. All Rights Reserved.
 %% 
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -533,7 +533,9 @@ load({Mod, Src, Beam, BeamBin, Exp, Abst}, Dist) ->
     case erl_prim_loader:get_file(filename:absname(Src)) of
 	{ok, SrcBin, _} ->
 	    MD5 = code:module_md5(BeamBin),
-	    Bin = term_to_binary({interpreter_module,Exp,Abst,SrcBin,MD5}),
+            SrcBin1 = unicode:characters_to_binary(SrcBin, enc(SrcBin)),
+            true = is_binary(SrcBin1),
+	    Bin = term_to_binary({interpreter_module,Exp,Abst,SrcBin1,MD5}),
 	    {module, Mod} = dbg_iserver:safe_call({load, Mod, Src, Bin}),
 	    _ = everywhere(Dist,
 			   fun() ->
-- 
cgit v1.2.3


From 73657a28e74f8ad12ddb4fea49272a74f5b823c3 Mon Sep 17 00:00:00 2001
From: Hans Bolinder <hasse@erlang.org>
Date: Fri, 31 Mar 2017 10:17:05 +0200
Subject: syntax_tools: Fix Syntax Tools regarding Unicode atoms

---
 lib/syntax_tools/src/erl_prettypr.erl |  2 +-
 lib/syntax_tools/src/erl_syntax.erl   | 25 ++++++++++++++++++++++---
 2 files changed, 23 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/syntax_tools/src/erl_prettypr.erl b/lib/syntax_tools/src/erl_prettypr.erl
index 378d69095d..40ddd2b22a 100644
--- a/lib/syntax_tools/src/erl_prettypr.erl
+++ b/lib/syntax_tools/src/erl_prettypr.erl
@@ -452,7 +452,7 @@ lay_2(Node, Ctxt) ->
 	    text(erl_syntax:variable_literal(Node));
 	
 	atom ->
-	    text(erl_syntax:atom_literal(Node));
+	    text(erl_syntax:atom_literal(Node, Ctxt#ctxt.encoding));
 	
 	integer ->
 	    text(erl_syntax:integer_literal(Node));
diff --git a/lib/syntax_tools/src/erl_syntax.erl b/lib/syntax_tools/src/erl_syntax.erl
index 4347cc46c1..9b2b503762 100644
--- a/lib/syntax_tools/src/erl_syntax.erl
+++ b/lib/syntax_tools/src/erl_syntax.erl
@@ -139,6 +139,7 @@
 	 is_atom/2,
 	 atom_value/1,
 	 atom_literal/1,
+         atom_literal/2,
 	 atom_name/1,
 	 attribute/1,
 	 attribute/2,
@@ -1841,7 +1842,7 @@ char_literal(Node) ->
 %% @doc Returns the literal string represented by a `char'
 %% node. This includes the leading "`$'" character.
 %% Depending on the encoding a character beyond 255 will be escaped
-%% ('latin1') or copied as is ('utf8').
+%% (`latin1') or copied as is (`utf8').
 %%
 %% @see char/1
 
@@ -1944,7 +1945,7 @@ string_literal(Node) ->
 %% @doc Returns the literal string represented by a `string'
 %% node. This includes surrounding double-quote characters.
 %% Depending on the encoding characters beyond 255 will be escaped
-%% ('latin1') or copied as is ('utf8').
+%% (`latin1') or copied as is (`utf8').
 %%
 %% @see string/1
 
@@ -1965,6 +1966,7 @@ string_literal(Node, latin1) ->
 %% @see atom_value/1
 %% @see atom_name/1
 %% @see atom_literal/1
+%% @see atom_literal/2
 %% @see is_atom/2
 
 %% type(Node) = atom
@@ -2037,6 +2039,7 @@ atom_name(Node) ->
 %% =====================================================================
 %% @doc Returns the literal string represented by an `atom'
 %% node. This includes surrounding single-quote characters if necessary.
+%% Characters beyond 255 will be escaped.
 %%
 %% Note that e.g. the result of `atom("x\ny")' represents
 %% any and all of `'x\ny'', `'x\12y'',
@@ -2048,8 +2051,24 @@ atom_name(Node) ->
 -spec atom_literal(syntaxTree()) -> string().
 
 atom_literal(Node) ->
-    io_lib:write_atom(atom_value(Node)).
+    atom_literal(Node, latin1).
+
+%% =====================================================================
+%% @doc Returns the literal string represented by an `atom'
+%% node. This includes surrounding single-quote characters if necessary.
+%% Depending on the encoding a character beyond 255 will be escaped
+%% (`latin1') or copied as is (`utf8').
+%%
+%% @see atom/1
+%% @see atom_literal/1
+%% @see string/1
 
+atom_literal(Node, utf8) ->
+    io_lib:write_atom(atom_value(Node));
+atom_literal(Node, unicode) ->
+    io_lib:write_atom(atom_value(Node));
+atom_literal(Node, latin1) ->
+    io_lib:write_atom_as_latin1(atom_value(Node)).
 
 %% =====================================================================
 %% @equiv map_expr(none, Fields)
-- 
cgit v1.2.3