diff options
author | Hans Bolinder <[email protected]> | 2017-04-10 08:58:25 +0200 |
---|---|---|
committer | Hans Bolinder <[email protected]> | 2017-04-10 08:58:25 +0200 |
commit | 710abdb5d79554cde4e0ca051bfa16bdcfdccc57 (patch) | |
tree | 8e99daa1f998f49b0bd78a1937d70c9ca07f59eb /lib | |
parent | 3a29920a05cedee8c1f7501ee7b1aa22e068efed (diff) | |
parent | 73657a28e74f8ad12ddb4fea49272a74f5b823c3 (diff) | |
download | otp-710abdb5d79554cde4e0ca051bfa16bdcfdccc57.tar.gz otp-710abdb5d79554cde4e0ca051bfa16bdcfdccc57.tar.bz2 otp-710abdb5d79554cde4e0ca051bfa16bdcfdccc57.zip |
Merge branch 'hasse/unicode_atoms/OTP-14285'
* hasse/unicode_atoms/OTP-14285:
syntax_tools: Fix Syntax Tools regarding Unicode atoms
debugger: Show Latin-1 code correctly
edoc: Fix EDoc regarding Unicode atoms
parsetools: Fix Yecc regarding Unicode atoms
parsetools: Fix Leex regarding Unicode atoms
stdlib: Fix Erlang shell regarding Unicode atoms
stdlib: Fix Erlang pretty printer regarding Unicode atoms
stdlib: Add function to io_lib to handle Unicode atoms
Diffstat (limited to 'lib')
29 files changed, 798 insertions, 332 deletions
diff --git a/lib/debugger/src/dbg_wx_code.erl b/lib/debugger/src/dbg_wx_code.erl index 473963500a..bca8a0d241 100644 --- a/lib/debugger/src/dbg_wx_code.erl +++ b/lib/debugger/src/dbg_wx_code.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -190,6 +190,6 @@ find(Ed, Str, Case, Next) -> keyWords() -> L = ["after","begin","case","try","cond","catch","andalso","orelse", - "end","fun","if","let","of","query","receive","when","bnot","not", + "end","fun","if","let","of","receive","when","bnot","not", "div","rem","band","and","bor","bxor","bsl","bsr","or","xor"], lists:flatten([K ++ " " || K <- L] ++ [0]). diff --git a/lib/debugger/src/dbg_wx_src_view.erl b/lib/debugger/src/dbg_wx_src_view.erl index 207c407fbc..ee8eb72407 100644 --- a/lib/debugger/src/dbg_wx_src_view.erl +++ b/lib/debugger/src/dbg_wx_src_view.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -62,6 +62,6 @@ code_area(Parent, Sizer) -> keyWords() -> L = ["after","begin","case","try","cond","catch","andalso","orelse", - "end","fun","if","let","of","query","receive","when","bnot","not", + "end","fun","if","let","of","receive","when","bnot","not", "div","rem","band","and","bor","bxor","bsl","bsr","or","xor"], lists:flatten([K ++ " " || K <- L] ++ [0]). diff --git a/lib/debugger/src/int.erl b/lib/debugger/src/int.erl index e5bade9abe..fdf5957182 100644 --- a/lib/debugger/src/int.erl +++ b/lib/debugger/src/int.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1998-2016. All Rights Reserved. +%% Copyright Ericsson AB 1998-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -533,7 +533,9 @@ load({Mod, Src, Beam, BeamBin, Exp, Abst}, Dist) -> case erl_prim_loader:get_file(filename:absname(Src)) of {ok, SrcBin, _} -> MD5 = code:module_md5(BeamBin), - Bin = term_to_binary({interpreter_module,Exp,Abst,SrcBin,MD5}), + SrcBin1 = unicode:characters_to_binary(SrcBin, enc(SrcBin)), + true = is_binary(SrcBin1), + Bin = term_to_binary({interpreter_module,Exp,Abst,SrcBin1,MD5}), {module, Mod} = dbg_iserver:safe_call({load, Mod, Src, Bin}), _ = everywhere(Dist, fun() -> diff --git a/lib/edoc/src/edoc.erl b/lib/edoc/src/edoc.erl index 7276a57268..b641118c5d 100644 --- a/lib/edoc/src/edoc.erl +++ b/lib/edoc/src/edoc.erl @@ -197,7 +197,7 @@ application(App, Dir, Options) when is_atom(App) -> ?OVERVIEW_FILE), Opts = Options ++ [{source_path, [Src]}, subpackages, - {title, io_lib:fwrite("The ~s application", [App])}, + {title, io_lib:fwrite("The ~ts application", [App])}, {overview, Overview}, {dir, filename:join(Dir, ?EDOC_DIR)}, {includes, [filename:join(Dir, "include")]}], diff --git a/lib/edoc/src/edoc_doclet.erl b/lib/edoc/src/edoc_doclet.erl index 006b07574b..6e17ec0af0 100644 --- a/lib/edoc/src/edoc_doclet.erl +++ b/lib/edoc/src/edoc_doclet.erl @@ -152,7 +152,7 @@ title(App, Options) -> if App == ?NO_APP -> "Overview"; true -> - io_lib:fwrite("Application: ~s", [App]) + io_lib:fwrite("Application: ~ts", [App]) end). diff --git a/lib/edoc/src/edoc_extract.erl b/lib/edoc/src/edoc_extract.erl index 68edad1a3e..390851e9ef 100644 --- a/lib/edoc/src/edoc_extract.erl +++ b/lib/edoc/src/edoc_extract.erl @@ -488,8 +488,15 @@ find_names([P | Ps], Ns) -> find_names([P1 | Ps], Ns); record_expr -> A = erl_syntax:record_expr_type(P), - N = list_to_atom(capitalize(erl_syntax:atom_name(A))), - find_names(Ps, [N | Ns]); + AtomName = erl_syntax:atom_name(A), + Atom = list_to_atom(AtomName), + case AtomName =:= lists:flatten(io_lib:write_atom(Atom)) of + true -> + N = list_to_atom(capitalize(AtomName)), + find_names(Ps, [N | Ns]); + false -> + find_names(Ps, Ns) + end; infix_expr -> %% this can only be a '++' operation P1 = erl_syntax:infix_expr_right(P), @@ -540,6 +547,7 @@ tidy_name_1(Cs) -> [$_ | Cs]. %% Change initial character from lowercase to uppercase. capitalize([C | Cs]) when C >= $a, C =< $z -> [C - 32 | Cs]; +capitalize([C | Cs]) when C >= $\340, C =< $\376, C /= $\367 -> [C - 32 | Cs]; capitalize(Cs) -> Cs. %% Collects the tags belonging to each entry, checks them, expands diff --git a/lib/edoc/src/edoc_layout.erl b/lib/edoc/src/edoc_layout.erl index 5ef210980c..eafab0588e 100644 --- a/lib/edoc/src/edoc_layout.erl +++ b/lib/edoc/src/edoc_layout.erl @@ -109,14 +109,20 @@ module(Element, Options) -> stylesheet, index_columns, sort_functions, + encoding, pretty_printer}). init_opts(Element, Options) -> + Encoding = case get_attrval(encoding, Element) of + "latin1" -> latin1; + _ -> utf8 + end, R = #opts{root = get_attrval(root, Element), index_columns = proplists:get_value(index_columns, Options, 1), sort_functions = proplists:get_value(sort_functions, Options, true), + encoding = Encoding, pretty_printer = proplists:get_value(pretty_printer, Options, '') }, @@ -183,8 +189,9 @@ layout_module(#xmlElement{name = module, content = Es}=E, Opts) -> Desc = get_content(description, Es), ShortDesc = get_content(briefDescription, Desc), FullDesc = get_content(fullDescription, Desc), - Functions = [{function_name(E), E} || E <- get_content(functions, Es)], - Types = [{type_name(E), E} || E <- get_content(typedecls, Es)], + Functions = [{function_name(E, Opts), E} || + E <- get_content(functions, Es)], + Types = [{type_name(E, Opts), E} || E <- get_content(typedecls, Es)], SortedFs = if Opts#opts.sort_functions -> lists:sort(Functions); true -> Functions end, @@ -198,7 +205,7 @@ layout_module(#xmlElement{name = module, content = Es}=E, Opts) -> ++ [?NL] ++ version(Es) ++ since(Es) - ++ behaviours(Es, Name) + ++ behaviours(Es, Name, Opts) ++ authors(Es) ++ references(Es) ++ sees(Es) @@ -215,7 +222,7 @@ layout_module(#xmlElement{name = module, content = Es}=E, Opts) -> ++ [hr, ?NL] ++ navigation("bottom") ++ footer()), - Encoding = get_attrval(encoding, E), + Encoding = Opts#opts.encoding, xhtml(Title, stylesheet(Opts), Body, Encoding). module_params(Es) -> @@ -367,7 +374,7 @@ function(Name, E=#xmlElement{content = Es}, Opts) -> case typespec(get_content(typespec, Es), Opts) of [] -> signature(get_content(args, Es), - get_attrval(name, E)); + atom(get_attrval(name, E), Opts)); Spec -> Spec end}, ?NL] @@ -387,8 +394,8 @@ function(Name, E=#xmlElement{content = Es}, Opts) -> ++ sees(Es) ++ todos(Es)). -function_name(E) -> - atom(get_attrval(name, E)) ++ "/" ++ get_attrval(arity, E). +function_name(E, Opts) -> + atom(get_attrval(name, E), Opts) ++ "/" ++ get_attrval(arity, E). function_header(Name, E, Private) -> case is_exported(E) of @@ -449,7 +456,7 @@ throws(Es, Opts) -> [] -> []; Es1 -> %% Doesn't use format_type; keep it short! - [{p, (["throws ", {tt, t_utype(get_elem(type, Es1))}] + [{p, (["throws ", {tt, t_utype(get_elem(type, Es1), Opts)}] ++ local_defs(get_elem(localdef, Es1), Opts))}, ?NL] end. @@ -458,7 +465,7 @@ throws(Es, Opts) -> typespec([], _Opts) -> []; typespec(Es, Opts) -> - Name = t_name(get_elem(erlangName, Es)), + Name = t_name(get_elem(erlangName, Es), Opts), Defs = get_elem(localdef, Es), [Type] = get_elem(type, Es), format_spec(Name, Type, Defs, Opts) ++ local_defs(Defs, Opts). @@ -479,12 +486,12 @@ typedecl(Name, E=#xmlElement{content = Es}, Opts) -> ++ [{p, typedef(get_content(typedef, Es), Opts)}, ?NL] ++ fulldesc(Es)). -type_name(#xmlElement{content = Es}) -> - t_name(get_elem(erlangName, get_content(typedef, Es))). +type_name(#xmlElement{content = Es}, Opts) -> + t_name(get_elem(erlangName, get_content(typedef, Es)), Opts). typedef(Es, Opts) -> - Name = ([t_name(get_elem(erlangName, Es)), "("] - ++ seq(fun t_utype_elem/1, get_content(argtypes, Es), [")"])), + Name = ([t_name(get_elem(erlangName, Es), Opts), "("] + ++ seq(t_utype_elem_fun(Opts), get_content(argtypes, Es), [")"])), (case get_elem(type, Es) of [] -> [{b, ["abstract datatype"]}, ": ", {tt, Name}]; Type -> format_type(Name, Name, Type, [], Opts) @@ -505,7 +512,9 @@ local_defs(Es0, Last, Opts) -> localdef(E = #xmlElement{content = Es}, Last, Opts) -> Name = case get_elem(typevar, Es) of [] -> - label_anchor(N0 = t_abstype(get_content(abstype, Es)), E); + label_anchor(N0 = t_abstype(get_content(abstype, Es), + Opts), + E); [V] -> N0 = t_var(V) end, @@ -516,97 +525,99 @@ localdef(E = #xmlElement{content = Es}, Last, Opts) -> %% (fast) Erlang pretty printer). format_spec(Name, Type, Defs, #opts{pretty_printer = erl_pp}=Opts) -> try - L = t_clause(Name, Type), - O = pp_clause(Name, Type), - {R, ".\n"} = etypef(L, O), + L = t_clause(Name, Type, Opts), + O = pp_clause(Name, Type, Opts), + {R, ".\n"} = etypef(L, O, Opts), [{pre, R}] catch _:_ -> %% Should not happen. format_spec(Name, Type, Defs, Opts#opts{pretty_printer=''}) end; -format_spec(Sep, Type, Defs, _Opts) -> +format_spec(Sep, Type, Defs, Opts) -> %% Very limited formatting. Br = if Defs =:= [] -> br; true -> [] end, - [{tt, t_clause(Sep, Type)}, Br]. + [{tt, t_clause(Sep, Type, Opts)}, Br]. -t_clause(Name, Type) -> +t_clause(Name, Type, Opts) -> #xmlElement{content = [#xmlElement{name = 'fun', content = C}]} = Type, - [Name] ++ t_fun(C). + [Name] ++ t_fun(C, Opts). -pp_clause(Pre, Type) -> +pp_clause(Pre, Type, Opts) -> Types = ot_utype([Type]), - Atom = lists:duplicate(iolist_size(Pre), $a), + Atom = lists:duplicate(string:length(Pre), $a), Attr = {attribute,0,spec,{{list_to_atom(Atom),0},[Types]}}, - L1 = erl_pp:attribute(erl_parse:new_anno(Attr)), + L1 = erl_pp:attribute(erl_parse:new_anno(Attr), + [{encoding, Opts#opts.encoding}]), "-spec " ++ L2 = lists:flatten(L1), L3 = Pre ++ lists:nthtail(length(Atom), L2), - re:replace(L3, "\n ", "\n", [{return,list},global]). + re:replace(L3, "\n ", "\n", [{return,list},global,unicode]). format_type(Prefix, Name, Type, Last, #opts{pretty_printer = erl_pp}=Opts) -> try - L = t_utype(Type), - O = pp_type(Name, Type), - {R, ".\n"} = etypef(L, O), + L = t_utype(Type, Opts), + O = pp_type(Name, Type, Opts), + {R, ".\n"} = etypef(L, O, Opts), [{pre, Prefix ++ [" = "] ++ R ++ Last}] catch _:_ -> %% Example: "t() = record(a)." format_type(Prefix, Name, Type, Last, Opts#opts{pretty_printer =''}) end; -format_type(Prefix, _Name, Type, Last, _Opts) -> - [{tt, Prefix ++ [" = "] ++ t_utype(Type) ++ Last}]. +format_type(Prefix, _Name, Type, Last, Opts) -> + [{tt, Prefix ++ [" = "] ++ t_utype(Type, Opts) ++ Last}]. -pp_type(Prefix, Type) -> - Atom = list_to_atom(lists:duplicate(iolist_size(Prefix), $a)), +pp_type(Prefix, Type, Opts) -> + Atom = list_to_atom(lists:duplicate(string:length(Prefix), $a)), Attr = {attribute,0,type,{Atom,ot_utype(Type),[]}}, - L1 = erl_pp:attribute(erl_parse:new_anno(Attr)), + L1 = erl_pp:attribute(erl_parse:new_anno(Attr), + [{encoding, Opts#opts.encoding}]), {L2,N} = case lists:dropwhile(fun(C) -> C =/= $: end, lists:flatten(L1)) of ":: " ++ L3 -> {L3,9}; % compensation for extra "()" and ":" "::\n" ++ L3 -> {"\n"++L3,6} end, Ss = lists:duplicate(N, $\s), - re:replace(L2, "\n"++Ss, "\n", [{return,list},global]). + re:replace(L2, "\n"++Ss, "\n", [{return,list},global,unicode]). -etypef(L, O0) -> - {R, O} = etypef(L, [], O0, []), +etypef(L, O0, Opts) -> + {R, O} = etypef(L, [], O0, [], Opts), {lists:reverse(R), O}. -etypef([C | L], St, [C | O], R) -> - etypef(L, St, O, [[C] | R]); -etypef(" "++L, St, O, R) -> - etypef(L, St, O, R); -etypef("", [Cs | St], O, R) -> - etypef(Cs, St, O, R); -etypef("", [], O, R) -> +etypef([C | L], St, [C | O], R, Opts) -> + etypef(L, St, O, [[C] | R], Opts); +etypef(" "++L, St, O, R, Opts) -> + etypef(L, St, O, R, Opts); +etypef("", [Cs | St], O, R, Opts) -> + etypef(Cs, St, O, R, Opts); +etypef("", [], O, R, _Opts) -> {R, O}; -etypef(L, St, " "++O, R) -> - etypef(L, St, O, [" " | R]); -etypef(L, St, "\n"++O, R) -> +etypef(L, St, " "++O, R, Opts) -> + etypef(L, St, O, [" " | R], Opts); +etypef(L, St, "\n"++O, R, Opts) -> Ss = lists:takewhile(fun(C) -> C =:= $\s end, O), - etypef(L, St, lists:nthtail(length(Ss), O), ["\n"++Ss | R]); -etypef([{a, HRef, S0} | L], St, O0, R) -> - {S, O} = etypef(S0, app_fix(O0)), - etypef(L, St, O, [{a, HRef, S} | R]); -etypef("="++L, St, "::"++O, R) -> + etypef(L, St, lists:nthtail(length(Ss), O), ["\n"++Ss | R], Opts); +etypef([{a, HRef, S0} | L], St, O0, R, Opts) -> + {S, O} = etypef(S0, app_fix(O0, Opts), Opts), + etypef(L, St, O, [{a, HRef, S} | R], Opts); +etypef("="++L, St, "::"++O, R, Opts) -> %% EDoc uses "=" for record field types; Erlang types use "::". %% Maybe there should be an option for this, possibly affecting %% other similar discrepancies. - etypef(L, St, O, ["=" | R]); -etypef([Cs | L], St, O, R) -> - etypef(Cs, [L | St], O, R). + etypef(L, St, O, ["=" | R], Opts); +etypef([Cs | L], St, O, R, Opts) -> + etypef(Cs, [L | St], O, R, Opts). -app_fix(L) -> +app_fix(L, Opts) -> try - {"//" ++ R1,L2} = app_fix(L, 1), + {"//" ++ R1,L2} = app_fix1(L, 1), [App, Mod] = string:tokens(R1, "/"), - "//" ++ atom(App) ++ "/" ++ atom(Mod) ++ L2 + "//" ++ atom(App, Opts) ++ "/" ++ atom(Mod, Opts) ++ L2 catch _:_ -> L end. -app_fix(L, I) -> % a bit slow +app_fix1(L, I) -> % a bit slow {L1, L2} = lists:split(I, L), case erl_scan:tokens([], L1 ++ ". ", 1) of {done, {ok,[{atom,_,Atom}|_],_}, _} -> {atom_to_list(Atom), L2}; - _ -> app_fix(L, I+1) + _ -> app_fix1(L, I+1) end. fulldesc(Es) -> @@ -703,7 +714,7 @@ deprecated(Es, S) -> ?NL] end. -behaviours(Es, Name) -> +behaviours(Es, Name, Opts) -> CBs = get_content(callbacks, Es), OCBs = get_content(optional_callbacks, Es), (case get_elem(behaviour, Es) of @@ -717,17 +728,18 @@ behaviours(Es, Name) -> if CBs =:= [], OCBs =:= [] -> []; true -> + CBFun = fun(E) -> callback(E, Opts) end, Req = if CBs =:= [] -> []; true -> [br, " Required callback functions: "] - ++ seq(fun callback/1, CBs, ["."]) + ++ seq(CBFun, CBs, ["."]) end, Opt = if OCBs =:= [] -> []; true -> [br, " Optional callback functions: "] - ++ seq(fun callback/1, OCBs, ["."]) + ++ seq(CBFun, OCBs, ["."]) end, [{p, ([{b, ["This module defines the ", {tt, [Name]}, " behaviour."]}] @@ -738,10 +750,10 @@ behaviours(Es, Name) -> behaviour(E=#xmlElement{content = Es}) -> see(E, [{tt, Es}]). -callback(E=#xmlElement{}) -> +callback(E=#xmlElement{}, Opts) -> Name = get_attrval(name, E), Arity = get_attrval(arity, E), - [{tt, [Name, "/", Arity]}]. + [{tt, [atom(Name, Opts), "/", Arity]}]. authors(Es) -> case get_elem(author, Es) of @@ -751,7 +763,9 @@ authors(Es) -> ?NL] end. -atom(String) -> +atom(String, #opts{encoding = latin1}) -> + io_lib:write_atom_as_latin1(list_to_atom(String)); +atom(String, #opts{encoding = utf8}) -> io_lib:write_atom(list_to_atom(String)). %% <!ATTLIST author @@ -799,70 +813,73 @@ todos(Es) -> ?NL] end. -t_name([E]) -> +t_name([E], Opts) -> N = get_attrval(name, E), case get_attrval(module, E) of - "" -> atom(N); + "" -> atom(N, Opts); M -> - S = atom(M) ++ ":" ++ atom(N), + S = atom(M, Opts) ++ ":" ++ atom(N, Opts), case get_attrval(app, E) of "" -> S; - A -> "//" ++ atom(A) ++ "/" ++ S + A -> "//" ++ atom(A, Opts) ++ "/" ++ S end end. -t_utype([E]) -> - t_utype_elem(E). +t_utype([E], Opts) -> + t_utype_elem(E, Opts). + +t_utype_elem_fun(Opts) -> + fun(E) -> t_utype_elem(E, Opts) end. -t_utype_elem(E=#xmlElement{content = Es}) -> +t_utype_elem(E=#xmlElement{content = Es}, Opts) -> case get_attrval(name, E) of - "" -> t_type(Es); + "" -> t_type(Es, Opts); Name -> - T = t_type(Es), + T = t_type(Es, Opts), case T of [Name] -> T; % avoid generating "Foo::Foo" T -> [Name] ++ ["::"] ++ T end end. -t_type([E=#xmlElement{name = typevar}]) -> +t_type([E=#xmlElement{name = typevar}], _Opts) -> t_var(E); -t_type([E=#xmlElement{name = atom}]) -> - t_atom(E); -t_type([E=#xmlElement{name = integer}]) -> +t_type([E=#xmlElement{name = atom}], Opts) -> + t_atom(E, Opts); +t_type([E=#xmlElement{name = integer}], _Opts) -> t_integer(E); -t_type([E=#xmlElement{name = range}]) -> +t_type([E=#xmlElement{name = range}], _Opts) -> t_range(E); -t_type([E=#xmlElement{name = binary}]) -> +t_type([E=#xmlElement{name = binary}], _Opts) -> t_binary(E); -t_type([E=#xmlElement{name = float}]) -> +t_type([E=#xmlElement{name = float}], _Opts) -> t_float(E); -t_type([#xmlElement{name = nil}]) -> +t_type([#xmlElement{name = nil}], _Opts) -> t_nil(); -t_type([#xmlElement{name = paren, content = Es}]) -> - t_paren(Es); -t_type([#xmlElement{name = list, content = Es}]) -> - t_list(Es); -t_type([#xmlElement{name = nonempty_list, content = Es}]) -> - t_nonempty_list(Es); -t_type([#xmlElement{name = map, content = Es}]) -> - t_map(Es); -t_type([#xmlElement{name = tuple, content = Es}]) -> - t_tuple(Es); -t_type([#xmlElement{name = 'fun', content = Es}]) -> - ["fun("] ++ t_fun(Es) ++ [")"]; -t_type([E = #xmlElement{name = record, content = Es}]) -> - t_record(E, Es); -t_type([E = #xmlElement{name = abstype, content = Es}]) -> - t_abstype(E, Es); -t_type([#xmlElement{name = union, content = Es}]) -> - t_union(Es). +t_type([#xmlElement{name = paren, content = Es}], Opts) -> + t_paren(Es, Opts); +t_type([#xmlElement{name = list, content = Es}], Opts) -> + t_list(Es, Opts); +t_type([#xmlElement{name = nonempty_list, content = Es}], Opts) -> + t_nonempty_list(Es, Opts); +t_type([#xmlElement{name = map, content = Es}], Opts) -> + t_map(Es, Opts); +t_type([#xmlElement{name = tuple, content = Es}], Opts) -> + t_tuple(Es, Opts); +t_type([#xmlElement{name = 'fun', content = Es}], Opts) -> + ["fun("] ++ t_fun(Es, Opts) ++ [")"]; +t_type([E = #xmlElement{name = record, content = Es}], Opts) -> + t_record(E, Es, Opts); +t_type([E = #xmlElement{name = abstype, content = Es}], Opts) -> + t_abstype(E, Es, Opts); +t_type([#xmlElement{name = union, content = Es}], Opts) -> + t_union(Es, Opts). t_var(E) -> [get_attrval(name, E)]. -t_atom(E) -> - [get_attrval(value, E)]. +t_atom(E, Opts) -> + [atom(get_attrval(value, E), Opts)]. t_integer(E) -> [get_attrval(value, E)]. @@ -879,62 +896,64 @@ t_float(E) -> t_nil() -> ["[]"]. -t_paren(Es) -> - ["("] ++ t_utype(get_elem(type, Es)) ++ [")"]. +t_paren(Es, Opts) -> + ["("] ++ t_utype(get_elem(type, Es), Opts) ++ [")"]. -t_list(Es) -> - ["["] ++ t_utype(get_elem(type, Es)) ++ ["]"]. +t_list(Es, Opts) -> + ["["] ++ t_utype(get_elem(type, Es), Opts) ++ ["]"]. -t_nonempty_list(Es) -> - ["["] ++ t_utype(get_elem(type, Es)) ++ [", ...]"]. +t_nonempty_list(Es, Opts) -> + ["["] ++ t_utype(get_elem(type, Es), Opts) ++ [", ...]"]. -t_tuple(Es) -> - ["{"] ++ seq(fun t_utype_elem/1, Es, ["}"]). +t_tuple(Es, Opts) -> + ["{"] ++ seq(t_utype_elem_fun(Opts), Es, ["}"]). -t_fun(Es) -> - ["("] ++ seq(fun t_utype_elem/1, get_content(argtypes, Es), - [") -> "] ++ t_utype(get_elem(type, Es))). +t_fun(Es, Opts) -> + ["("] ++ seq(t_utype_elem_fun(Opts), get_content(argtypes, Es), + [") -> "] ++ t_utype(get_elem(type, Es), Opts)). -t_map(Es) -> +t_map(Es, Opts) -> Fs = get_elem(map_field, Es), - ["#{"] ++ seq(fun t_map_field/1, Fs, ["}"]). + ["#{"] ++ seq(fun(E) -> t_map_field(E, Opts) end, Fs, ["}"]). -t_map_field(#xmlElement{content = [K,V]}=E) -> - KElem = t_utype_elem(K), - VElem = t_utype_elem(V), +t_map_field(#xmlElement{content = [K,V]}=E, Opts) -> + KElem = t_utype_elem(K, Opts), + VElem = t_utype_elem(V, Opts), AS = case get_attrval(assoc_type, E) of "assoc" -> " => "; "exact" -> " := " end, KElem ++ [AS] ++ VElem. -t_record(E, Es) -> - Name = ["#"] ++ t_type(get_elem(atom, Es)), +t_record(E, Es, Opts) -> + Name = ["#"] ++ t_type(get_elem(atom, Es), Opts), case get_elem(field, Es) of [] -> see(E, [Name, "{}"]); Fs -> - see(E, Name) ++ ["{"] ++ seq(fun t_field/1, Fs, ["}"]) + see(E, Name) ++ ["{"] ++ seq(fun(F) -> t_field(F, Opts) end, + Fs, ["}"]) end. -t_field(#xmlElement{content = Es}) -> - t_type(get_elem(atom, Es)) ++ [" = "] ++ t_utype(get_elem(type, Es)). +t_field(#xmlElement{content = Es}, Opts) -> + (t_type(get_elem(atom, Es), Opts) ++ [" = "] ++ + t_utype(get_elem(type, Es), Opts)). -t_abstype(E, Es) -> - Name = t_name(get_elem(erlangName, Es)), +t_abstype(E, Es, Opts) -> + Name = t_name(get_elem(erlangName, Es), Opts), case get_elem(type, Es) of [] -> see(E, [Name, "()"]); Ts -> - see(E, [Name]) ++ ["("] ++ seq(fun t_utype_elem/1, Ts, [")"]) + see(E, [Name]) ++ ["("] ++ seq(t_utype_elem_fun(Opts), Ts, [")"]) end. -t_abstype(Es) -> - ([t_name(get_elem(erlangName, Es)), "("] - ++ seq(fun t_utype_elem/1, get_elem(type, Es), [")"])). +t_abstype(Es, Opts) -> + ([t_name(get_elem(erlangName, Es), Opts), "("] + ++ seq(t_utype_elem_fun(Opts), get_elem(type, Es), [")"])). -t_union(Es) -> - seq(fun t_utype_elem/1, Es, " | ", []). +t_union(Es, Opts) -> + seq(t_utype_elem_fun(Opts), Es, " | ", []). seq(F, Es) -> seq(F, Es, []). @@ -989,8 +1008,8 @@ local_label(R) -> xhtml(Title, CSS, Body, Encoding) -> EncString = case Encoding of - "latin1" -> "ISO-8859-1"; - _ -> "UTF-8" + latin1 -> "ISO-8859-1"; + utf8 -> "UTF-8" end, [{html, [?NL, {head, [?NL, @@ -1009,11 +1028,11 @@ xhtml(Title, CSS, Body, Encoding) -> %% --------------------------------------------------------------------- type(E) -> - type(E, []). + Opts = init_opts(E, []), + type(E, [], Opts). -type(E, Ds) -> - Opts = [], - xmerl:export_simple_content(t_utype_elem(E) ++ local_defs(Ds, Opts), +type(E, Ds, Opts) -> + xmerl:export_simple_content(t_utype_elem(E, Opts) ++ local_defs(Ds, Opts), ?HTML_EXPORT). overview(E=#xmlElement{name = overview, content = Es}, Options) -> @@ -1036,7 +1055,7 @@ overview(E=#xmlElement{name = overview, content = Es}, Options) -> ++ [?NL, hr] ++ navigation("bottom") ++ footer()), - Encoding = get_attrval(encoding, E), + Encoding = Opts#opts.encoding, XML = xhtml(Title, stylesheet(Opts), Body, Encoding), xmerl:export_simple(XML, ?HTML_EXPORT, []). @@ -1094,8 +1113,8 @@ ot_var(E) -> {var,0,list_to_atom(get_attrval(name, E))}. ot_atom(E) -> - {ok, [{atom,A,Name}], _} = erl_scan:string(get_attrval(value, E), 0), - {atom,erl_anno:line(A),Name}. + Name = list_to_atom(get_attrval(value, E)), + {atom,erl_anno:new(0),Name}. ot_integer(E) -> {integer,0,list_to_integer(get_attrval(value, E))}. diff --git a/lib/edoc/src/edoc_report.erl b/lib/edoc/src/edoc_report.erl index ed778c8112..76557ef483 100644 --- a/lib/edoc/src/edoc_report.erl +++ b/lib/edoc/src/edoc_report.erl @@ -94,7 +94,7 @@ where({File, footer}) -> where({File, header}) -> io_lib:fwrite("~ts, in header file: ", [File]); where({File, {F, A}}) -> - io_lib:fwrite("~ts, function ~s/~w: ", [File, F, A]); + io_lib:fwrite("~ts, function ~ts/~w: ", [File, F, A]); where([]) -> io_lib:fwrite("~s: ", [?APPLICATION]); where(File) when is_list(File) -> diff --git a/lib/edoc/src/edoc_scanner.erl b/lib/edoc/src/edoc_scanner.erl index f1d5e1d4b9..35d00c6c0e 100644 --- a/lib/edoc/src/edoc_scanner.erl +++ b/lib/edoc/src/edoc_scanner.erl @@ -86,6 +86,8 @@ scan1([C|Cs], Toks, Pos) when C >= 0, C =< $ -> % Skip blanks scan1(Cs, Toks, Pos); scan1([C|Cs], Toks, Pos) when C >= $a, C =< $z -> % Unquoted atom scan_atom(C, Cs, Toks, Pos); +scan1([C|Cs], Toks, Pos) when C >= $\337, C =< $\377, C /= $\367 -> + scan_atom(C, Cs, Toks, Pos); scan1([C|Cs], Toks, Pos) when C >= $0, C =< $9 -> % Numbers scan_number(C, Cs, Toks, Pos); scan1([$-,C| Cs], Toks, Pos) when C >= $0, C =< $9 -> % Signed numbers @@ -96,6 +98,8 @@ scan1([C|Cs], Toks, Pos) when C >= $A, C =< $Z -> % Variables scan_variable(C, Cs, Toks, Pos); scan1([$_|Cs], Toks, Pos) -> % Variables scan_variable($_, Cs, Toks, Pos); +scan1([C|Cs], Toks, Pos) when C >= $\300, C =< $\336, C /= $\327 -> + scan_variable(C, Cs, Toks, Pos); scan1([$$|Cs], Toks, Pos) -> % Character constant case scan_char_const(Cs, Toks, Pos) of {ok, Result} -> @@ -261,6 +265,15 @@ scan_char([], _Pos) -> %% The following conforms to Standard Erlang escape sequences. +-define(HEX(C), C >= $0 andalso C =< $9 orelse + C >= $A andalso C =< $F orelse + C >= $a andalso C =< $f). + +-define(UNICODE(C), + (C >= 0 andalso C < 16#D800 orelse + C > 16#DFFF andalso C < 16#FFFE orelse + C > 16#FFFF andalso C =< 16#10FFFF)). + scan_escape([O1, O2, O3 | Cs], Pos) when % \<1-3> octal digits O1 >= $0, O1 =< $3, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 -> Val = (O1*8 + O2)*8 + O3 - 73*$0, @@ -272,6 +285,11 @@ scan_escape([O1, O2 | Cs], Pos) when scan_escape([O1 | Cs], Pos) when O1 >= $0, O1 =< $7 -> {O1 - $0,Cs,Pos}; +scan_escape([$x, ${ | Cs], Pos) -> + scan_hex(Cs, Pos, []); +scan_escape([$x, H1, H2 | Cs], Pos) when ?HEX(H1), ?HEX(H2) -> + Val = (H1*16 + H2) - 17*$0, + {Val,Cs,Pos}; scan_escape([$^, C | Cs], Pos) -> % \^X -> CTL-X if C >= $\100, C =< $\137 -> {C - $\100,Cs,Pos}; @@ -285,6 +303,18 @@ scan_escape([C | Cs], Pos) -> scan_escape([], _Pos) -> {error, truncated_char}. +scan_hex([C | Cs], Pos, HCs) when ?HEX(C) -> + scan_hex(Cs, Pos, [C | HCs]); +scan_hex([$} | Cs], Pos, HCs) -> + case catch erlang:list_to_integer(lists:reverse(HCs), 16) of + Val when ?UNICODE(Val) -> + {Val,Cs,Pos}; + _ -> + {error, undefined_escape_sequence} + end; +scan_hex(_Cs, _Pos, _HCs) -> + {error, undefined_escape_sequence}. + %% Note that we return $\000 for undefined escapes. escape_char($b) -> $\010; % \b = BS escape_char($d) -> $\177; % \d = DEL diff --git a/lib/edoc/src/edoc_specs.erl b/lib/edoc/src/edoc_specs.erl index c15dfd328f..fb04bfce0e 100644 --- a/lib/edoc/src/edoc_specs.erl +++ b/lib/edoc/src/edoc_specs.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2016. All Rights Reserved. +%% Copyright Ericsson AB 1996-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -274,12 +274,19 @@ parms([A | As], [D | Ds]) -> param(#t_paren{type = Type}, Default) -> param(Type, Default); -param(#t_record{name = #t_atom{val = Name}}, _Default) -> - list_to_atom(capitalize(atom_to_list(Name))); +param(#t_record{name = #t_atom{val = Name}}=T, Default) -> + AtomList = atom_to_list(Name), + case AtomList =:= lists:flatten(io_lib:write_atom(Name)) of + true -> + list_to_atom(capitalize(AtomList)); + false -> + arg_name(?t_ann(T), Default) + end; param(T, Default) -> arg_name(?t_ann(T), Default). capitalize([C | Cs]) when C >= $a, C =< $z -> [C - 32 | Cs]; +capitalize([C | Cs]) when C >= $\340, C =< $\376, C /= $\367 -> [C - 32 | Cs]; capitalize(Cs) -> Cs. %% Like edoc_types:arg_name/1 diff --git a/lib/edoc/test/edoc_SUITE.erl b/lib/edoc/test/edoc_SUITE.erl index 4d846ad63d..29ca9d1203 100644 --- a/lib/edoc/test/edoc_SUITE.erl +++ b/lib/edoc/test/edoc_SUITE.erl @@ -23,12 +23,13 @@ init_per_group/2,end_per_group/2]). %% Test cases --export([app/1,appup/1,build_std/1,build_map_module/1,otp_12008/1, build_app/1]). +-export([app/1,appup/1,build_std/1,build_map_module/1,otp_12008/1, + build_app/1, otp_14285/1]). suite() -> [{ct_hooks,[ts_install_cth]}]. all() -> - [app,appup,build_std,build_map_module,otp_12008, build_app]. + [app,appup,build_std,build_map_module,otp_12008, build_app, otp_14285]. groups() -> []. @@ -113,3 +114,18 @@ build_app(Config) -> true = filelib:is_regular(filename:join(OutDir, "a.html")), true = filelib:is_regular(filename:join(OutDir, "b.html")), ok. + +otp_14285(Config) -> + DataDir = ?config(data_dir, Config), + PrivDir = ?config(priv_dir, Config), + Un1 = filename:join(DataDir, "un_atom1.erl"), + Un2 = filename:join(DataDir, "un_atom2.erl"), + %% epp_dodger + Opts1 = [{dir, PrivDir}], + ok = edoc:files([Un1], Opts1), + ok = edoc:files([Un2], Opts1), + %% epp + Opts2 = [{preprocess, true}, {dir, PrivDir}], + ok = edoc:files([Un1], Opts2), + ok = edoc:files([Un2], Opts2), + ok. diff --git a/lib/edoc/test/edoc_SUITE_data/un_atom1.erl b/lib/edoc/test/edoc_SUITE_data/un_atom1.erl new file mode 100644 index 0000000000..20ca50d5d2 --- /dev/null +++ b/lib/edoc/test/edoc_SUITE_data/un_atom1.erl @@ -0,0 +1,41 @@ +%% coding:latin-1 + +-module(un_atom1). + +-export(['\x{aaa}memory'/0, 'func-\x{400}'/1, func/1, �func/1]). + +-record('rec-\x{400}', {'field-\x{400}'}). + +-type cs() :: $\x{a} + | $\x{aa} + | $\x{aaa} + | $\xaa. + +-callback 'callback-\x{400}'() -> 'apa'. + +-type 'type-\x{400}'() :: 'atom-\x{400}' + | cs() + | #'rec-\x{400}'{'field-\x{400}' :: 'type-\x{400}'()}. + +-spec '\x{aaa}memory'() -> 'type-\x{400}'(). + +'\x{aaa}memory'() -> + apa:foo(). + +%% @deprecated Please use {@link m:f/1}. +-spec 'func-\x{400}'(#'rec-\x{400}'{}) -> #'rec-\x{400}'{}. + +'func-\x{400}'(_T) -> + foo:bar(#'rec-\x{400}'{}). + +-record(rec, {}). + +-spec func(#rec{}) -> #rec{}. + +func(#rec{}) -> #rec{}. + +-record(�rec, {}). + +-spec �func(#�rec{}) -> #�rec{}. + +�func(#�rec{}) -> #�rec{}. diff --git a/lib/edoc/test/edoc_SUITE_data/un_atom2.erl b/lib/edoc/test/edoc_SUITE_data/un_atom2.erl new file mode 100644 index 0000000000..66c83e30e0 --- /dev/null +++ b/lib/edoc/test/edoc_SUITE_data/un_atom2.erl @@ -0,0 +1,40 @@ +%% coding:utf-8 +-module(un_atom2). + +-export(['\x{aaa}memory'/0, 'func-\x{400}'/1, func/1, äfunc/1]). + +-record('rec-\x{400}', {'field-\x{400}'}). + +-type cs() :: $\x{a} + | $\x{aa} + | $\x{aaa} + | $\xaa. + +-callback 'callback-\x{400}'() -> 'apa'. + +-type 'type-\x{400}'() :: 'atom-\x{400}' + | cs() + | #'rec-\x{400}'{'field-\x{400}' :: 'type-\x{400}'()}. + +-spec '\x{aaa}memory'() -> 'type-\x{400}'(). + +'\x{aaa}memory'() -> + apa:foo(). + +%% @deprecated Please use {@link m:f/1}. +-spec 'func-\x{400}'(#'rec-\x{400}'{}) -> #'rec-\x{400}'{}. + +'func-\x{400}'(_T) -> + foo:bar(#'rec-\x{400}'{}). + +-record(rec, {}). + +-spec func(#rec{}) -> #rec{}. + +func(#rec{}) -> #rec{}. + +-record(ärec, {}). + +-spec äfunc(#ärec{}) -> #ärec{}. + +äfunc(#ärec{}) -> #ärec{}. diff --git a/lib/parsetools/doc/src/leex.xml b/lib/parsetools/doc/src/leex.xml index 29d546105f..1227625287 100644 --- a/lib/parsetools/doc/src/leex.xml +++ b/lib/parsetools/doc/src/leex.xml @@ -4,7 +4,7 @@ <erlref> <header> <copyright> - <year>2009</year><year>2016</year> + <year>2009</year><year>2017</year> <holder>Ericsson AB. All Rights Reserved.</holder> </copyright> <legalnotice> @@ -446,7 +446,8 @@ D = [0-9] </item> </taglist> - <p>The following examples define Erlang data types:</p> + <p>The following examples define simplified versions of a few + Erlang data types:</p> <code> Atoms [a-z][0-9a-zA-Z_]* diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl index e0f37ae9df..e2e7d7359f 100644 --- a/lib/parsetools/src/leex.erl +++ b/lib/parsetools/src/leex.erl @@ -1548,22 +1548,23 @@ out_action_code(File, XrlFile, {_A,Code,_Vars,Name,Args,ArgsChars}) -> L = erl_scan:line(hd(Code)), output_file_directive(File, XrlFile, L-2), io:fwrite(File, "~s(~s) ->~n", [Name, ArgsChars]), - io:fwrite(File, " ~s\n", [pp_tokens(Code, L)]). + io:fwrite(File, " ~ts\n", [pp_tokens(Code, L, File)]). -%% pp_tokens(Tokens, Line) -> [char()]. +%% pp_tokens(Tokens, Line, File) -> [char()]. %% Prints the tokens keeping the line breaks of the original code. -pp_tokens(Tokens, Line0) -> pp_tokens(Tokens, Line0, none). +pp_tokens(Tokens, Line0, File) -> pp_tokens(Tokens, Line0, File, none). -pp_tokens([], _Line0, _) -> []; -pp_tokens([T | Ts], Line0, Prev) -> +pp_tokens([], _Line0, _, _) -> []; +pp_tokens([T | Ts], Line0, File, Prev) -> Line = erl_scan:line(T), - [pp_sep(Line, Line0, Prev, T), pp_symbol(T) | pp_tokens(Ts, Line, T)]. + [pp_sep(Line, Line0, Prev, T), + pp_symbol(T, File) | pp_tokens(Ts, Line, File, T)]. -pp_symbol({var,_,Var}) -> atom_to_list(Var); -pp_symbol({_,_,Symbol}) -> io_lib:fwrite("~p", [Symbol]); -pp_symbol({dot, _}) -> "."; -pp_symbol({Symbol, _}) -> atom_to_list(Symbol). +pp_symbol({var,_,Var}, _) -> atom_to_list(Var); +pp_symbol({_,_,Symbol}, File) -> format_symbol(Symbol, File); +pp_symbol({dot, _}, _) -> "."; +pp_symbol({Symbol, _}, _) -> atom_to_list(Symbol). pp_sep(Line, Line0, Prev, T) when Line > Line0 -> ["\n " | pp_sep(Line - 1, Line0, Prev, T)]; @@ -1622,17 +1623,17 @@ out_dfa_edges(File, DFA) -> end, orddict:new(), Pt), foreach(fun (T) -> Crs = orddict:fetch(T, Tdict), - Edgelab = dfa_edgelabel(Crs), + Edgelab = dfa_edgelabel(Crs, File), io:fwrite(File, " ~b -> ~b [label=\"~ts\"];~n", [S,T,Edgelab]) end, sort(orddict:fetch_keys(Tdict))) end, DFA). -dfa_edgelabel([C]) when is_integer(C) -> quote(C); -dfa_edgelabel(Cranges) -> +dfa_edgelabel([C], File) when is_integer(C) -> quote(C, File); +dfa_edgelabel(Cranges, File) -> %% io:fwrite("el: ~p\n", [Cranges]), - "[" ++ map(fun ({A,B}) -> [quote(A), "-", quote(B)]; - (C) -> [quote(C)] + "[" ++ map(fun ({A,B}) -> [quote(A, File), "-", quote(B, File)]; + (C) -> [quote(C, File)] end, Cranges) ++ "]". set_encoding(#leex{encoding = none}, File) -> @@ -1651,33 +1652,50 @@ output_file_directive(File, Filename, Line) -> format_filename(Filename0, File) -> Filename = filename:flatten(Filename0), + case enc(File) of + unicode -> io_lib:write_string(Filename); + latin1 -> io_lib:write_string_as_latin1(Filename) + end. + +format_symbol(Symbol, File) -> + Format = case enc(File) of + latin1 -> "~p"; + unicode -> "~tp" + end, + io_lib:fwrite(Format, [Symbol]). + +enc(File) -> case lists:keyfind(encoding, 1, io:getopts(File)) of - {encoding, unicode} -> io_lib:write_string(Filename); - _ -> io_lib:write_string_as_latin1(Filename) + false -> latin1; % should never happen + {encoding, Enc} -> Enc end. -quote($^) -> "\\^"; -quote($.) -> "\\."; -quote($$) -> "\\$"; -quote($-) -> "\\-"; -quote($[) -> "\\["; -quote($]) -> "\\]"; -quote($\s) -> "\\\\s"; -quote($\") -> "\\\""; -quote($\b) -> "\\\\b"; -quote($\f) -> "\\\\f"; -quote($\n) -> "\\\\n"; -quote($\r) -> "\\\\r"; -quote($\t) -> "\\\\t"; -quote($\e) -> "\\\\e"; -quote($\v) -> "\\\\v"; -quote($\d) -> "\\\\d"; -quote($\\) -> "\\\\"; -quote(C) when is_integer(C) -> +quote($^, _File) -> "\\^"; +quote($., _File) -> "\\."; +quote($$, _File) -> "\\$"; +quote($-, _File) -> "\\-"; +quote($[, _File) -> "\\["; +quote($], _File) -> "\\]"; +quote($\s, _File) -> "\\\\s"; +quote($\", _File) -> "\\\""; +quote($\b, _File) -> "\\\\b"; +quote($\f, _File) -> "\\\\f"; +quote($\n, _File) -> "\\\\n"; +quote($\r, _File) -> "\\\\r"; +quote($\t, _File) -> "\\\\t"; +quote($\e, _File) -> "\\\\e"; +quote($\v, _File) -> "\\\\v"; +quote($\d, _File) -> "\\\\d"; +quote($\\, _File) -> "\\\\"; +quote(C, File) when is_integer(C) -> %% Must remove the $ and get the \'s right. - case io_lib:write_char(C) of + S = case enc(File) of + unicode -> io_lib:write_char(C); + latin1 -> io_lib:write_char_as_latin1(C) + end, + case S of [$$,$\\|Cs] -> "\\\\" ++ Cs; [$$|Cs] -> Cs end; -quote(maxchar) -> +quote(maxchar, _File) -> "MAXCHAR". diff --git a/lib/parsetools/src/yecc.erl b/lib/parsetools/src/yecc.erl index 05446c1a85..48559ec402 100644 --- a/lib/parsetools/src/yecc.erl +++ b/lib/parsetools/src/yecc.erl @@ -154,13 +154,13 @@ compile(Input0, Output0, format_error(bad_declaration) -> io_lib:fwrite("unknown or bad declaration, ignored", []); format_error({bad_expect, SymName}) -> - io_lib:fwrite("argument ~s of Expect is not an integer", + io_lib:fwrite("argument ~ts of Expect is not an integer", [format_symbol(SymName)]); format_error({bad_rootsymbol, SymName}) -> - io_lib:fwrite("rootsymbol ~s is not a nonterminal", + io_lib:fwrite("rootsymbol ~ts is not a nonterminal", [format_symbol(SymName)]); format_error({bad_states, SymName}) -> - io_lib:fwrite("argument ~s of States is not an integer", + io_lib:fwrite("argument ~ts of States is not an integer", [format_symbol(SymName)]); format_error({conflict, Conflict}) -> format_conflict(Conflict); @@ -169,19 +169,19 @@ format_error({conflicts, SR, RR}) -> format_error({duplicate_declaration, Tag}) -> io_lib:fwrite("duplicate declaration of ~s", [atom_to_list(Tag)]); format_error({duplicate_nonterminal, Nonterminal}) -> - io_lib:fwrite("duplicate non-terminals ~s", + io_lib:fwrite("duplicate non-terminals ~ts", [format_symbol(Nonterminal)]); format_error({duplicate_precedence, Op}) -> - io_lib:fwrite("duplicate precedence operator ~s", + io_lib:fwrite("duplicate precedence operator ~ts", [format_symbol(Op)]); format_error({duplicate_terminal, Terminal}) -> - io_lib:fwrite("duplicate terminal ~s", + io_lib:fwrite("duplicate terminal ~ts", [format_symbol(Terminal)]); format_error({endsymbol_is_nonterminal, Symbol}) -> - io_lib:fwrite("endsymbol ~s is a nonterminal", + io_lib:fwrite("endsymbol ~ts is a nonterminal", [format_symbol(Symbol)]); format_error({endsymbol_is_terminal, Symbol}) -> - io_lib:fwrite("endsymbol ~s is a terminal", + io_lib:fwrite("endsymbol ~ts is a terminal", [format_symbol(Symbol)]); format_error({error, Module, Error}) -> Module:format_error(Error); @@ -192,7 +192,7 @@ format_error(illegal_empty) -> format_error({internal_error, Error}) -> io_lib:fwrite("internal yecc error: ~w", [Error]); format_error({missing_syntax_rule, Nonterminal}) -> - io_lib:fwrite("no syntax rule for non-terminal symbol ~s", + io_lib:fwrite("no syntax rule for non-terminal symbol ~ts", [format_symbol(Nonterminal)]); format_error({n_states, Exp, N}) -> io_lib:fwrite("expected ~w states, but got ~p states", [Exp, N]); @@ -201,31 +201,31 @@ format_error(no_grammar_rules) -> format_error(nonterminals_missing) -> io_lib:fwrite("Nonterminals is missing", []); format_error({precedence_op_is_endsymbol, SymName}) -> - io_lib:fwrite("precedence operator ~s is endsymbol", + io_lib:fwrite("precedence operator ~ts is endsymbol", [format_symbol(SymName)]); format_error({precedence_op_is_unknown, SymName}) -> - io_lib:fwrite("unknown precedence operator ~s", + io_lib:fwrite("unknown precedence operator ~ts", [format_symbol(SymName)]); format_error({reserved, N}) -> io_lib:fwrite("the use of ~w should be avoided", [N]); format_error({symbol_terminal_and_nonterminal, SymName}) -> - io_lib:fwrite("symbol ~s is both a terminal and nonterminal", + io_lib:fwrite("symbol ~ts is both a terminal and nonterminal", [format_symbol(SymName)]); format_error(rootsymbol_missing) -> io_lib:fwrite("Rootsymbol is missing", []); format_error(terminals_missing) -> io_lib:fwrite("Terminals is missing", []); format_error({undefined_nonterminal, Symbol}) -> - io_lib:fwrite("undefined nonterminal: ~s", [format_symbol(Symbol)]); + io_lib:fwrite("undefined nonterminal: ~ts", [format_symbol(Symbol)]); format_error({undefined_pseudo_variable, Atom}) -> io_lib:fwrite("undefined pseudo variable ~w", [Atom]); format_error({undefined_symbol, SymName}) -> - io_lib:fwrite("undefined rhs symbol ~s", [format_symbol(SymName)]); + io_lib:fwrite("undefined rhs symbol ~ts", [format_symbol(SymName)]); format_error({unused_nonterminal, Nonterminal}) -> - io_lib:fwrite("non-terminal symbol ~s not used", + io_lib:fwrite("non-terminal symbol ~ts not used", [format_symbol(Nonterminal)]); format_error({unused_terminal, Terminal}) -> - io_lib:fwrite("terminal symbol ~s not used", + io_lib:fwrite("terminal symbol ~ts not used", [format_symbol(Terminal)]); format_error({bad_symbol, String}) -> io_lib:fwrite("bad symbol ~ts", [String]); @@ -1809,9 +1809,9 @@ report_conflict(Conflict, St, ActionName, How) -> Formated = format_symbol(ActionName), case How of prec -> - io:fwrite(<<"Resolved in favor of ~s.\n\n">>, [Formated]); + io:fwrite(<<"Resolved in favor of ~ts.\n\n">>, [Formated]); default -> - io:fwrite(<<"Conflict resolved in favor of ~s.\n\n">>, + io:fwrite(<<"Conflict resolved in favor of ~ts.\n\n">>, [Formated]) end; true -> @@ -1856,7 +1856,7 @@ format_conflict({Symbol, N, _, {one_level_up, {L1, RuleN1, {P1, Ass1}}, {L2, RuleN2, {P2, Ass2}}}}) -> S1 = io_lib:fwrite(<<"Conflicting precedences of symbols when " - "scanning ~s in state ~w:\n">>, + "scanning ~ts in state ~w:\n">>, [format_symbol(Symbol), N]), S2 = io_lib:fwrite(<<" ~s ~w (rule ~w at line ~w)\n" " vs.\n">>, @@ -1866,26 +1866,26 @@ format_conflict({Symbol, N, _, {one_level_up, [S1, S2, S3]; format_conflict({Symbol, N, Reduce, Confl}) -> S1 = io_lib:fwrite(<<"Parse action conflict scanning symbol " - "~s in state ~w:\n">>, [format_symbol(Symbol), N]), + "~ts in state ~w:\n">>, [format_symbol(Symbol), N]), S2 = case Reduce of {[HR | TR], RuleNmbr, RuleLine} -> - io_lib:fwrite(<<" Reduce to ~s from ~s (rule ~w at " + io_lib:fwrite(<<" Reduce to ~ts from ~ts (rule ~w at " "line ~w)\n vs.\n">>, [format_symbol(HR), format_symbols(TR), RuleNmbr, RuleLine]) end, S3 = case Confl of {reduce, [HR2|TR2], RuleNmbr2, RuleLine2} -> - io_lib:fwrite(<<" reduce to ~s from ~s " + io_lib:fwrite(<<" reduce to ~ts from ~ts " "(rule ~w at line ~w).">>, [format_symbol(HR2), format_symbols(TR2), RuleNmbr2, RuleLine2]); {shift, NewState, Sym} -> io_lib:fwrite(<<" shift to state ~w, adding right " - "sisters to ~s.">>, + "sisters to ~ts.">>, [NewState, format_symbol(Sym)]); {accept, Rootsymbol} -> - io_lib:fwrite(<<" reduce to rootsymbol ~s.">>, + io_lib:fwrite(<<" reduce to rootsymbol ~ts.">>, [format_symbol(Rootsymbol)]) end, [S1, S2, S3]. @@ -1926,8 +1926,9 @@ format_conflict({Symbol, N, Reduce, Confl}) -> -define(CODE_VERSION, "1.4"). -define(YECC_BUG(M, A), - iolist_to_binary([" erlang:error({yecc_bug,\"",?CODE_VERSION,"\",", - io_lib:fwrite(M, A), "}).\n\n"])). + unicode:characters_to_binary( + [" erlang:error({yecc_bug,\"",?CODE_VERSION,"\",", + io_lib:fwrite(M, A), "}).\n\n"])). %% Returns number of newlines in included files. output_prelude(Outport, Inport, St0) when St0#yecc.includefile =:= [] -> @@ -1980,7 +1981,7 @@ output_header(St0) -> output_goto(St, [{_Nonterminal, []} | Go], StateInfo) -> output_goto(St, Go, StateInfo); output_goto(St0, [{Nonterminal, List} | Go], StateInfo) -> - F = function_name(yeccgoto, Nonterminal), + F = function_name(St0, yeccgoto, Nonterminal), St05 = fwrite(St0, <<"-dialyzer({nowarn_function, ~w/7}).\n">>, [F]), St10 = output_goto1(St05, List, F, StateInfo, true), St = output_goto_fini(F, Nonterminal, St10), @@ -2018,7 +2019,8 @@ output_goto_fini(F, NT, #yecc{includefile_version = {1,1}}=St0) -> St = fwrite(St10, <<"~w(State, _Cat, _Ss, _Stack, _T, _Ts, _Tzr) ->\n">>, [F]), fwrite(St, - ?YECC_BUG(<<"{~w, State, missing_in_goto_table}">>, [NT]), + ?YECC_BUG(<<"{~ts, State, missing_in_goto_table}">>, + [quoted_atom(St0, NT)]), []); output_goto_fini(_F, _NT, St) -> fwrite(St, <<".\n\n">>, []). @@ -2027,7 +2029,7 @@ output_goto_fini(_F, _NT, St) -> find_user_code(ParseActions, St) -> [#user_code{state = State, terminal = Terminal, - funname = inlined_function_name(State, Terminal), + funname = inlined_function_name(St, State, Terminal), action = Action} || {State, La_actions} <- ParseActions, {Action, Terminals, RuleNmbr, NmbrOfDaughters} @@ -2148,14 +2150,14 @@ output_action(St, State, Terminal, #reduce{}=Action, IsFirst, SI) -> output_reduce(St, State, Terminal, Action, IsFirst, SI); output_action(St0, State, Terminal, #shift{state = NewState}, IsFirst, _SI) -> St10 = delim(St0, IsFirst), - St = fwrite(St10, <<"yeccpars2_~w(S, ~s, Ss, Stack, T, Ts, Tzr) ->\n">>, - [State, quoted_atom(Terminal)]), + St = fwrite(St10, <<"yeccpars2_~w(S, ~ts, Ss, Stack, T, Ts, Tzr) ->\n">>, + [State, quoted_atom(St10, Terminal)]), output_call_to_includefile(NewState, St); output_action(St0, State, Terminal, accept, IsFirst, _SI) -> St10 = delim(St0, IsFirst), St = fwrite(St10, - <<"yeccpars2_~w(_S, ~s, _Ss, Stack, _T, _Ts, _Tzr) ->\n">>, - [State, quoted_atom(Terminal)]), + <<"yeccpars2_~w(_S, ~ts, _Ss, Stack, _T, _Ts, _Tzr) ->\n">>, + [State, quoted_atom(St10, Terminal)]), fwrite(St, <<" {ok, hd(Stack)}">>, []); output_action(St, _State, _Terminal, nonassoc, _IsFirst, _SI) -> St. @@ -2174,19 +2176,19 @@ output_state_actions_fini(State, IsFirst, St0) -> St = fwrite(St10, <<"yeccpars2_~w(_, _, _, _, T, _, _) ->\n">>, [State]), fwrite(St, <<" yeccerror(T).\n\n">>, []). -output_reduce(St0, State, Terminal0, +output_reduce(St0, State, Terminal, #reduce{rule_nmbr = RuleNmbr, head = Head, nmbr_of_daughters = NmbrOfDaughters}, IsFirst, StateInfo) -> St10 = delim(St0, IsFirst), - Terminal = if - is_atom(Terminal0) -> quoted_atom(Terminal0); - true -> Terminal0 - end, + QuotedTerminal = if + is_atom(Terminal) -> quoted_atom(St10, Terminal); + true -> Terminal + end, St20 = fwrite(St10, - <<"yeccpars2_~w(_S, ~s, Ss, Stack, T, Ts, Tzr) ->\n">>, - [State, Terminal]), + <<"yeccpars2_~w(_S, ~ts, Ss, Stack, T, Ts, Tzr) ->\n">>, + [State, QuotedTerminal]), St30 = if NmbrOfDaughters < 2 -> @@ -2205,7 +2207,7 @@ output_reduce(St0, State, Terminal0, _ -> NewStack = "NewStack", fwrite(St30, <<" NewStack = ~w(Stack),\n">>, - [inlined_function_name(State, Terminal0)]) + [inlined_function_name(St30, State, Terminal)]) end, if NmbrOfDaughters =:= 0 -> @@ -2221,13 +2223,13 @@ output_reduce(St0, State, Terminal0, St = fwrite(St40, <<"~s">>, [C]), %% Short-circuit call to yeccpars2: fwrite(St, - <<" yeccpars2_~w(~s, ~s, [~w | Ss], ~s, T, Ts, Tzr)">>, - [Repr, NextS, Terminal, State, NewStack]); + <<" yeccpars2_~w(~s, ~ts, [~w | Ss], ~s, T, Ts, Tzr)">>, + [Repr, NextS, QuotedTerminal, State, NewStack]); true -> fwrite(St40, - <<" ~w(hd(~s), ~s, ~s, ~s, T, Ts, Tzr)">>, - [function_name(yeccgoto, Head), Ns, - Terminal, Ns, NewStack]) + <<" ~w(hd(~s), ~ts, ~s, ~s, T, Ts, Tzr)">>, + [function_name(St40, yeccgoto, Head), Ns, + QuotedTerminal, Ns, NewStack]) end. delim(St, true) -> @@ -2235,8 +2237,10 @@ delim(St, true) -> delim(St, false) -> fwrite(St, <<";\n">>, []). -quoted_atom(Atom) -> - io_lib:fwrite(<<"~w">>, [Atom]). +quoted_atom(#yecc{encoding = latin1}, Atom) when is_atom(Atom) -> + io_lib:write_atom_as_latin1(Atom); +quoted_atom(_St, Atomic) -> + io_lib:write(Atomic). output_inlined(St, UserCodeActions, Infile) -> foldl(fun(#user_code{funname = InlinedFunctionName, @@ -2288,14 +2292,16 @@ output_inlined(St0, FunctionName, Reduce, Infile) -> fwrite(St, <<" [begin\n ~ts\n end | ~s].\n\n">>, [pp_tokens(Tokens, Line0, St#yecc.encoding), Stack]). -inlined_function_name(State, "Cat") -> - inlined_function_name(State, ""); -inlined_function_name(State, Terminal) -> - list_to_atom(concat([yeccpars2_, State, '_', Terminal])). +inlined_function_name(St, State, Terminal) -> + End = case Terminal of + "Cat" -> []; + _ -> [quoted_atom(St, Terminal)] + end, + list_to_atom(concat([yeccpars2_, State, '_'] ++ End)). --compile({nowarn_unused_function,function_name/2}). -function_name(Name, Suf) -> - list_to_atom(concat([Name, '_' | quoted_atom(Suf)])). +-compile({nowarn_unused_function,function_name/3}). +function_name(St, Name, Suf) -> + list_to_atom(concat([Name, '_'] ++ [quoted_atom(St, Suf)])). rule(RulePointer, St) -> #rule{n = N, anno = Anno, symbols = Symbols} = diff --git a/lib/parsetools/test/leex_SUITE.erl b/lib/parsetools/test/leex_SUITE.erl index 54602848ec..3f5d9fee3e 100644 --- a/lib/parsetools/test/leex_SUITE.erl +++ b/lib/parsetools/test/leex_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2010-2016. All Rights Reserved. +%% Copyright Ericsson AB 2010-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -45,7 +45,7 @@ pt/1, man/1, ex/1, ex2/1, not_yet/1, line_wrap/1, - otp_10302/1, otp_11286/1, unicode/1, otp_13916/1]). + otp_10302/1, otp_11286/1, unicode/1, otp_13916/1, otp_14285/1]). % Default timetrap timeout (set in init_per_testcase). -define(default_timeout, ?t:minutes(1)). @@ -67,7 +67,7 @@ all() -> groups() -> [{checks, [], [file, compile, syntax]}, {examples, [], [pt, man, ex, ex2, not_yet, unicode]}, - {tickets, [], [otp_10302, otp_11286, otp_13916]}, + {tickets, [], [otp_10302, otp_11286, otp_13916, otp_14285]}, {bugs, [], [line_wrap]}]. init_per_suite(Config) -> @@ -1131,6 +1131,45 @@ otp_13916(Config) when is_list(Config) -> ?line run(Config, Ts), ok. +otp_14285(Config) -> + Dir = ?privdir, + Filename = filename:join(Dir, "file.xrl"), + + Ts = [{otp_14285_1, + <<"%% encoding: latin-1\n" + "Definitions.\n" + "A = a\n" + "Z = z\n" + "L = [{A}-{Z}]\n" + "U = [\\x{400}]\n" + "Rules.\n" + "{L}+ : {token,l}.\n" + "{U}+ : {token,'\\x{400}'}.\n" + "Erlang code.\n" + "-export([t/0]).\n" + "t() ->\n" + " {ok,['\\x{400}'],1} = string(\"\\x{400}\"), ok.\n">>, + default, + ok}, + {otp_14285_2, + <<"%% encoding: UTF-8\n" + "Definitions.\n" + "A = a\n" + "Z = z\n" + "L = [{A}-{Z}]\n" + "U = [\x{400}]\n" + "Rules.\n" + "{L}+ : {token,l}.\n" + "{U}+ : {token,'\x{400}'}.\n" + "Erlang code.\n" + "-export([t/0]).\n" + "t() ->\n" + " {ok,['\x{400}'],1} = string(\"\x{400}\"), ok.\n">>, + default, + ok}], + run(Config, Ts), + ok. + start_node(Name, Args) -> [_,Host] = string:tokens(atom_to_list(node()), "@"), ct:log("Trying to start ~w@~s~n", [Name,Host]), diff --git a/lib/parsetools/test/yecc_SUITE.erl b/lib/parsetools/test/yecc_SUITE.erl index 2c37278d4b..a7166b91ed 100644 --- a/lib/parsetools/test/yecc_SUITE.erl +++ b/lib/parsetools/test/yecc_SUITE.erl @@ -50,7 +50,7 @@ otp_5369/1, otp_6362/1, otp_7945/1, otp_8483/1, otp_8486/1, otp_7292/1, otp_7969/1, otp_8919/1, otp_10302/1, otp_11269/1, - otp_11286/1]). + otp_11286/1, otp_14285/1]). % Default timetrap timeout (set in init_per_testcase). -define(default_timeout, ?t:minutes(1)). @@ -78,7 +78,7 @@ groups() -> {bugs, [], [otp_5369, otp_6362, otp_7945, otp_8483, otp_8486]}, {improvements, [], [otp_7292, otp_7969, otp_8919, otp_10302, - otp_11269, otp_11286]}]. + otp_11269, otp_11286, otp_14285]}]. init_per_suite(Config) -> Config. @@ -2048,6 +2048,90 @@ otp_11286(Config) when is_list(Config) -> true = test_server:stop_node(Node), ok. +otp_14285(Config) -> + Dir = ?privdir, + YeccPre = filename:join(Dir, "yeccpre.hrl"), + ?line ok = file:write_file(YeccPre, + [<<"-export([t/0]).\n">>,my_yeccpre()]), + + T0 = <<" + Nonterminals '\\x{400}'. + Terminals t. + Rootsymbol '\\x{400}'. + '\\x{400}' -> t : '$1'. + Erlang code. + t() -> + L = [{t, 1}], + {ok, R} = parse(L), + {t, 1} = R, + ok.">>, + Ts0 = [{otp_14285_1, + [<<"%% coding: Latin-1\n">>,T0],YeccPre,ok}, + {otp_14285_2, + [<<"%% coding: coding: UTF-8\n">>,T0],YeccPre,ok}], + run(Config, Ts0), + file:delete(YeccPre), + + T1 = <<" + Nonterminals '1\\x{400}' list 'unused\\x{400}'. + Terminals '2\\x{400}'. + Rootsymbol '1\\x{400}'. + + '1\\x{400}' -> list : '$1'. + + list -> '2\\x{400}' : '$1'. + list -> list '2\\x{400}' : {foo,'\\x{400}'}. + + Erlang code. + + -export([t/0]). + + t() -> + L = [{'2\\x{400}', 1}, {'2\\x{400}',2}], + {ok, R} = parse(L), + {foo,A} = R, + '\\x{400}' = A, + [1024] = atom_to_list(A), + ok.">>, + + Ts1 = [{otp_14285_3, + [<<"%% coding: Latin-1\n">>,T1],default,ok}, + {otp_14285_4, + [<<"%% coding: UTF-8\n">>,T1],default,ok}], + run(Config, Ts1), + + T2 = <<" + Nonterminals E. + Terminals '-' '+' '=' id. + Rootsymbol E. + Endsymbol '\\x{400}'. + + E -> E '=' E : {op, '=', '$1', '$3'}. + E -> E '+' E : {op, '+', '$1', '$3'}. + E -> '-' E : {op, '-', '$2'}. + E -> id : '$1'. + + Nonassoc 100 '='. + Right 200 '+' '-'. + + Erlang code. + + -export([t/0]). + + t() -> + {ok,{op,'=',{id,1},{op,'-',{op,'+',{id,4},{id,6}}}}} = + parse([{id,1},{'=',2},{'-',3},{id,4},{'+',5},{id,6}, + {'\\x{400}',1}]), + ok.">>, + + Ts2 = [{otp_14285_5, + [<<"%% coding: Latin-1\n">>,T2],default,ok}, + {otp_14285_6, + [<<"%% coding: UTF-8\n">>,T2],default,ok}], + run(Config, Ts2), + + ok. + start_node(Name, Args) -> [_,Host] = string:tokens(atom_to_list(node()), "@"), ct:log("Trying to start ~w@~s~n", [Name,Host]), diff --git a/lib/stdlib/doc/src/io_lib.xml b/lib/stdlib/doc/src/io_lib.xml index 931e50f6f2..5ae400da62 100644 --- a/lib/stdlib/doc/src/io_lib.xml +++ b/lib/stdlib/doc/src/io_lib.xml @@ -4,7 +4,7 @@ <erlref> <header> <copyright> - <year>1996</year><year>2016</year> + <year>1996</year><year>2017</year> <holder>Ericsson AB. All Rights Reserved.</holder> </copyright> <legalnotice> @@ -147,7 +147,7 @@ format string (that is, <c>~ts</c> or <c>~tc</c>), the resulting list can contain characters beyond the ISO Latin-1 character range (that is, numbers > 255). If so, the - result is not an ordinary Erlang <c>string()</c>, but can well be + result is still an ordinary Erlang <c>string()</c>, and can well be used in any context where Unicode data is allowed.</p> </desc> </func> @@ -384,6 +384,16 @@ </func> <func> + <name name="write_atom_as_latin1" arity="1"/> + <fsummary>Write an atom.</fsummary> + <desc> + <p>Returns the list of characters needed to print atom + <c><anno>Atom</anno></c>. Non-Latin-1 characters + are escaped.</p> + </desc> + </func> + + <func> <name name="write_char" arity="1"/> <fsummary>Write a character.</fsummary> <desc> diff --git a/lib/stdlib/doc/src/shell.xml b/lib/stdlib/doc/src/shell.xml index f52bc39deb..ab62c2fcdd 100644 --- a/lib/stdlib/doc/src/shell.xml +++ b/lib/stdlib/doc/src/shell.xml @@ -4,7 +4,7 @@ <erlref> <header> <copyright> - <year>1996</year><year>2016</year> + <year>1996</year><year>2017</year> <holder>Ericsson AB. All Rights Reserved.</holder> </copyright> <legalnotice> @@ -854,7 +854,7 @@ q - quit erlang <c>{history, N}</c>, where <c>N</c> is the current command number. The function is to return a list of characters or an atom. This constraint is because of the Erlang I/O protocol. Unicode characters - beyond code point 255 are allowed in the list. Notice + beyond code point 255 are allowed in the list and the atom. Notice that in restricted mode the call <c>Mod:Func(L)</c> must be allowed or the default shell prompt function is called.</p> </section> diff --git a/lib/stdlib/src/erl_pp.erl b/lib/stdlib/src/erl_pp.erl index 6068afb293..ee5e7a11bf 100644 --- a/lib/stdlib/src/erl_pp.erl +++ b/lib/stdlib/src/erl_pp.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2016. All Rights Reserved. +%% Copyright Ericsson AB 1996-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -44,7 +44,7 @@ | {encoding, latin1 | unicode | utf8}). -type(options() :: hook_function() | [option()]). --record(pp, {string_fun, char_fun}). +-record(pp, {value_fun, string_fun, char_fun}). -record(options, {hook, encoding, opts}). @@ -214,11 +214,15 @@ state(_Hook) -> state(). state() -> - #pp{string_fun = fun io_lib:write_string_as_latin1/1, + Options = [{encoding,latin1}], + #pp{value_fun = fun(V) -> io_lib_pretty:print(V, Options) end, + string_fun = fun io_lib:write_string_as_latin1/1, char_fun = fun io_lib:write_char_as_latin1/1}. unicode_state() -> - #pp{string_fun = fun io_lib:write_string/1, + Options = [{encoding,unicode}], + #pp{value_fun = fun(V) -> io_lib_pretty:print(V, Options) end, + string_fun = fun io_lib:write_string/1, char_fun = fun io_lib:write_char/1}. encoding(Options) -> @@ -253,31 +257,30 @@ lattribute({attribute,_Line,Name,Arg}, Opts) -> lattribute(module, {M,Vs}, _Opts) -> A = a0(), - attr("module",[{var,A,pname(M)}, - foldr(fun(V, C) -> {cons,A,{var,A,V},C} - end, {nil,A}, Vs)]); + attr(module,[{var,A,pname(M)}, + foldr(fun(V, C) -> {cons,A,{var,A,V},C} + end, {nil,A}, Vs)]); lattribute(module, M, _Opts) -> - attr("module", [{var,a0(),pname(M)}]); + attr(module, [{var,a0(),pname(M)}]); lattribute(export, Falist, _Opts) -> - call({var,a0(),"-export"}, [falist(Falist)], 0, options(none)); + attrib(export, falist(Falist)); lattribute(import, Name, _Opts) when is_list(Name) -> - attr("import", [{var,a0(),pname(Name)}]); + attr(import, [{var,a0(),pname(Name)}]); lattribute(import, {From,Falist}, _Opts) -> - attr("import",[{var,a0(),pname(From)},falist(Falist)]); + attrib(import, [leaf(pname(From)),falist(Falist)]); lattribute(export_type, Talist, _Opts) -> - call({var,a0(),"-export_type"}, [falist(Talist)], 0, options(none)); + attrib(export_type, falist(Talist)); lattribute(optional_callbacks, Falist, Opts) -> - ArgL = try falist(Falist) - catch _:_ -> abstract(Falist, Opts) - end, - call({var,a0(),"-optional_callbacks"}, [ArgL], 0, options(none)); + try attrib(optional_callbacks, falist(Falist)) + catch _:_ -> attr(optional_callbacks, [abstract(Falist, Opts)]) + end; lattribute(file, {Name,Line}, _Opts) -> - attr("file", [{string,a0(),Name},{integer,a0(),Line}]); + attr(file, [{string,a0(),Name},{integer,a0(),Line}]); lattribute(record, {Name,Is}, Opts) -> - Nl = leaf(format("-record(~w,", [Name])), + Nl = [leaf("-record("),{atom,Name},$,], [{first,Nl,record_fields(Is, Opts)},$)]; lattribute(Name, Arg, Options) -> - attr(write(Name), [abstract(Arg, Options)]). + attr(Name, [abstract(Arg, Options)]). abstract(Arg, #options{encoding = Encoding}) -> erl_parse:abstract(Arg, [{encoding,Encoding}]). @@ -340,7 +343,7 @@ ltype({user_type,Line,T,Ts}, _) -> ltype({remote_type,Line,[M,F,Ts]}, _) -> simple_type({remote,Line,M,F}, Ts); ltype({atom,_,T}, _) -> - leaf(write(T)); + {atom,T}; ltype(E, P) -> lexpr(E, P, options(none)). @@ -382,12 +385,12 @@ tuple_type(Ts, F) -> specattr(SpecKind, {FuncSpec,TypeSpecs}) -> Func = case FuncSpec of {F,_A} -> - format("~w", [F]); + {atom,F}; {M,F,_A} -> - format("~w:~w", [M, F]) + [{atom,M},$:,{atom,F}] end, {first,leaf(lists:concat(["-", SpecKind, " "])), - {list,[{first,leaf(Func),spec_clauses(TypeSpecs)}]}}. + {list,[{first,Func,spec_clauses(TypeSpecs)}]}}. spec_clauses(TypeSpecs) -> {prefer_nl,[$;],[sig_type(T) || T <- TypeSpecs]}. @@ -429,7 +432,10 @@ ltypes(Ts, F, Prec) -> [F(T, Prec) || T <- Ts]. attr(Name, Args) -> - call({var,a0(),format("-~s", [Name])}, Args, 0, options(none)). + {first,[$-,{atom,Name}],args(Args, options(none))}. + +attrib(Name, Args) -> + {first,[$-,{atom,Name}],[{seq,$(,$),[$,],Args}]}. pname(['' | As]) -> [$. | pname(As)]; @@ -441,10 +447,13 @@ pname(A) when is_atom(A) -> write(A). falist([]) -> - {nil,a0()}; -falist([{Name,Arity}|Falist]) -> - A = a0(), - {cons,A,{var,A,format("~w/~w", [Name,Arity])},falist(Falist)}. + [leaf("[]")]; +falist(Falist) -> + L = [begin + {Name,Arity} = Fa, + [{atom,Name},leaf(format("/~w", [Arity]))] + end || Fa <- Falist], + [{seq,$[,$],$,,L}]. lfunction({function,_Line,Name,_Arity,Cs}, Opts) -> Cll = nl_clauses(fun (C, H) -> func_clause(Name, C, H) end, $;, Opts, Cs), @@ -489,7 +498,7 @@ lexpr({var,_,V}, _, _) -> leaf(format("~ts", [V])); lexpr({char,_,C}, _, _) -> {char,C}; lexpr({integer,_,N}, _, _) -> leaf(write(N)); lexpr({float,_,F}, _, _) -> leaf(write(F)); -lexpr({atom,_,A}, _, _) -> leaf(write(A)); +lexpr({atom,_,A}, _, _) -> {atom,A}; lexpr({string,_,S}, _, _) -> {string,S}; lexpr({nil,_}, _, _) -> '[]'; lexpr({cons,_,H,T}, _, Opts) -> @@ -519,7 +528,7 @@ lexpr({record, _, Name, Fs}, Prec, Opts) -> lexpr({record_field, _, Rec, Name, F}, Prec, Opts) -> {L,P,R} = inop_prec('#'), Rl = lexpr(Rec, L, Opts), - Nl = leaf(format("#~w.", [Name])), + Nl = [$#,{atom,Name},$.], El = [Rl,Nl,lexpr(F, R, Opts)], maybe_paren(P, Prec, El); lexpr({record, _, Rec, Name, Fs}, Prec, Opts) -> @@ -538,12 +547,12 @@ lexpr({record_field, _, Rec, F}, Prec, Opts) -> maybe_paren(P, Prec, El); lexpr({map, _, Fs}, Prec, Opts) -> {P,_R} = preop_prec('#'), - El = {first,leaf("#"),map_fields(Fs, Opts)}, + El = {first,$#,map_fields(Fs, Opts)}, maybe_paren(P, Prec, El); lexpr({map, _, Map, Fs}, Prec, Opts) -> {L,P,_R} = inop_prec('#'), Rl = lexpr(Map, L, Opts), - El = {first,[Rl,leaf("#")],map_fields(Fs, Opts)}, + El = {first,[Rl,$#],map_fields(Fs, Opts)}, maybe_paren(P, Prec, El); lexpr({block,_,Es}, _, Opts) -> {list,[{step,'begin',body(Es, Opts)},'end']}; @@ -563,13 +572,16 @@ lexpr({'receive',_,Cs,To,ToOpt}, _, Opts) -> {step,'after',Al}, 'end']}; lexpr({'fun',_,{function,F,A}}, _Prec, _Opts) -> - leaf(format("fun ~w/~w", [F,A])); -lexpr({'fun',_,{function,F,A},Extra}, _Prec, _Opts) -> - {force_nl,fun_info(Extra),leaf(format("fun ~w/~w", [F,A]))}; -lexpr({'fun',_,{function,M,F,A}}, _Prec, _Opts) + [leaf("fun "),{atom,F},leaf(format("/~w", [A]))]; +lexpr({'fun',L,{function,_,_}=Func,Extra}, Prec, Opts) -> + {force_nl,fun_info(Extra),lexpr({'fun',L,Func}, Prec, Opts)}; +lexpr({'fun',L,{function,M,F,A}}, Prec, Opts) when is_atom(M), is_atom(F), is_integer(A) -> %% For backward compatibility with pre-R15 abstract format. - leaf(format("fun ~w:~w/~w", [M,F,A])); + Mod = erl_parse:abstract(M), + Fun = erl_parse:abstract(F), + Arity = erl_parse:abstract(A), + lexpr({'fun',L,{function,Mod,Fun,Arity}}, Prec, Opts); lexpr({'fun',_,{function,M,F,A}}, _Prec, Opts) -> %% New format in R15. NameItem = lexpr(M, Opts), @@ -660,7 +672,7 @@ lexpr({bin,_,Fs}, _, Opts) -> bit_grp(Fs, Opts); %% Special case for straight values. lexpr({value,_,Val}, _,_) -> - leaf(write(Val)); + {value,Val}; %% Now do the hook. lexpr(Other, _Precedence, #options{hook = none}) -> leaf(format("INVALID-FORM:~w:",[Other])); @@ -676,7 +688,7 @@ call(Name, Args, Prec, Opts) -> maybe_paren(P, Prec, Item). fun_info(Extra) -> - leaf(format("% fun-info: ~w", [Extra])). + [leaf("% fun-info: "),{value,Extra}]. %% BITS: @@ -717,7 +729,7 @@ bit_elem_type(T) -> %% end of BITS record_name(Name) -> - leaf(format("#~w", [Name])). + [$#,{atom,Name}]. record_fields(Fs, Opts) -> tuple(Fs, fun record_field/2, Opts). @@ -919,8 +931,10 @@ frmt(Item, I, PP) -> %%% - {force_nl,ExtraInfo,I}: fun-info (a comment) forces linebreak before I. %%% - {prefer_nl,Sep,IPs}: forces linebreak between Is unlesss negative %%% indentation. +%%% - {atom,A}: an atom %%% - {char,C}: a character %%% - {string,S}: a string. +%%% - {value,T}: a term. %%% - {hook,...}, {ehook,...}: hook expressions. %%% %%% list, first, seq, force_nl, and prefer_nl all accept IPs, where each @@ -981,6 +995,10 @@ f({prefer_nl,Sep,LItems}, I0, ST, WT, PP) -> true -> {insert_newlines(CharsSize2L, I0, ST),nsz(lists:last(Sizes), I0)} end; +f({value,V}, I, ST, WT, PP) -> + f(write_a_value(V, PP), I, ST, WT, PP); +f({atom,A}, I, ST, WT, PP) -> + f(write_an_atom(A, PP), I, ST, WT, PP); f({char,C}, I, ST, WT, PP) -> f(write_a_char(C, PP), I, ST, WT, PP); f({string,S}, I, ST, WT, PP) -> @@ -1119,6 +1137,12 @@ has_nl([C|Cs]) -> has_nl([]) -> false. +write_a_value(V, PP) -> + flat_leaf(write_value(V, PP)). + +write_an_atom(A, PP) -> + flat_leaf(write_atom(A, PP)). + write_a_char(C, PP) -> flat_leaf(write_char(C, PP)). @@ -1135,7 +1159,7 @@ write_a_string([], _N, _Len, _PP) -> write_a_string(S, N, Len, PP) -> SS = string:sub_string(S, 1, N), Sl = write_string(SS, PP), - case (length(Sl) > Len) and (N > ?MIN_SUBSTRING) of + case (chars_size(Sl) > Len) and (N > ?MIN_SUBSTRING) of true -> write_a_string(S, N-1, Len, PP); false -> @@ -1147,11 +1171,17 @@ flat_leaf(S) -> L = lists:flatten(S), {leaf,length(L),L}. +write_value(V, PP) -> + (PP#pp.value_fun)(V). + +write_atom(A, PP) -> + (PP#pp.value_fun)(A). + write_string(S, PP) -> - lists:flatten((PP#pp.string_fun)(S)). + (PP#pp.string_fun)(S). write_char(C, PP) -> - lists:flatten((PP#pp.char_fun)(C)). + (PP#pp.char_fun)(C). %% %% Utilities diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl index a91143a764..28e5007e5a 100644 --- a/lib/stdlib/src/io_lib.erl +++ b/lib/stdlib/src/io_lib.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2016. All Rights Reserved. +%% Copyright Ericsson AB 1996-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -68,8 +68,8 @@ -export([write_atom/1,write_string/1,write_string/2,write_latin1_string/1, write_latin1_string/2, write_char/1, write_latin1_char/1]). --export([write_string_as_latin1/1, write_string_as_latin1/2, - write_char_as_latin1/1]). +-export([write_atom_as_latin1/1, write_string_as_latin1/1, + write_string_as_latin1/2, write_char_as_latin1/1]). -export([quote_atom/2, char_list/1, latin1_char_list/1, deep_char_list/1, deep_latin1_char_list/1, @@ -344,6 +344,11 @@ write_binary_body(B, _D) -> <<X:L>> = B, [integer_to_list(X),$:,integer_to_list(L)]. +%%% There are two functions to write Unicode atoms: +%%% - they both escape control characters < 160; +%%% - write_atom() never escapes characters >= 160; +%%% - write_atom_as_latin1() also escapes characters >= 255. + %% write_atom(Atom) -> [Char] %% Generate the list of characters needed to print an atom. @@ -351,17 +356,26 @@ write_binary_body(B, _D) -> Atom :: atom(). write_atom(Atom) -> + write_possibly_quoted_atom(Atom, fun write_string/2). + +-spec write_atom_as_latin1(Atom) -> latin1_string() when + Atom :: atom(). + +write_atom_as_latin1(Atom) -> + write_possibly_quoted_atom(Atom, fun write_string_as_latin1/2). + +write_possibly_quoted_atom(Atom, PFun) -> Chars = atom_to_list(Atom), case quote_atom(Atom, Chars) of true -> - write_string(Chars, $'); %' + PFun(Chars, $'); %' false -> Chars end. %% quote_atom(Atom, CharList) %% Return 'true' if atom with chars in CharList needs to be quoted, else -%% return 'false'. +%% return 'false'. Notice that characters >= 160 are always quoted. -spec quote_atom(atom(), chars()) -> boolean(). diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl index aabccfc5d9..ff368d02da 100644 --- a/lib/stdlib/src/io_lib_pretty.erl +++ b/lib/stdlib/src/io_lib_pretty.erl @@ -105,6 +105,8 @@ print(_, _, _, 0, _M, _RF, _Enc, _Str) -> "..."; print(Term, Col, Ll, D, M, RecDefFun, Enc, Str) when Col =< 0 -> %% ensure Col is at least 1 print(Term, 1, Ll, D, M, RecDefFun, Enc, Str); +print(Atom, _Col, _Ll, _D, _M, _RF, Enc, _Str) when is_atom(Atom) -> + write_atom(Atom, Enc); print(Term, Col, Ll, D, M0, RecDefFun, Enc, Str) when is_tuple(Term); is_list(Term); is_map(Term); @@ -407,6 +409,9 @@ print_length({}, _D, _RF, _Enc, _Str) -> {"{}", 2}; print_length(#{}=M, _D, _RF, _Enc, _Str) when map_size(M) =:= 0 -> {"#{}", 3}; +print_length(Atom, _D, _RF, Enc, _Str) when is_atom(Atom) -> + S = write_atom(Atom, Enc), + {S, lists:flatlength(S)}; print_length(List, D, RF, Enc, Str) when is_list(List) -> %% only flat lists are "printable" case Str andalso printable_list(List, D, Enc) of @@ -500,7 +505,7 @@ print_length_tuple(Tuple, D, RF, Enc, Str) -> print_length_record(_Tuple, 1, _RF, _RDefs, _Enc, _Str) -> {"{...}", 5}; print_length_record(Tuple, D, RF, RDefs, Enc, Str) -> - Name = [$# | io_lib:write_atom(element(1, Tuple))], + Name = [$# | write_atom(element(1, Tuple), Enc)], NameL = length(Name), Elements = tl(tuple_to_list(Tuple)), L = print_length_fields(RDefs, D - 1, Elements, RF, Enc, Str), @@ -515,7 +520,7 @@ print_length_fields([Def | Defs], D, [E | Es], RF, Enc, Str) -> print_length_fields(Defs, D - 1, Es, RF, Enc, Str)]. print_length_field(Def, D, E, RF, Enc, Str) -> - Name = io_lib:write_atom(Def), + Name = write_atom(Def, Enc), {S, L} = print_length(E, D, RF, Enc, Str), NameL = length(Name) + 3, {{field, Name, NameL, {S, L}}, NameL + L}. @@ -664,6 +669,11 @@ printable_char(C,unicode) -> C > 16#DFFF andalso C < 16#FFFE orelse C > 16#FFFF andalso C =< 16#10FFFF. +write_atom(A, latin1) -> + io_lib:write_atom_as_latin1(A); +write_atom(A, _Uni) -> + io_lib:write_atom(A). + write_string(S, latin1) -> io_lib:write_latin1_string(S, $"); %" write_string(S, _Uni) -> diff --git a/lib/stdlib/src/shell.erl b/lib/stdlib/src/shell.erl index 28f37ef8bf..394f4f2fa4 100644 --- a/lib/stdlib/src/shell.erl +++ b/lib/stdlib/src/shell.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2016. All Rights Reserved. +%% Copyright Ericsson AB 1996-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -349,10 +349,16 @@ default_prompt(N) -> %% Don't bother flattening the list irrespective of what the %% I/O-protocol states. case is_alive() of - true -> io_lib:format(<<"(~s)~w> ">>, [node(), N]); + true -> io_lib:format(<<"(~ts)~w> ">>, [node_string(), N]); false -> io_lib:format(<<"~w> ">>, [N]) end. +node_string() -> + case encoding() of + latin1 -> io_lib:write_atom_as_latin1(node()); + _ -> io_lib:write_atom(node()) + end. + %% expand_hist(Expressions, CommandNumber) %% Preprocess the expression list replacing all history list commands %% with their expansions. @@ -967,10 +973,11 @@ local_func(f, [{var,_,Name}], Bs, _Shell, _RT, _Lf, _Ef) -> {value,ok,erl_eval:del_binding(Name, Bs)}; local_func(f, [_Other], _Bs, _Shell, _RT, _Lf, _Ef) -> erlang:raise(error, function_clause, [{shell,f,1}]); -local_func(rd, [{atom,_,RecName},RecDef0], Bs, _Shell, RT, _Lf, _Ef) -> +local_func(rd, [{atom,_,RecName0},RecDef0], Bs, _Shell, RT, _Lf, _Ef) -> RecDef = expand_value(RecDef0), RDs = lists:flatten(erl_pp:expr(RecDef)), - Attr = lists:concat(["-record('", RecName, "',", RDs, ")."]), + RecName = io_lib:write_atom_as_latin1(RecName0), + Attr = lists:concat(["-record(", RecName, ",", RDs, ")."]), {ok, Tokens, _} = erl_scan:string(Attr), case erl_parse:parse_form(Tokens) of {ok,AttrForm} -> @@ -1417,9 +1424,11 @@ columns() -> {ok,N} -> N; _ -> 80 end. + encoding() -> [{encoding, Encoding}] = enc(), Encoding. + enc() -> case lists:keyfind(encoding, 1, io:getopts()) of false -> [{encoding,latin1}]; % should never happen diff --git a/lib/stdlib/test/erl_pp_SUITE.erl b/lib/stdlib/test/erl_pp_SUITE.erl index 1a028204b4..808ba9b4c1 100644 --- a/lib/stdlib/test/erl_pp_SUITE.erl +++ b/lib/stdlib/test/erl_pp_SUITE.erl @@ -51,7 +51,7 @@ otp_6321/1, otp_6911/1, otp_6914/1, otp_8150/1, otp_8238/1, otp_8473/1, otp_8522/1, otp_8567/1, otp_8664/1, otp_9147/1, otp_10302/1, otp_10820/1, otp_11100/1, otp_11861/1, pr_1014/1, - otp_13662/1]). + otp_13662/1, otp_14285/1]). %% Internal export. -export([ehook/6]). @@ -80,7 +80,8 @@ groups() -> {tickets, [], [otp_6321, otp_6911, otp_6914, otp_8150, otp_8238, otp_8473, otp_8522, otp_8567, otp_8664, otp_9147, - otp_10302, otp_10820, otp_11100, otp_11861, pr_1014, otp_13662]}]. + otp_10302, otp_10820, otp_11100, otp_11861, pr_1014, otp_13662, + otp_14285]}]. init_per_suite(Config) -> Config. @@ -627,11 +628,6 @@ do_hook(HookFun) -> true = "{some,value}" =:= lists:flatten(erl_pp:expr({value,A0,{some,value}})), - %% Silly... - true = - "if true -> 0 end" =:= - flat_expr({'if',0,[{clause,0,[],[],[{atom,0,0}]}]}), - %% More compatibility: before R6 OldIf = {'if',A0,[{clause,A0,[],[{atom,A0,true}],[{atom,A0,b}]}]}, NewIf = {'if',A0,[{clause,A0,[],[[{atom,A0,true}]],[{atom,A0,b}]}]}, @@ -1069,9 +1065,6 @@ otp_11100(Config) when is_list(Config) -> %% doesn't make a difference (pp:bit_elem_type/1 is an example). A1 = erl_anno:new(1), - %% Cannot trigger the use of the hook function with export/import. - "-export([{fy,a}/b]).\n" = - pf({attribute,A1,export,[{{fy,a},b}]}), "-type foo() :: integer(INVALID-FORM:{foo,bar}:).\n" = pf({attribute,A1,type,{foo,{type,A1,integer,[{foo,bar}]},[]}}), pf({attribute,A1,type, @@ -1146,6 +1139,34 @@ otp_13662(Config) -> ], compile(Config, Ts). +otp_14285(_Config) -> + pp_forms(<<"-export([t/0, '\\x{400}\\''/0]).">>), + pp_forms(<<"-import(lists, [append/2]).">>), + pp_forms(<<"-optional_callbacks([]).">>), + pp_forms(<<"-optional_callbacks(['\\x{400}\\''/1]).">>), + pp_forms(<<"-'\\x{400}\\''('\\x{400}\\'').">>), + pp_forms(<<"-type '\\x{400}\\''() :: '\\x{400}\\''.">>), + pp_forms(<<"-record('\\x{400}\\'', {'\\x{400}\\''}).">>), + pp_forms(<<"-callback '\\x{400}\\''(_) -> '\\x{400}\\''.">>), + pp_forms(<<"t() -> '\\x{400}\\''('\\x{400}\\'').">>), + pp_forms(<<"'\\x{400}\\''(_) -> '\\x{400}\\''.">>), + pp_forms(<<"-spec '\\x{400}'() -> " + "#'\\x{400}'{'\\x{400}' :: '\\x{400}'}.">>), + pp_forms(<<"'\\x{400}\\''() ->" + "R = #'\\x{400}\\''{}," + "#'\\x{400}\\''{'\\x{400}\\'' =" + "{'\\x{400}\\''," + "fun '\\x{400}\\''/0," + "R#'\\x{400}\\''.'\\x{400}\\''," + "#'\\x{400}\\''.'\\x{400}\\''}}.">>), + + %% Special... + true = + "{some,'\\x{400}\\''}" =:= + lists:flatten(erl_pp:expr({value,erl_anno:new(0),{some,'\x{400}\''}}, + [{encoding,latin1}])), + ok. + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% compile(Config, Tests) -> diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl index d546e8fad2..b2754e47ba 100644 --- a/lib/stdlib/test/io_SUITE.erl +++ b/lib/stdlib/test/io_SUITE.erl @@ -30,7 +30,8 @@ io_lib_print_binary_depth_one/1, otp_10302/1, otp_10755/1, otp_10836/1, io_lib_width_too_small/1, io_with_huge_message_queue/1, format_string/1, - maps/1, coverage/1, otp_14178_unicode_atoms/1, otp_14175/1]). + maps/1, coverage/1, otp_14178_unicode_atoms/1, otp_14175/1, + otp_14285/1]). -export([pretty/2]). @@ -61,7 +62,8 @@ all() -> printable_range, bad_printable_range, io_lib_print_binary_depth_one, otp_10302, otp_10755, otp_10836, io_lib_width_too_small, io_with_huge_message_queue, - format_string, maps, coverage, otp_14178_unicode_atoms, otp_14175]. + format_string, maps, coverage, otp_14178_unicode_atoms, otp_14175, + otp_14285]. %% Error cases for output. error_1(Config) when is_list(Config) -> @@ -755,6 +757,8 @@ rfd(rrrrr, 3) -> [f1, f2, f3]; rfd(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa, 0) -> []; +rfd('\x{400}', 1) -> + ['\x{400}']; rfd(_, _) -> no. @@ -1881,6 +1885,7 @@ otp_10302(Suite) when is_list(Suite) -> pretty(Term, Depth) when is_integer(Depth) -> Opts = [{column, 1}, {line_length, 20}, {depth, Depth}, {max_chars, 60}, + {record_print_fun, fun rfd/2}, {encoding, unicode}], pretty(Term, Opts); pretty(Term, Opts) when is_list(Opts) -> @@ -2324,3 +2329,23 @@ text1([T|Ts]) -> [erl_anno:text(Anno) | text1(Ts)]. -endif. % EXACT + +otp_14285(_Config) -> + UOpts = [{record_print_fun, fun rfd/2}, + {encoding, unicode}], + LOpts = [{record_print_fun, fun rfd/2}, + {encoding, latin1}], + + RT = {'\x{400}','\x{400}'}, + "#'\x{400}'{'\x{400}' = '\x{400}'}" = pretty(RT, UOpts), + "#'\\x{400}'{'\\x{400}' = '\\x{400}'}" = pretty(RT, LOpts), + + Chars = lists:seq(0, 512), + [] = [C || + C <- Chars, + S <- io_lib:write_atom_as_latin1(list_to_atom([C])), + not is_latin1(S)], + L1 = [S || C <- Chars, S <- io_lib:write_atom(list_to_atom([C])), + not is_latin1(S)], + L1 = lists:seq(256, 512), + ok. diff --git a/lib/stdlib/test/shell_SUITE.erl b/lib/stdlib/test/shell_SUITE.erl index 4864bc3d72..56002dda25 100644 --- a/lib/stdlib/test/shell_SUITE.erl +++ b/lib/stdlib/test/shell_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. +%% Copyright Ericsson AB 2004-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -30,7 +30,8 @@ progex_bit_syntax/1, progex_records/1, progex_lc/1, progex_funs/1, otp_5990/1, otp_6166/1, otp_6554/1, - otp_7184/1, otp_7232/1, otp_8393/1, otp_10302/1, otp_13719/1]). + otp_7184/1, otp_7232/1, otp_8393/1, otp_10302/1, otp_13719/1, + otp_14285/1]). -export([ start_restricted_from_shell/1, start_restricted_on_command_line/1,restricted_local/1]). @@ -91,7 +92,7 @@ groups() -> progex_funs]}, {tickets, [], [otp_5990, otp_6166, otp_6554, otp_7184, - otp_7232, otp_8393, otp_10302, otp_13719]}]. + otp_7232, otp_8393, otp_10302, otp_13719, otp_14285]}]. init_per_suite(Config) -> Config. @@ -2824,6 +2825,22 @@ otp_13719(Config) when is_list(Config) -> file:delete(File), ok. +otp_14285(Config) -> + {ok,Node} = start_node(shell_suite_helper_4, + "-pa "++proplists:get_value(priv_dir,Config)++ + " +pc unicode"), + Test1 = + <<"begin + io:setopts([{encoding,utf8}]), + [1024] = atom_to_list('\\x{400}'), + rd('\\x{400}', {'\\x{400}' = '\\x{400}'}), + ok = rl('\\x{400}') + end.">>, + "-record('\x{400}',{'\x{400}' = '\x{400}'}).\nok.\n" = + t({Node,Test1}), + test_server:stop_node(Node), + ok. + scan(B) -> F = fun(Ts) -> case erl_parse:parse_term(Ts) of diff --git a/lib/syntax_tools/src/erl_prettypr.erl b/lib/syntax_tools/src/erl_prettypr.erl index 378d69095d..40ddd2b22a 100644 --- a/lib/syntax_tools/src/erl_prettypr.erl +++ b/lib/syntax_tools/src/erl_prettypr.erl @@ -452,7 +452,7 @@ lay_2(Node, Ctxt) -> text(erl_syntax:variable_literal(Node)); atom -> - text(erl_syntax:atom_literal(Node)); + text(erl_syntax:atom_literal(Node, Ctxt#ctxt.encoding)); integer -> text(erl_syntax:integer_literal(Node)); diff --git a/lib/syntax_tools/src/erl_syntax.erl b/lib/syntax_tools/src/erl_syntax.erl index 4347cc46c1..9b2b503762 100644 --- a/lib/syntax_tools/src/erl_syntax.erl +++ b/lib/syntax_tools/src/erl_syntax.erl @@ -139,6 +139,7 @@ is_atom/2, atom_value/1, atom_literal/1, + atom_literal/2, atom_name/1, attribute/1, attribute/2, @@ -1841,7 +1842,7 @@ char_literal(Node) -> %% @doc Returns the literal string represented by a `char' %% node. This includes the leading "`$'" character. %% Depending on the encoding a character beyond 255 will be escaped -%% ('latin1') or copied as is ('utf8'). +%% (`latin1') or copied as is (`utf8'). %% %% @see char/1 @@ -1944,7 +1945,7 @@ string_literal(Node) -> %% @doc Returns the literal string represented by a `string' %% node. This includes surrounding double-quote characters. %% Depending on the encoding characters beyond 255 will be escaped -%% ('latin1') or copied as is ('utf8'). +%% (`latin1') or copied as is (`utf8'). %% %% @see string/1 @@ -1965,6 +1966,7 @@ string_literal(Node, latin1) -> %% @see atom_value/1 %% @see atom_name/1 %% @see atom_literal/1 +%% @see atom_literal/2 %% @see is_atom/2 %% type(Node) = atom @@ -2037,6 +2039,7 @@ atom_name(Node) -> %% ===================================================================== %% @doc Returns the literal string represented by an `atom' %% node. This includes surrounding single-quote characters if necessary. +%% Characters beyond 255 will be escaped. %% %% Note that e.g. the result of `atom("x\ny")' represents %% any and all of `'x\ny'', `'x\12y'', @@ -2048,8 +2051,24 @@ atom_name(Node) -> -spec atom_literal(syntaxTree()) -> string(). atom_literal(Node) -> - io_lib:write_atom(atom_value(Node)). + atom_literal(Node, latin1). + +%% ===================================================================== +%% @doc Returns the literal string represented by an `atom' +%% node. This includes surrounding single-quote characters if necessary. +%% Depending on the encoding a character beyond 255 will be escaped +%% (`latin1') or copied as is (`utf8'). +%% +%% @see atom/1 +%% @see atom_literal/1 +%% @see string/1 +atom_literal(Node, utf8) -> + io_lib:write_atom(atom_value(Node)); +atom_literal(Node, unicode) -> + io_lib:write_atom(atom_value(Node)); +atom_literal(Node, latin1) -> + io_lib:write_atom_as_latin1(atom_value(Node)). %% ===================================================================== %% @equiv map_expr(none, Fields) |