diff options
author | Hans Bolinder <[email protected]> | 2012-10-04 15:58:26 +0200 |
---|---|---|
committer | Hans Bolinder <[email protected]> | 2013-01-02 10:15:17 +0100 |
commit | 300c5466a7c9cfe3ed22bba2a88ba21058406402 (patch) | |
tree | b8c30800b17d5ae98255de2fd2818d8b5d4d6eba /lib/stdlib/src | |
parent | 7a884a31cfcaaf23f7920ba1a006aa2855529030 (diff) | |
download | otp-300c5466a7c9cfe3ed22bba2a88ba21058406402.tar.gz otp-300c5466a7c9cfe3ed22bba2a88ba21058406402.tar.bz2 otp-300c5466a7c9cfe3ed22bba2a88ba21058406402.zip |
[stdlib, kernel] Introduce Unicode support for Erlang source files
Expect modifications, additions and corrections.
There is a kludge in file_io_server and
erl_scan:continuation_location() that's not so pleasing.
Diffstat (limited to 'lib/stdlib/src')
-rw-r--r-- | lib/stdlib/src/Makefile | 1 | ||||
-rw-r--r-- | lib/stdlib/src/epp.erl | 211 | ||||
-rw-r--r-- | lib/stdlib/src/erl_lint.erl | 4 | ||||
-rw-r--r-- | lib/stdlib/src/erl_parse.yrl | 128 | ||||
-rw-r--r-- | lib/stdlib/src/erl_pp.erl | 759 | ||||
-rw-r--r-- | lib/stdlib/src/erl_scan.erl | 225 | ||||
-rw-r--r-- | lib/stdlib/src/escript.erl | 29 | ||||
-rw-r--r-- | lib/stdlib/src/io.erl | 77 | ||||
-rw-r--r-- | lib/stdlib/src/io_lib.erl | 67 | ||||
-rw-r--r-- | lib/stdlib/src/io_lib_format.erl | 37 | ||||
-rw-r--r-- | lib/stdlib/src/io_lib_pretty.erl | 226 | ||||
-rw-r--r-- | lib/stdlib/src/lib.erl | 173 | ||||
-rw-r--r-- | lib/stdlib/src/shell.erl | 109 |
13 files changed, 1288 insertions, 758 deletions
diff --git a/lib/stdlib/src/Makefile b/lib/stdlib/src/Makefile index 14304824d3..575a5cbe4a 100644 --- a/lib/stdlib/src/Makefile +++ b/lib/stdlib/src/Makefile @@ -171,6 +171,7 @@ primary_bootstrap_compiler: \ $(BOOTSTRAP_COMPILER)/ebin/erl_scan.beam \ $(BOOTSTRAP_COMPILER)/ebin/erl_parse.beam \ $(BOOTSTRAP_COMPILER)/ebin/erl_lint.beam \ + $(BOOTSTRAP_COMPILER)/ebin/io.beam \ $(BOOTSTRAP_COMPILER)/ebin/otp_internal.beam $(BOOTSTRAP_COMPILER)/ebin/erl_parse.beam: erl_parse.yrl diff --git a/lib/stdlib/src/epp.erl b/lib/stdlib/src/epp.erl index 2c8d84a9e1..a0f7660ecf 100644 --- a/lib/stdlib/src/epp.erl +++ b/lib/stdlib/src/epp.erl @@ -23,13 +23,18 @@ -export([open/2,open/3,open/5,close/1,format_error/1]). -export([scan_erl_form/1,parse_erl_form/1,macro_defs/1]). -export([parse_file/1, parse_file/3]). +-export([default_encoding/0, encoding_to_string/1, + read_encoding/1, read_encoding/2, set_encoding/1]). -export([interpret_file_attribute/1]). -export([normalize_typed_record_fields/1,restore_typed_record_fields/1]). %%------------------------------------------------------------------------ +-export_type([source_encoding/0]). + -type macros() :: [{atom(), term()}]. -type epp_handle() :: pid(). +-type source_encoding() :: latin1 | utf8. %% Epp state record. -record(epp, {file, %Current file @@ -213,6 +218,173 @@ parse_file(Epp) -> [{eof,Location}] end. +-define(DEFAULT_ENCODING, latin1). + +-spec default_encoding() -> source_encoding(). + +default_encoding() -> + ?DEFAULT_ENCODING. + +-spec encoding_to_string(Encoding) -> string() when + Encoding :: source_encoding(). + +encoding_to_string(latin1) -> "coding: latin-1"; +encoding_to_string(utf8) -> "coding: utf-8". + +-spec read_encoding(FileName) -> source_encoding() | none when + FileName :: file:name(). + +read_encoding(Name) -> + read_encoding(Name, []). + +-spec read_encoding(FileName, Options) -> source_encoding() | none when + FileName :: file:name(), + Options :: [Option], + Option :: {in_comment_only, boolean()}. + +read_encoding(Name, Options) -> + InComment = proplists:get_value(in_comment_only, Options, true), + case file:open(Name, [read]) of + {ok,File} -> + try read_encoding_from_file(File, InComment) + after ok = file:close(File) + end; + _Error -> + none + end. + +-spec set_encoding(File) -> source_encoding() | none when + File :: io:device(). % pid(); raw files don't work + +set_encoding(File) -> + Encoding = read_encoding_from_file(File, true), + Enc = case Encoding of + none -> default_encoding(); + Encoding -> Encoding + end, + ok = io:setopts(File, [{encoding, Enc}]), + Encoding. + +-spec read_encoding_from_file(File, InComment) -> source_encoding() | none when + File :: io:device(), + InComment :: boolean(). + +-define(ENC_CHUNK, 32). +-define(N_ENC_CHUNK, 16). % a total of 512 bytes + +read_encoding_from_file(File, InComment) -> + {ok, Pos0} = file:position(File, cur), + Opts = io:getopts(File), + Encoding0 = lists:keyfind(encoding, 1, Opts), + Binary0 = lists:keyfind(binary, 1, Opts), + ok = io:setopts(File, [binary, {encoding, latin1}]), + try + {B, Fun} = (reader(File, 0))(), + com_nl(B, Fun, 0, InComment) + catch + throw:no -> + none + after + {ok, Pos0} = file:position(File, Pos0), + ok = io:setopts(File, [Binary0, Encoding0]) + end. + +reader(Fd, N) -> + fun() when N =:= ?N_ENC_CHUNK -> + throw(no); + () -> + case file:read(Fd, ?ENC_CHUNK) of + eof -> + {<<>>, reader(Fd, N+1)}; + {ok, Bin} -> + {Bin, reader(Fd, N+1)}; + {error, _} -> + throw(no) % ignore errors + end + end. + +com_nl(_, _, 2, _) -> + throw(no); +com_nl(B, Fun, N, false=Com) -> + com_c(B, Fun, N, Com); +com_nl(B, Fun, N, true=Com) -> + com(B, Fun, N, Com). + +com(<<"\n",B/binary>>, Fun, N, Com) -> + com_nl(B, Fun, N+1, Com); +com(<<"%", B/binary>>, Fun, N, Com) -> + com_c(B, Fun, N, Com); +com(<<_:1/unit:8,B/binary>>, Fun, N, Com) -> + com(B, Fun, N, Com); +com(<<>>, Fun, N, Com) -> + {B, Fun1} = Fun(), + com(B, Fun1, N, Com). + +com_c(<<"c",B/binary>>, Fun, N, Com) -> + com_oding(B, Fun, N, Com); +com_c(<<"\n",B/binary>>, Fun, N, Com) -> + com_nl(B, Fun, N+1, Com); +com_c(<<_:1/unit:8,B/binary>>, Fun, N, Com) -> + com_c(B, Fun, N, Com); +com_c(<<>>, Fun, N, Com) -> + {B, Fun1} = Fun(), + com_c(B, Fun1, N, Com). + +com_oding(<<"oding",B/binary>>, Fun, N, Com) -> + com_sep(B, Fun, N, Com); +com_oding(B, Fun, N, Com) when byte_size(B) >= length("oding") -> + com_c(B, Fun, N, Com); +com_oding(B, Fun, N, Com) -> + {B1, Fun1} = Fun(), + com_oding(list_to_binary([B, B1]), Fun1, N, Com). + +com_sep(<<":",B/binary>>, Fun, N, Com) -> + com_space(B, Fun, N, Com); +com_sep(<<"=",B/binary>>, Fun, N, Com) -> + com_space(B, Fun, N, Com); +com_sep(<<"\s",B/binary>>, Fun, N, Com) -> + com_sep(B, Fun, N, Com); +com_sep(<<>>, Fun, N, Com) -> + {B, Fun1} = Fun(), + com_sep(B, Fun1, N, Com); +com_sep(B, Fun, N, Com) -> + com_c(B, Fun, N, Com). + +com_space(<<"\s",B/binary>>, Fun, N, Com) -> + com_space(B, Fun, N, Com); +com_space(<<>>, Fun, N, Com) -> + {B, Fun1} = Fun(), + com_space(B, Fun1, N, Com); +com_space(B, Fun, N, _Com) -> + com_enc(B, Fun, N, [], []). + +com_enc(<<C:1/unit:8,B/binary>>, Fun, N, L, Ps) when C >= $a, C =< $z; + C >= $A, C =< $Z; + C >= $0, C =< $9 -> + com_enc(B, Fun, N, [C | L], Ps); +com_enc(<<>>, Fun, N, L, Ps) -> + case Fun() of + {<<>>, _} -> + com_enc_end([L | Ps]); + {B, Fun1} -> + com_enc(B, Fun1, N, L, Ps) + end; +com_enc(<<"-",B/binary>>, Fun, N, L, Ps) -> + com_enc(B, Fun, N, [], [L | Ps]); +com_enc(_B, _Fun, _N, L, Ps) -> + com_enc_end([L | Ps]). + +com_enc_end(Ps0) -> + Ps = lists:reverse([lists:reverse(string:to_lower(P)) || P <- Ps0]), + com_encoding(Ps). + +com_encoding(["latin","1"|_]) -> + latin1; +com_encoding(["utf","8"|_]) -> + utf8; +com_encoding(_) -> + throw(no). % Don't try any further + normalize_typed_record_fields([]) -> {typed, []}; normalize_typed_record_fields(Fields) -> @@ -266,14 +438,17 @@ init_server(Pid, Name, File, AtLocation, Path, Pdm, Pre) -> Ms0 = predef_macros(Name), case user_predef(Pdm, Ms0) of {ok,Ms1} -> - epp_reply(Pid, {ok,self()}), - %% ensure directory of current source file is first in path + _ = set_encoding(File), + epp_reply(Pid, {ok,self()}), + %% ensure directory of current source file is + %% first in path Path1 = [filename:dirname(Name) | Path], - St = #epp{file=File, location=AtLocation, delta=0, name=Name, - name2=Name, path=Path1, macs=Ms1, pre_opened = Pre}, - From = wait_request(St), - enter_file_reply(From, Name, AtLocation, AtLocation), - wait_req_scan(St); + St = #epp{file=File, location=AtLocation, delta=0, + name=Name, name2=Name, path=Path1, macs=Ms1, + pre_opened = Pre}, + From = wait_request(St), + enter_file_reply(From, Name, AtLocation, AtLocation), + wait_req_scan(St); {error,E} -> epp_reply(Pid, {error,E}) end. @@ -385,19 +560,20 @@ enter_file(NewName, Inc, From, St) -> %% enter_file2(File, FullName, From, EppState, AtLocation) -> EppState. %% Set epp to use this file and "enter" it. -enter_file2(NewF, Pname, From, St, AtLocation) -> +enter_file2(NewF, Pname, From, St0, AtLocation) -> Loc = start_loc(AtLocation), enter_file_reply(From, Pname, Loc, AtLocation), - Ms = dict:store({atom,'FILE'}, {none,[{string,Loc,Pname}]}, St#epp.macs), + Ms = dict:store({atom,'FILE'}, {none,[{string,Loc,Pname}]}, St0#epp.macs), %% update the head of the include path to be the directory of the new %% source file, so that an included file can always include other files %% relative to its current location (this is also how C does it); note %% that the directory of the parent source file (the previous head of %% the path) must be dropped, otherwise the path used within the current %% file will depend on the order of file inclusions in the parent files - Path = [filename:dirname(Pname) | tl(St#epp.path)], + Path = [filename:dirname(Pname) | tl(St0#epp.path)], + _ = set_encoding(NewF), #epp{file=NewF,location=Loc,name=Pname,delta=0, - sstk=[St|St#epp.sstk],path=Path,macs=Ms}. + sstk=[St0|St0#epp.sstk],path=Path,macs=Ms}. enter_file_reply(From, Name, Location, AtLocation) -> Attr = loc_attr(AtLocation), @@ -456,7 +632,7 @@ leave_file(From, St) -> %% scan_toks(Tokens, From, EppState) scan_toks(From, St) -> - case io:scan_erl_form(St#epp.file, '', St#epp.location) of + case io:scan_erl_form(St#epp.file, '', St#epp.location, [unicode]) of {ok,Toks,Cl} -> scan_toks(Toks, From, St#epp{location=Cl}); {error,E,Cl} -> @@ -830,7 +1006,7 @@ new_location(Ln, {Le,_}, {Lf,_}) -> %% nested conditionals and repeated 'else's. skip_toks(From, St, [I|Sis]) -> - case io:scan_erl_form(St#epp.file, '', St#epp.location) of + case io:scan_erl_form(St#epp.file, '', St#epp.location, [unicode]) of {ok,[{'-',_Lh},{atom,_Li,ifdef}|_Toks],Cl} -> skip_toks(From, St#epp{location=Cl}, [ifdef,I|Sis]); {ok,[{'-',_Lh},{atom,_Li,ifndef}|_Toks],Cl} -> @@ -1094,6 +1270,7 @@ expand_arg([], Ts, L, Rest, Bs) -> %%% tokenized would yield the token list Ts. %% erl_scan:token_info(T, text) is not backward compatible with this. +%% Note that escaped characters will be replaced by themselves. token_src({dot, _}) -> "."; token_src({X, _}) when is_atom(X) -> @@ -1101,16 +1278,16 @@ token_src({X, _}) when is_atom(X) -> token_src({var, _, X}) -> atom_to_list(X); token_src({char,_,C}) -> - io_lib:write_char(C); + io_lib:write_unicode_char(C); token_src({string, _, X}) -> - lists:flatten(io_lib:format("~p", [X])); + io_lib:write_unicode_string(X); token_src({_, _, X}) -> - lists:flatten(io_lib:format("~w", [X])). + io_lib:format("~w", [X]). stringify1([]) -> []; stringify1([T | Tokens]) -> - [io_lib:format(" ~s", [token_src(T)]) | stringify1(Tokens)]. + [io_lib:format(" ~ts", [token_src(T)]) | stringify1(Tokens)]. stringify(Ts, L) -> [$\s | S] = lists:flatten(stringify1(Ts)), diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl index 1e5f962375..0a442d950f 100644 --- a/lib/stdlib/src/erl_lint.erl +++ b/lib/stdlib/src/erl_lint.erl @@ -3618,6 +3618,10 @@ extract_sequence(4, [$t, $c | Fmt], Need) -> extract_sequence(5, [$c|Fmt], Need); extract_sequence(4, [$t, $s | Fmt], Need) -> extract_sequence(5, [$s|Fmt], Need); +extract_sequence(4, [$t, $p | Fmt], Need) -> + extract_sequence(5, [$p|Fmt], Need); +extract_sequence(4, [$t, $P | Fmt], Need) -> + extract_sequence(5, [$P|Fmt], Need); extract_sequence(4, [$t, C | _Fmt], _Need) -> {error,"invalid control ~t" ++ [C]}; extract_sequence(4, Fmt, Need) -> diff --git a/lib/stdlib/src/erl_parse.yrl b/lib/stdlib/src/erl_parse.yrl index 928c10f7f2..27a2ba80eb 100644 --- a/lib/stdlib/src/erl_parse.yrl +++ b/lib/stdlib/src/erl_parse.yrl @@ -2,7 +2,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2011. All Rights Reserved. +%% Copyright Ericsson AB 1996-2012. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -923,73 +923,77 @@ normalise_list([]) -> -spec abstract(Data) -> AbsTerm when Data :: term(), AbsTerm :: abstract_expr(). -abstract(T) when is_integer(T) -> {integer,0,T}; -abstract(T) when is_float(T) -> {float,0,T}; -abstract(T) when is_atom(T) -> {atom,0,T}; -abstract([]) -> {nil,0}; -abstract(B) when is_bitstring(B) -> - {bin, 0, [abstract_byte(Byte, 0) || Byte <- bitstring_to_list(B)]}; -abstract([C|T]) when is_integer(C), 0 =< C, C < 256 -> - abstract_string(T, [C]); -abstract([H|T]) -> - {cons,0,abstract(H),abstract(T)}; -abstract(Tuple) when is_tuple(Tuple) -> - {tuple,0,abstract_list(tuple_to_list(Tuple))}. - -abstract_string([C|T], String) when is_integer(C), 0 =< C, C < 256 -> - abstract_string(T, [C|String]); -abstract_string([], String) -> - {string, 0, lists:reverse(String)}; -abstract_string(T, String) -> - not_string(String, abstract(T)). - -not_string([C|T], Result) -> - not_string(T, {cons, 0, {integer, 0, C}, Result}); -not_string([], Result) -> +abstract(T) -> + abstract(T, 0, epp:default_encoding()). + +%%% abstract/2 takes line and encoding options +-spec abstract(Data, Options) -> AbsTerm when + Data :: term(), + Options :: Line | [Option], + Option :: {line, Line} | {encoding, Encoding}, + Encoding :: latin1 | unicode | utf8, + Line :: erl_scan:line(), + AbsTerm :: abstract_expr(). + +abstract(T, Line) when is_integer(Line) -> + abstract(T, Line, epp:default_encoding()); +abstract(T, Options) when is_list(Options) -> + Line = proplists:get_value(line, Options, 0), + Encoding = proplists:get_value(encoding, Options,epp:default_encoding()), + abstract(T, Line, Encoding). + +-define(UNICODE(C), + (C >= 0 andalso C < 16#D800 orelse + C > 16#DFFF andalso C < 16#FFFE orelse + C > 16#FFFF andalso C =< 16#10FFFF)). + +abstract(T, L, _E) when is_integer(T) -> {integer,L,T}; +abstract(T, L, _E) when is_float(T) -> {float,L,T}; +abstract(T, L, _E) when is_atom(T) -> {atom,L,T}; +abstract([], L, _E) -> {nil,L}; +abstract(B, L, _E) when is_bitstring(B) -> + {bin, L, [abstract_byte(Byte, L) || Byte <- bitstring_to_list(B)]}; +abstract([C|T], L, unicode=E) when ?UNICODE(C) -> + abstract_unicode_string(T, [C], L, E); +abstract([C|T], L, utf8=E) when ?UNICODE(C) -> + abstract_unicode_string(T, [C], L, E); +abstract([C|T], L, latin1=E) when is_integer(C), 0 =< C, C < 256 -> + abstract_string(T, [C], L, E); +abstract([H|T], L, E) -> + {cons,L,abstract(H, L, E),abstract(T, L, E)}; +abstract(Tuple, L, E) when is_tuple(Tuple) -> + {tuple,L,abstract_list(tuple_to_list(Tuple), L, E)}. + +abstract_string([C|T], String, L, E) when is_integer(C), 0 =< C, C < 256 -> + abstract_string(T, [C|String], L, E); +abstract_string([], String, L, _E) -> + {string, L, lists:reverse(String)}; +abstract_string(T, String, L, E) -> + not_string(String, abstract(T, L, E), L, E). + +abstract_unicode_string([C|T], String, L, E) when ?UNICODE(C) -> + abstract_unicode_string(T, [C|String], L, E); +abstract_unicode_string([], String, L, _E) -> + {string, L, lists:reverse(String)}; +abstract_unicode_string(T, String, L, E) -> + not_string(String, abstract(T, L, E), L, E). + +not_string([C|T], Result, L, E) -> + not_string(T, {cons, L, {integer, L, C}, Result}, L, E); +not_string([], Result, _L, _E) -> Result. -abstract_list([H|T]) -> - [abstract(H)|abstract_list(T)]; -abstract_list([]) -> +abstract_list([H|T], L, E) -> + [abstract(H, L, E)|abstract_list(T, L, E)]; +abstract_list([], _L, _E) -> []. -abstract_byte(Byte, Line) when is_integer(Byte) -> - {bin_element, Line, {integer, Line, Byte}, default, default}; -abstract_byte(Bits, Line) -> +abstract_byte(Byte, L) when is_integer(Byte) -> + {bin_element, L, {integer, L, Byte}, default, default}; +abstract_byte(Bits, L) -> Sz = bit_size(Bits), <<Val:Sz>> = Bits, - {bin_element, Line, {integer, Line, Val}, {integer, Line, Sz}, default}. - -%%% abstract/2 keeps the line number -abstract(T, Line) when is_integer(T) -> {integer,Line,T}; -abstract(T, Line) when is_float(T) -> {float,Line,T}; -abstract(T, Line) when is_atom(T) -> {atom,Line,T}; -abstract([], Line) -> {nil,Line}; -abstract(B, Line) when is_bitstring(B) -> - {bin, Line, [abstract_byte(Byte, Line) || Byte <- bitstring_to_list(B)]}; -abstract([C|T], Line) when is_integer(C), 0 =< C, C < 256 -> - abstract_string(T, [C], Line); -abstract([H|T], Line) -> - {cons,Line,abstract(H, Line),abstract(T, Line)}; -abstract(Tuple, Line) when is_tuple(Tuple) -> - {tuple,Line,abstract_list(tuple_to_list(Tuple), Line)}. - -abstract_string([C|T], String, Line) when is_integer(C), 0 =< C, C < 256 -> - abstract_string(T, [C|String], Line); -abstract_string([], String, Line) -> - {string, Line, lists:reverse(String)}; -abstract_string(T, String, Line) -> - not_string(String, abstract(T, Line), Line). - -not_string([C|T], Result, Line) -> - not_string(T, {cons, Line, {integer, Line, C}, Result}, Line); -not_string([], Result, _Line) -> - Result. - -abstract_list([H|T], Line) -> - [abstract(H, Line)|abstract_list(T, Line)]; -abstract_list([], _Line) -> - []. + {bin_element, L, {integer, L, Val}, {integer, L, Sz}, default}. %% Generate a list of tokens representing the abstract term. diff --git a/lib/stdlib/src/erl_pp.erl b/lib/stdlib/src/erl_pp.erl index 6b5aa951cf..0383ce6839 100644 --- a/lib/stdlib/src/erl_pp.erl +++ b/lib/stdlib/src/erl_pp.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2011. All Rights Reserved. +%% Copyright Ericsson AB 1996-2012. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -26,7 +26,7 @@ guard/1,guard/2,exprs/1,exprs/2,exprs/3,expr/1,expr/2,expr/3,expr/4]). -import(lists, [append/1,foldr/3,mapfoldl/3,reverse/1,reverse/2]). --import(io_lib, [write/1,format/2,write_char/1,write_string/1]). +-import(io_lib, [write/1,format/2]). -import(erl_parse, [inop_prec/1,preop_prec/1,func_prec/0,max_prec/0]). -define(MAXLINE, 72). @@ -36,7 +36,15 @@ CurrentIndentation :: integer(), CurrentPrecedence :: non_neg_integer(), HookFunction :: hook_function()) -> - io_lib:chars())). + io_lib:chars())). + +-type(option() :: {hook, hook_function()} + | {encoding, latin1 | unicode | utf8}). +-type(options() :: hook_function() | [option()]). + +-record(pp, {string_fun, char_fun}). + +-record(options, {hook, encoding, opts}). %%% %%% Exported functions @@ -48,12 +56,12 @@ form(Thing) -> form(Thing, none). --spec(form(Form, HookFunction) -> io_lib:chars() when +-spec(form(Form, Options) -> io_lib:chars() when Form :: erl_parse:abstract_form(), - HookFunction :: hook_function()). + Options :: options()). -form(Thing, Hook) -> - frmt(lform(Thing, Hook)). +form(Thing, Options) -> + frmt(lform(Thing, options(Options)), state(Options)). -spec(attribute(Attribute) -> io_lib:chars() when Attribute :: erl_parse:abstract_form()). @@ -61,12 +69,12 @@ form(Thing, Hook) -> attribute(Thing) -> attribute(Thing, none). --spec(attribute(Attribute, HookFunction) -> io_lib:chars() when +-spec(attribute(Attribute, Options) -> io_lib:chars() when Attribute :: erl_parse:abstract_form(), - HookFunction :: hook_function()). + Options :: options()). -attribute(Thing, Hook) -> - frmt(lattribute(Thing, Hook)). +attribute(Thing, Options) -> + frmt(lattribute(Thing, options(Options)), state(Options)). -spec(function(Function) -> io_lib:chars() when Function :: erl_parse:abstract_form()). @@ -74,18 +82,18 @@ attribute(Thing, Hook) -> function(F) -> function(F, none). --spec(function(Function, HookFunction) -> io_lib:chars() when +-spec(function(Function, Options) -> io_lib:chars() when Function :: erl_parse:abstract_form(), - HookFunction :: hook_function()). + Options :: options()). -function(F, Hook) -> - frmt(lfunction(F, Hook)). +function(F, Options) -> + frmt(lfunction(F, options(Options)), state(Options)). rule(R) -> rule(R, none). -rule(R, Hook) -> - frmt(lrule(R, Hook)). +rule(R, Options) -> + frmt(lrule(R, options(Options)), state(Options)). -spec(guard(Guard) -> io_lib:chars() when Guard :: [erl_parse:abstract_expr()]). @@ -93,12 +101,12 @@ rule(R, Hook) -> guard(Gs) -> guard(Gs, none). --spec(guard(Guard, HookFunction) -> io_lib:chars() when +-spec(guard(Guard, Options) -> io_lib:chars() when Guard :: [erl_parse:abstract_expr()], - HookFunction :: hook_function()). + Options :: options()). -guard(Gs, Hook) -> - frmt(lguard(Gs, Hook)). +guard(Gs, Options) -> + frmt(lguard(Gs, options(Options)), state(Options)). -spec(exprs(Expressions) -> io_lib:chars() when Expressions :: [erl_parse:abstract_expr()]). @@ -106,99 +114,129 @@ guard(Gs, Hook) -> exprs(Es) -> exprs(Es, 0, none). --spec(exprs(Expressions, HookFunction) -> io_lib:chars() when +-spec(exprs(Expressions, Options) -> io_lib:chars() when Expressions :: [erl_parse:abstract_expr()], - HookFunction :: hook_function()). + Options :: options()). -exprs(Es, Hook) -> - exprs(Es, 0, Hook). +exprs(Es, Options) -> + exprs(Es, 0, Options). --spec(exprs(Expressions, Indent, HookFunction) -> io_lib:chars() when +-spec(exprs(Expressions, Indent, Options) -> io_lib:chars() when Expressions :: [erl_parse:abstract_expr()], Indent :: integer(), - HookFunction :: hook_function()). + Options :: options()). -exprs(Es, I, Hook) -> - frmt({seq,[],[],[$,],lexprs(Es, Hook)}, I). +exprs(Es, I, Options) -> + frmt({seq,[],[],[$,],lexprs(Es, options(Options))}, I, state(Options)). -spec(expr(Expression) -> io_lib:chars() when Expression :: erl_parse:abstract_expr()). expr(E) -> - frmt(lexpr(E, 0, none)). + frmt(lexpr(E, 0, options(none)), state(none)). --spec(expr(Expression, HookFunction) -> io_lib:chars() when +-spec(expr(Expression, Options) -> io_lib:chars() when Expression :: erl_parse:abstract_expr(), - HookFunction :: hook_function()). + Options :: options()). -expr(E, Hook) -> - frmt(lexpr(E, 0, Hook)). +expr(E, Options) -> + frmt(lexpr(E, 0, options(Options)), state(Options)). --spec(expr(Expression, Indent, HookFunction) -> io_lib:chars() when +-spec(expr(Expression, Indent, Options) -> io_lib:chars() when Expression :: erl_parse:abstract_expr(), Indent :: integer(), - HookFunction :: hook_function()). + Options :: options()). -expr(E, I, Hook) -> - frmt(lexpr(E, 0, Hook), I). +expr(E, I, Options) -> + frmt(lexpr(E, 0, options(Options)), I, state(Options)). --spec(expr(Expression, Indent, Precedence, HookFunction) -> io_lib:chars() when +-spec(expr(Expression, Indent, Precedence, Options) -> io_lib:chars() when Expression :: erl_parse:abstract_expr(), Indent :: integer(), Precedence :: non_neg_integer(), - HookFunction :: hook_function()). + Options :: options()). -expr(E, I, P, Hook) -> - frmt(lexpr(E, P, Hook), I). +expr(E, I, P, Options) -> + frmt(lexpr(E, P, options(Options)), I, state(Options)). %%% %%% Local functions %%% -lform({attribute,Line,Name,Arg}, Hook) -> - lattribute({attribute,Line,Name,Arg}, Hook); -lform({function,Line,Name,Arity,Clauses}, Hook) -> - lfunction({function,Line,Name,Arity,Clauses}, Hook); -lform({rule,Line,Name,Arity,Clauses}, Hook) -> - lrule({rule,Line,Name,Arity,Clauses}, Hook); +options(Options) when is_list(Options) -> + Hook = proplists:get_value(hook, Options, none), + Encoding = encoding(Options), + #options{hook = Hook, encoding = Encoding, opts = Options}; +options(Hook) -> + #options{hook = Hook, encoding = encoding([]), opts = Hook}. + +state(Options) when is_list(Options) -> + case encoding(Options) of + latin1 -> state(); + unicode -> unicode_state() + end; +state(_Hook) -> + state(). + +state() -> + #pp{string_fun = fun io_lib:write_unicode_string_as_latin1/1, + char_fun = fun io_lib:write_unicode_char_as_latin1/1}. + +unicode_state() -> + #pp{string_fun = fun io_lib:write_unicode_string/1, + char_fun = fun io_lib:write_unicode_char/1}. + +encoding(Options) -> + case proplists:get_value(encoding, Options, epp:default_encoding()) of + latin1 -> latin1; + utf8 -> unicode; + unicode -> unicode + end. + +lform({attribute,Line,Name,Arg}, Opts) -> + lattribute({attribute,Line,Name,Arg}, Opts); +lform({function,Line,Name,Arity,Clauses}, Opts) -> + lfunction({function,Line,Name,Arity,Clauses}, Opts); +lform({rule,Line,Name,Arity,Clauses}, Opts) -> + lrule({rule,Line,Name,Arity,Clauses}, Opts); %% These are specials to make it easier for the compiler. -lform({error,E}, _Hook) -> +lform({error,E}, _Opts) -> leaf(format("~p\n", [{error,E}])); -lform({warning,W}, _Hook) -> +lform({warning,W}, _Opts) -> leaf(format("~p\n", [{warning,W}])); -lform({eof,_Line}, _Hook) -> +lform({eof,_Line}, _Opts) -> $\n. -lattribute({attribute,_Line,type,Type}, Hook) -> - [typeattr(type, Type, Hook),leaf(".\n")]; -lattribute({attribute,_Line,opaque,Type}, Hook) -> - [typeattr(opaque, Type, Hook),leaf(".\n")]; -lattribute({attribute,_Line,spec,Arg}, _Hook) -> +lattribute({attribute,_Line,type,Type}, Opts) -> + [typeattr(type, Type, Opts),leaf(".\n")]; +lattribute({attribute,_Line,opaque,Type}, Opts) -> + [typeattr(opaque, Type, Opts),leaf(".\n")]; +lattribute({attribute,_Line,spec,Arg}, _Opts) -> [specattr(Arg),leaf(".\n")]; -lattribute({attribute,_Line,Name,Arg}, Hook) -> - [lattribute(Name, Arg, Hook),leaf(".\n")]. +lattribute({attribute,_Line,Name,Arg}, Opts) -> + [lattribute(Name, Arg, Opts),leaf(".\n")]. -lattribute(module, {M,Vs}, _Hook) -> +lattribute(module, {M,Vs}, _Opts) -> attr("module",[{var,0,pname(M)}, foldr(fun(V, C) -> {cons,0,{var,0,V},C} end, {nil,0}, Vs)]); -lattribute(module, M, _Hook) -> +lattribute(module, M, _Opts) -> attr("module", [{var,0,pname(M)}]); -lattribute(export, Falist, _Hook) -> +lattribute(export, Falist, _Opts) -> call({var,0,"-export"}, [falist(Falist)], 0, none); -lattribute(import, Name, _Hook) when is_list(Name) -> +lattribute(import, Name, _Opts) when is_list(Name) -> attr("import", [{var,0,pname(Name)}]); -lattribute(import, {From,Falist}, _Hook) -> +lattribute(import, {From,Falist}, _Opts) -> attr("import",[{var,0,pname(From)},falist(Falist)]); -lattribute(file, {Name,Line}, _Hook) -> +lattribute(file, {Name,Line}, _Opts) -> attr("file", [{var,0,format("~p", [Name])},{integer,0,Line}]); -lattribute(record, {Name,Is}, Hook) -> +lattribute(record, {Name,Is}, Opts) -> Nl = leaf(format("-record(~w,", [Name])), - [{first,Nl,record_fields(Is, Hook)},$)]; -lattribute(Name, Arg, _Hook) -> - attr(write(Name), [erl_parse:abstract(Arg)]). + [{first,Nl,record_fields(Is, Opts)},$)]; +lattribute(Name, Arg, #options{encoding = Encoding}) -> + attr(write(Name), [erl_parse:abstract(Arg, [{encoding,Encoding}])]). -typeattr(Tag, {TypeName,Type,Args}, _Hook) -> +typeattr(Tag, {TypeName,Type,Args}, _Opts) -> {first,leaf("-"++atom_to_list(Tag)++" "), typed(call({atom,0,TypeName}, Args, 0, none), Type)}. @@ -293,7 +331,7 @@ guard_type(Before, Gs) -> Gl = {list,[{step,'when',expr_list(Gs, [$,], fun constraint/2, none)}]}, {list,[{step,Before,Gl}]}. -constraint({type,_Line,constraint,[Tag,As]}, _Hook) -> +constraint({type,_Line,constraint,[Tag,As]}, _Opts) -> simple_type(Tag, As). fun_type(Before, {type,_,'fun',[FType,Ret]}) -> @@ -333,231 +371,232 @@ falist([]) -> falist([{Name,Arity}|Falist]) -> {cons,0,{var,0,format("~w/~w", [Name,Arity])},falist(Falist)}. -lfunction({function,_Line,Name,_Arity,Cs}, Hook) -> - Cll = nl_clauses(fun (C, H) -> func_clause(Name, C, H) end, $;, Hook, Cs), +lfunction({function,_Line,Name,_Arity,Cs}, Opts) -> + Cll = nl_clauses(fun (C, H) -> func_clause(Name, C, H) end, $;, Opts, Cs), [Cll,leaf(".\n")]. -func_clause(Name, {clause,Line,Head,Guard,Body}, Hook) -> - Hl = call({atom,Line,Name}, Head, 0, Hook), - Gl = guard_when(Hl, Guard, Hook), - Bl = body(Body, Hook), +func_clause(Name, {clause,Line,Head,Guard,Body}, Opts) -> + Hl = call({atom,Line,Name}, Head, 0, Opts), + Gl = guard_when(Hl, Guard, Opts), + Bl = body(Body, Opts), {step,Gl,Bl}. -lrule({rule,_Line,Name,_Arity,Cs}, Hook) -> - Cll = nl_clauses(fun (C, H) -> rule_clause(Name, C, H) end, $;, Hook, Cs), +lrule({rule,_Line,Name,_Arity,Cs}, Opts) -> + Cll = nl_clauses(fun (C, H) -> rule_clause(Name, C, H) end, $;, Opts, Cs), [Cll,leaf(".\n")]. -rule_clause(Name, {clause,Line,Head,Guard,Body}, Hook) -> - Hl = call({atom,Line,Name}, Head, 0, Hook), - Gl = guard_when(Hl, Guard, Hook, leaf(" :-")), - Bl = rule_body(Body, Hook), +rule_clause(Name, {clause,Line,Head,Guard,Body}, Opts) -> + Hl = call({atom,Line,Name}, Head, 0, Opts), + Gl = guard_when(Hl, Guard, Opts, leaf(" :-")), + Bl = rule_body(Body, Opts), {step,Gl,Bl}. -rule_body(Es, Hook) -> - lc_quals(Es, Hook). +rule_body(Es, Opts) -> + lc_quals(Es, Opts). -guard_when(Before, Guard, Hook) -> - guard_when(Before, Guard, Hook, ' ->'). +guard_when(Before, Guard, Opts) -> + guard_when(Before, Guard, Opts, ' ->'). -guard_when(Before, Guard, Hook, After) -> - Gl = lguard(Guard, Hook), +guard_when(Before, Guard, Opts, After) -> + Gl = lguard(Guard, Opts), [{list,[{step,Before,Gl}]},After]. -lguard([E|Es], Hook) when is_list(E) -> - {list,[{step,'when',expr_list([E|Es], [$;], fun guard0/2, Hook)}]}; -lguard([E|Es], Hook) -> % before R6 - lguard([[E|Es]], Hook); +lguard([E|Es], Opts) when is_list(E) -> + {list,[{step,'when',expr_list([E|Es], [$;], fun guard0/2, Opts)}]}; +lguard([E|Es], Opts) -> % before R6 + lguard([[E|Es]], Opts); lguard([], _) -> []. -guard0(Es, Hook) -> - expr_list(Es, [$,], fun lexpr/2, Hook). +guard0(Es, Opts) -> + expr_list(Es, [$,], fun lexpr/2, Opts). -%% body(Before, Es, Hook) -> [Char]. +%% body(Before, Es, Opts) -> [Char]. -body([E], Hook) -> - lexpr(E, Hook); -body(Es, Hook) -> - {prefer_nl,[$,],lexprs(Es, Hook)}. +body([E], Opts) -> + lexpr(E, Opts); +body(Es, Opts) -> + {prefer_nl,[$,],lexprs(Es, Opts)}. -lexpr(E, Hook) -> - lexpr(E, 0, Hook). +lexpr(E, Opts) -> + lexpr(E, 0, Opts). lexpr({var,_,V}, _, _) when is_integer(V) -> %Special hack for Robert leaf(format("_~w", [V])); lexpr({var,_,V}, _, _) -> leaf(format("~s", [V])); -lexpr({char,_,C}, _, _) -> leaf(write_char(C)); +lexpr({char,_,C}, _, _) -> {char,C}; lexpr({integer,_,N}, _, _) -> leaf(write(N)); lexpr({float,_,F}, _, _) -> leaf(write(F)); lexpr({atom,_,A}, _, _) -> leaf(write(A)); lexpr({string,_,S}, _, _) -> {string,S}; lexpr({nil,_}, _, _) -> '[]'; -lexpr({cons,_,H,T}, _, Hook) -> - list(T, [H], Hook); -lexpr({lc,_,E,Qs}, _Prec, Hook) -> - Lcl = {list,[{step,[lexpr(E, Hook),leaf(" ||")],lc_quals(Qs, Hook)}]}, +lexpr({cons,_,H,T}, _, Opts) -> + list(T, [H], Opts); +lexpr({lc,_,E,Qs}, _Prec, Opts) -> + Lcl = {list,[{step,[lexpr(E, Opts),leaf(" ||")],lc_quals(Qs, Opts)}]}, {list,[{seq,$[,[],[[]],[{force_nl,leaf(" "),[Lcl]}]},$]]}; %% {list,[{step,$[,Lcl},$]]}; -lexpr({bc,_,E,Qs}, _Prec, Hook) -> - Lcl = {list,[{step,[lexpr(E, Hook),leaf(" ||")],lc_quals(Qs, Hook)}]}, +lexpr({bc,_,E,Qs}, _Prec, Opts) -> + Lcl = {list,[{step,[lexpr(E, Opts),leaf(" ||")],lc_quals(Qs, Opts)}]}, {list,[{seq,'<<',[],[[]],[{force_nl,leaf(" "),[Lcl]}]},'>>']}; %% {list,[{step,'<<',Lcl},'>>']}; -lexpr({tuple,_,Elts}, _, Hook) -> - tuple(Elts, Hook); -%%lexpr({struct,_,Tag,Elts}, _, Hook) -> -%% {first,format("~w", [Tag]),tuple(Elts, Hook)}; -lexpr({record_index, _, Name, F}, Prec, Hook) -> +lexpr({tuple,_,Elts}, _, Opts) -> + tuple(Elts, Opts); +%%lexpr({struct,_,Tag,Elts}, _, Opts) -> +%% {first,format("~w", [Tag]),tuple(Elts, Opts)}; +lexpr({record_index, _, Name, F}, Prec, Opts) -> {P,R} = preop_prec('#'), Nl = record_name(Name), - El = [Nl,$.,lexpr(F, R, Hook)], + El = [Nl,$.,lexpr(F, R, Opts)], maybe_paren(P, Prec, El); -lexpr({record, _, Name, Fs}, Prec, Hook) -> +lexpr({record, _, Name, Fs}, Prec, Opts) -> {P,_R} = preop_prec('#'), Nl = record_name(Name), - El = {first,Nl,record_fields(Fs, Hook)}, + El = {first,Nl,record_fields(Fs, Opts)}, maybe_paren(P, Prec, El); -lexpr({record_field, _, Rec, Name, F}, Prec, Hook) -> +lexpr({record_field, _, Rec, Name, F}, Prec, Opts) -> {L,P,R} = inop_prec('#'), - Rl = lexpr(Rec, L, Hook), + Rl = lexpr(Rec, L, Opts), Nl = leaf(format("#~w.", [Name])), - El = [Rl,Nl,lexpr(F, R, Hook)], + El = [Rl,Nl,lexpr(F, R, Opts)], maybe_paren(P, Prec, El); -lexpr({record, _, Rec, Name, Fs}, Prec, Hook) -> +lexpr({record, _, Rec, Name, Fs}, Prec, Opts) -> {L,P,_R} = inop_prec('#'), - Rl = lexpr(Rec, L, Hook), + Rl = lexpr(Rec, L, Opts), Nl = record_name(Name), - El = {first,[Rl,Nl],record_fields(Fs, Hook)}, + El = {first,[Rl,Nl],record_fields(Fs, Opts)}, maybe_paren(P, Prec, El); -lexpr({record_field, _, {atom,_,''}, F}, Prec, Hook) -> +lexpr({record_field, _, {atom,_,''}, F}, Prec, Opts) -> {_L,P,R} = inop_prec('.'), - El = [$.,lexpr(F, R, Hook)], + El = [$.,lexpr(F, R, Opts)], maybe_paren(P, Prec, El); -lexpr({record_field, _, Rec, F}, Prec, Hook) -> +lexpr({record_field, _, Rec, F}, Prec, Opts) -> {L,P,R} = inop_prec('.'), - El = [lexpr(Rec, L, Hook),$.,lexpr(F, R, Hook)], + El = [lexpr(Rec, L, Opts),$.,lexpr(F, R, Opts)], maybe_paren(P, Prec, El); -lexpr({block,_,Es}, _, Hook) -> - {list,[{step,'begin',body(Es, Hook)},'end']}; -lexpr({'if',_,Cs}, _, Hook) -> - {list,[{step,'if',if_clauses(Cs, Hook)},'end']}; -lexpr({'case',_,Expr,Cs}, _, Hook) -> - {list,[{step,{list,[{step,'case',lexpr(Expr, Hook)},'of']}, - cr_clauses(Cs, Hook)}, +lexpr({block,_,Es}, _, Opts) -> + {list,[{step,'begin',body(Es, Opts)},'end']}; +lexpr({'if',_,Cs}, _, Opts) -> + {list,[{step,'if',if_clauses(Cs, Opts)},'end']}; +lexpr({'case',_,Expr,Cs}, _, Opts) -> + {list,[{step,{list,[{step,'case',lexpr(Expr, Opts)},'of']}, + cr_clauses(Cs, Opts)}, 'end']}; -lexpr({'cond',_,Cs}, _, Hook) -> - {list,[{step,leaf("cond"),cond_clauses(Cs, Hook)},'end']}; -lexpr({'receive',_,Cs}, _, Hook) -> - {list,[{step,'receive',cr_clauses(Cs, Hook)},'end']}; -lexpr({'receive',_,Cs,To,ToOpt}, _, Hook) -> - Al = {list,[{step,[lexpr(To, Hook),' ->'],body(ToOpt, Hook)}]}, - {list,[{step,'receive',cr_clauses(Cs, Hook)}, +lexpr({'cond',_,Cs}, _, Opts) -> + {list,[{step,leaf("cond"),cond_clauses(Cs, Opts)},'end']}; +lexpr({'receive',_,Cs}, _, Opts) -> + {list,[{step,'receive',cr_clauses(Cs, Opts)},'end']}; +lexpr({'receive',_,Cs,To,ToOpt}, _, Opts) -> + Al = {list,[{step,[lexpr(To, Opts),' ->'],body(ToOpt, Opts)}]}, + {list,[{step,'receive',cr_clauses(Cs, Opts)}, {step,'after',Al}, 'end']}; -lexpr({'fun',_,{function,F,A}}, _Prec, _Hook) -> +lexpr({'fun',_,{function,F,A}}, _Prec, _Opts) -> leaf(format("fun ~w/~w", [F,A])); -lexpr({'fun',_,{function,F,A},Extra}, _Prec, _Hook) -> +lexpr({'fun',_,{function,F,A},Extra}, _Prec, _Opts) -> {force_nl,fun_info(Extra),leaf(format("fun ~w/~w", [F,A]))}; -lexpr({'fun',_,{function,M,F,A}}, _Prec, _Hook) +lexpr({'fun',_,{function,M,F,A}}, _Prec, _Opts) when is_atom(M), is_atom(F), is_integer(A) -> %% For backward compatibility with pre-R15 abstract format. leaf(format("fun ~w:~w/~w", [M,F,A])); -lexpr({'fun',_,{function,M,F,A}}, _Prec, Hook) -> +lexpr({'fun',_,{function,M,F,A}}, _Prec, Opts) -> %% New format in R15. - NameItem = lexpr(M, Hook), - CallItem = lexpr(F, Hook), - ArityItem = lexpr(A, Hook), + NameItem = lexpr(M, Opts), + CallItem = lexpr(F, Opts), + ArityItem = lexpr(A, Opts), ["fun ",NameItem,$:,CallItem,$/,ArityItem]; -lexpr({'fun',_,{clauses,Cs}}, _Prec, Hook) -> - {list,[{first,'fun',fun_clauses(Cs, Hook)},'end']}; -lexpr({'fun',_,{clauses,Cs},Extra}, _Prec, Hook) -> +lexpr({'fun',_,{clauses,Cs}}, _Prec, Opts) -> + {list,[{first,'fun',fun_clauses(Cs, Opts)},'end']}; +lexpr({'fun',_,{clauses,Cs},Extra}, _Prec, Opts) -> {force_nl,fun_info(Extra), - {list,[{first,'fun',fun_clauses(Cs, Hook)},'end']}}; -lexpr({'query',_,Lc}, _Prec, Hook) -> - {list,[{step,leaf("query"),lexpr(Lc, 0, Hook)},'end']}; -lexpr({call,_,{remote,_,{atom,_,M},{atom,_,F}=N}=Name,Args}, Prec, Hook) -> + {list,[{first,'fun',fun_clauses(Cs, Opts)},'end']}}; +lexpr({'query',_,Lc}, _Prec, Opts) -> + {list,[{step,leaf("query"),lexpr(Lc, 0, Opts)},'end']}; +lexpr({call,_,{remote,_,{atom,_,M},{atom,_,F}=N}=Name,Args}, Prec, Opts) -> case erl_internal:bif(M, F, length(Args)) of true -> - call(N, Args, Prec, Hook); + call(N, Args, Prec, Opts); false -> - call(Name, Args, Prec, Hook) + call(Name, Args, Prec, Opts) end; -lexpr({call,_,Name,Args}, Prec, Hook) -> - call(Name, Args, Prec, Hook); -lexpr({'try',_,Es,Scs,Ccs,As}, _, Hook) -> +lexpr({call,_,Name,Args}, Prec, Opts) -> + call(Name, Args, Prec, Opts); +lexpr({'try',_,Es,Scs,Ccs,As}, _, Opts) -> {list,[if Scs =:= [] -> - {step,'try',body(Es, Hook)}; + {step,'try',body(Es, Opts)}; true -> - {step,{list,[{step,'try',body(Es, Hook)},'of']}, - cr_clauses(Scs, Hook)} + {step,{list,[{step,'try',body(Es, Opts)},'of']}, + cr_clauses(Scs, Opts)} end, if Ccs =:= [] -> []; true -> - {step,'catch',try_clauses(Ccs, Hook)} + {step,'catch',try_clauses(Ccs, Opts)} end, if As =:= [] -> []; true -> - {step,'after',body(As, Hook)} + {step,'after',body(As, Opts)} end, 'end']}; -lexpr({'catch',_,Expr}, Prec, Hook) -> +lexpr({'catch',_,Expr}, Prec, Opts) -> {P,R} = preop_prec('catch'), - El = {list,[{step,'catch',lexpr(Expr, R, Hook)}]}, + El = {list,[{step,'catch',lexpr(Expr, R, Opts)}]}, maybe_paren(P, Prec, El); -lexpr({match,_,Lhs,Rhs}, Prec, Hook) -> +lexpr({match,_,Lhs,Rhs}, Prec, Opts) -> {L,P,R} = inop_prec('='), - Pl = lexpr(Lhs, L, Hook), - Rl = lexpr(Rhs, R, Hook), + Pl = lexpr(Lhs, L, Opts), + Rl = lexpr(Rhs, R, Opts), El = {list,[{cstep,[Pl,' ='],Rl}]}, maybe_paren(P, Prec, El); -lexpr({op,_,Op,Arg}, Prec, Hook) -> +lexpr({op,_,Op,Arg}, Prec, Opts) -> {P,R} = preop_prec(Op), Ol = leaf(format("~s ", [Op])), - El = [Ol,lexpr(Arg, R, Hook)], + El = [Ol,lexpr(Arg, R, Opts)], maybe_paren(P, Prec, El); -lexpr({op,_,Op,Larg,Rarg}, Prec, Hook) when Op =:= 'orelse'; +lexpr({op,_,Op,Larg,Rarg}, Prec, Opts) when Op =:= 'orelse'; Op =:= 'andalso' -> %% Breaks lines since R12B. {L,P,R} = inop_prec(Op), - Ll = lexpr(Larg, L, Hook), + Ll = lexpr(Larg, L, Opts), Ol = leaf(format("~s", [Op])), - Lr = lexpr(Rarg, R, Hook), + Lr = lexpr(Rarg, R, Opts), El = {prefer_nl,[[]],[Ll,Ol,Lr]}, maybe_paren(P, Prec, El); -lexpr({op,_,Op,Larg,Rarg}, Prec, Hook) -> +lexpr({op,_,Op,Larg,Rarg}, Prec, Opts) -> {L,P,R} = inop_prec(Op), - Ll = lexpr(Larg, L, Hook), + Ll = lexpr(Larg, L, Opts), Ol = leaf(format("~s", [Op])), - Lr = lexpr(Rarg, R, Hook), + Lr = lexpr(Rarg, R, Opts), El = {list,[Ll,Ol,Lr]}, maybe_paren(P, Prec, El); %% Special expressions which are not really legal everywhere. -lexpr({remote,_,M,F}, Prec, Hook) -> +lexpr({remote,_,M,F}, Prec, Opts) -> {L,P,R} = inop_prec(':'), - NameItem = lexpr(M, L, Hook), - CallItem = lexpr(F, R, Hook), + NameItem = lexpr(M, L, Opts), + CallItem = lexpr(F, R, Opts), maybe_paren(P, Prec, [NameItem,$:,CallItem]); %% BIT SYNTAX: -lexpr({bin,_,Fs}, _, Hook) -> - bit_grp(Fs, Hook); +lexpr({bin,_,Fs}, _, Opts) -> + bit_grp(Fs, Opts); %% Special case for straight values. lexpr({value,_,Val}, _,_) -> leaf(write(Val)); %% Now do the hook. -lexpr(Other, _Precedence, none) -> +lexpr(Other, _Precedence, #options{hook = none}) -> leaf(format("INVALID-FORM:~w:",[Other])); -lexpr(HookExpr, Precedence, {Mod,Func,Eas}) when Mod =/= 'fun' -> +lexpr(HookExpr, Precedence, #options{hook = {Mod,Func,Eas}}) + when Mod =/= 'fun' -> {ehook,HookExpr,Precedence,{Mod,Func,Eas}}; -lexpr(HookExpr, Precedence, Func) -> - {hook,HookExpr,Precedence,Func}. +lexpr(HookExpr, Precedence, #options{hook = Func, opts = Options}) -> + {hook,HookExpr,Precedence,Func,Options}. -call(Name, Args, Prec, Hook) -> +call(Name, Args, Prec, Opts) -> {F,P} = func_prec(), - Item = {first,lexpr(Name, F, Hook),args(Args, Hook)}, + Item = {first,lexpr(Name, F, Opts),args(Args, Opts)}, maybe_paren(P, Prec, Item). fun_info(Extra) -> @@ -565,32 +604,18 @@ fun_info(Extra) -> %% BITS: -bit_grp(Fs, Hook) -> - append([['<<'], - [try - true = Fs =/= [], - S = bin_string(Fs), - true = io_lib:printable_list(S), - {string,S} - catch _:_ -> - bit_elems(Fs, Hook) - end], - ['>>']]). - -bin_string([]) -> - []; -bin_string([{bin_element,_,{char,_,C},_,_}|Bin]) -> - [C | bin_string(Bin)]. +bit_grp(Fs, Opts) -> + append([['<<'], [bit_elems(Fs, Opts)], ['>>']]). -bit_elems(Es, Hook) -> - expr_list(Es, $,, fun bit_elem/2, Hook). +bit_elems(Es, Opts) -> + expr_list(Es, $,, fun bit_elem/2, Opts). -bit_elem({bin_element,_,Expr,Sz,Types}, Hook) -> +bit_elem({bin_element,_,Expr,Sz,Types}, Opts) -> P = max_prec(), - VChars = lexpr(Expr, P, Hook), + VChars = lexpr(Expr, P, Opts), SChars = if Sz =/= default -> - [VChars,$:,lexpr(Sz, P, Hook)]; + [VChars,$:,lexpr(Sz, P, Opts)]; true -> VChars end, @@ -618,157 +643,157 @@ bit_elem_type(T) -> record_name(Name) -> leaf(format("#~w", [Name])). -record_fields(Fs, Hook) -> - tuple(Fs, fun record_field/2, Hook). +record_fields(Fs, Opts) -> + tuple(Fs, fun record_field/2, Opts). -record_field({record_field,_,F,Val}, Hook) -> +record_field({record_field,_,F,Val}, Opts) -> {L,_P,R} = inop_prec('='), - Fl = lexpr(F, L, Hook), - Vl = lexpr(Val, R, Hook), + Fl = lexpr(F, L, Opts), + Vl = lexpr(Val, R, Opts), {list,[{cstep,[Fl,' ='],Vl}]}; -record_field({typed_record_field,{record_field,_,F,Val},Type}, Hook) -> +record_field({typed_record_field,{record_field,_,F,Val},Type}, Opts) -> {L,_P,R} = inop_prec('='), - Fl = lexpr(F, L, Hook), - Vl = typed(lexpr(Val, R, Hook), Type), + Fl = lexpr(F, L, Opts), + Vl = typed(lexpr(Val, R, Opts), Type), {list,[{cstep,[Fl,' ='],Vl}]}; -record_field({typed_record_field,Field,Type}, Hook) -> - typed(record_field(Field, Hook), Type); -record_field({record_field,_,F}, Hook) -> - lexpr(F, 0, Hook). - -list({cons,_,H,T}, Es, Hook) -> - list(T, [H|Es], Hook); -list({nil,_}, Es, Hook) -> - proper_list(reverse(Es), Hook); -list(Other, Es, Hook) -> - improper_list(reverse(Es, [Other]), Hook). - -%% if_clauses(Clauses, Hook) -> [Char]. +record_field({typed_record_field,Field,Type}, Opts) -> + typed(record_field(Field, Opts), Type); +record_field({record_field,_,F}, Opts) -> + lexpr(F, 0, Opts). + +list({cons,_,H,T}, Es, Opts) -> + list(T, [H|Es], Opts); +list({nil,_}, Es, Opts) -> + proper_list(reverse(Es), Opts); +list(Other, Es, Opts) -> + improper_list(reverse(Es, [Other]), Opts). + +%% if_clauses(Clauses, Opts) -> [Char]. %% Print 'if' clauses. -if_clauses(Cs, Hook) -> - clauses(fun if_clause/2, Hook, Cs). +if_clauses(Cs, Opts) -> + clauses(fun if_clause/2, Opts, Cs). -if_clause({clause,_,[],G,B}, Hook) -> - Gl = [guard_no_when(G, Hook),' ->'], - {step,Gl,body(B, Hook)}. +if_clause({clause,_,[],G,B}, Opts) -> + Gl = [guard_no_when(G, Opts),' ->'], + {step,Gl,body(B, Opts)}. -guard_no_when([E|Es], Hook) when is_list(E) -> - expr_list([E|Es], $;, fun guard0/2, Hook); -guard_no_when([E|Es], Hook) -> % before R6 - guard_no_when([[E|Es]], Hook); +guard_no_when([E|Es], Opts) when is_list(E) -> + expr_list([E|Es], $;, fun guard0/2, Opts); +guard_no_when([E|Es], Opts) -> % before R6 + guard_no_when([[E|Es]], Opts); guard_no_when([], _) -> % cannot happen leaf("true"). -%% cr_clauses(Clauses, Hook) -> [Char]. +%% cr_clauses(Clauses, Opts) -> [Char]. %% Print 'case'/'receive' clauses. -cr_clauses(Cs, Hook) -> - clauses(fun cr_clause/2, Hook, Cs). +cr_clauses(Cs, Opts) -> + clauses(fun cr_clause/2, Opts, Cs). -cr_clause({clause,_,[T],G,B}, Hook) -> - El = lexpr(T, 0, Hook), - Gl = guard_when(El, G, Hook), - Bl = body(B, Hook), +cr_clause({clause,_,[T],G,B}, Opts) -> + El = lexpr(T, 0, Opts), + Gl = guard_when(El, G, Opts), + Bl = body(B, Opts), {step,Gl,Bl}. -%% try_clauses(Clauses, Hook) -> [Char]. +%% try_clauses(Clauses, Opts) -> [Char]. %% Print 'try' clauses. -try_clauses(Cs, Hook) -> - clauses(fun try_clause/2, Hook, Cs). +try_clauses(Cs, Opts) -> + clauses(fun try_clause/2, Opts, Cs). -try_clause({clause,_,[{tuple,_,[{atom,_,throw},V,S]}],G,B}, Hook) -> - El = lexpr(V, 0, Hook), - Sl = stack_backtrace(S, [El], Hook), - Gl = guard_when(Sl, G, Hook), - Bl = body(B, Hook), +try_clause({clause,_,[{tuple,_,[{atom,_,throw},V,S]}],G,B}, Opts) -> + El = lexpr(V, 0, Opts), + Sl = stack_backtrace(S, [El], Opts), + Gl = guard_when(Sl, G, Opts), + Bl = body(B, Opts), {step,Gl,Bl}; -try_clause({clause,_,[{tuple,_,[C,V,S]}],G,B}, Hook) -> - Cs = lexpr(C, 0, Hook), - El = lexpr(V, 0, Hook), +try_clause({clause,_,[{tuple,_,[C,V,S]}],G,B}, Opts) -> + Cs = lexpr(C, 0, Opts), + El = lexpr(V, 0, Opts), CsEl = [Cs,$:,El], - Sl = stack_backtrace(S, CsEl, Hook), - Gl = guard_when(Sl, G, Hook), - Bl = body(B, Hook), + Sl = stack_backtrace(S, CsEl, Opts), + Gl = guard_when(Sl, G, Opts), + Bl = body(B, Opts), {step,Gl,Bl}. -stack_backtrace({var,_,'_'}, El, _Hook) -> +stack_backtrace({var,_,'_'}, El, _Opts) -> El; -stack_backtrace(S, El, Hook) -> - El++[$:,lexpr(S, 0, Hook)]. +stack_backtrace(S, El, Opts) -> + El++[$:,lexpr(S, 0, Opts)]. -%% fun_clauses(Clauses, Hook) -> [Char]. +%% fun_clauses(Clauses, Opts) -> [Char]. %% Print 'fun' clauses. -fun_clauses(Cs, Hook) -> - nl_clauses(fun fun_clause/2, [$;], Hook, Cs). +fun_clauses(Cs, Opts) -> + nl_clauses(fun fun_clause/2, [$;], Opts, Cs). -fun_clause({clause,_,A,G,B}, Hook) -> - El = args(A, Hook), - Gl = guard_when(El, G, Hook), - Bl = body(B, Hook), +fun_clause({clause,_,A,G,B}, Opts) -> + El = args(A, Opts), + Gl = guard_when(El, G, Opts), + Bl = body(B, Opts), {step,Gl,Bl}. -%% cond_clauses(Clauses, Hook) -> [Char]. +%% cond_clauses(Clauses, Opts) -> [Char]. %% Print 'cond' clauses. -cond_clauses(Cs, Hook) -> - clauses(fun cond_clause/2, Hook, Cs). +cond_clauses(Cs, Opts) -> + clauses(fun cond_clause/2, Opts, Cs). -cond_clause({clause,_,[],[[E]],B}, Hook) -> - {step,[lexpr(E, Hook),' ->'],body(B, Hook)}. +cond_clause({clause,_,[],[[E]],B}, Opts) -> + {step,[lexpr(E, Opts),' ->'],body(B, Opts)}. -%% nl_clauses(Type, Hook, Clauses) -> [Char]. +%% nl_clauses(Type, Opts, Clauses) -> [Char]. %% Generic clause printing function (always breaks lines). -nl_clauses(Type, Sep, Hook, Cs) -> - {prefer_nl,Sep,lexprs(Cs, Type, Hook)}. +nl_clauses(Type, Sep, Opts, Cs) -> + {prefer_nl,Sep,lexprs(Cs, Type, Opts)}. -%% clauses(Type, Hook, Clauses) -> [Char]. +%% clauses(Type, Opts, Clauses) -> [Char]. %% Generic clause printing function (breaks lines since R12B). -clauses(Type, Hook, Cs) -> - {prefer_nl,[$;],lexprs(Cs, Type, Hook)}. +clauses(Type, Opts, Cs) -> + {prefer_nl,[$;],lexprs(Cs, Type, Opts)}. -%% lc_quals(Qualifiers, After, Hook) +%% lc_quals(Qualifiers, After, Opts) %% List comprehension qualifiers (breaks lines since R12B). -lc_quals(Qs, Hook) -> - {prefer_nl,[$,],lexprs(Qs, fun lc_qual/2, Hook)}. +lc_quals(Qs, Opts) -> + {prefer_nl,[$,],lexprs(Qs, fun lc_qual/2, Opts)}. -lc_qual({b_generate,_,Pat,E}, Hook) -> - Pl = lexpr(Pat, 0, Hook), - {list,[{step,[Pl,leaf(" <=")],lexpr(E, 0, Hook)}]}; -lc_qual({generate,_,Pat,E}, Hook) -> - Pl = lexpr(Pat, 0, Hook), - {list,[{step,[Pl,leaf(" <-")],lexpr(E, 0, Hook)}]}; -lc_qual(Q, Hook) -> - lexpr(Q, 0, Hook). +lc_qual({b_generate,_,Pat,E}, Opts) -> + Pl = lexpr(Pat, 0, Opts), + {list,[{step,[Pl,leaf(" <=")],lexpr(E, 0, Opts)}]}; +lc_qual({generate,_,Pat,E}, Opts) -> + Pl = lexpr(Pat, 0, Opts), + {list,[{step,[Pl,leaf(" <-")],lexpr(E, 0, Opts)}]}; +lc_qual(Q, Opts) -> + lexpr(Q, 0, Opts). -proper_list(Es, Hook) -> - {seq,$[,$],$,,lexprs(Es, Hook)}. +proper_list(Es, Opts) -> + {seq,$[,$],$,,lexprs(Es, Opts)}. -improper_list(Es, Hook) -> - {seq,$[,$],{$,,$|},lexprs(Es, Hook)}. +improper_list(Es, Opts) -> + {seq,$[,$],{$,,$|},lexprs(Es, Opts)}. -tuple(L, Hook) -> - tuple(L, fun lexpr/2, Hook). +tuple(L, Opts) -> + tuple(L, fun lexpr/2, Opts). -tuple(Es, F, Hook) -> - {seq,${,$},$,,lexprs(Es, F, Hook)}. +tuple(Es, F, Opts) -> + {seq,${,$},$,,lexprs(Es, F, Opts)}. -args(As, Hook) -> - {seq,$(,$),[$,],lexprs(As, Hook)}. +args(As, Opts) -> + {seq,$(,$),[$,],lexprs(As, Opts)}. -expr_list(Es, Sep, F, Hook) -> - {seq,[],[],Sep,lexprs(Es, F, Hook)}. +expr_list(Es, Sep, F, Opts) -> + {seq,[],[],Sep,lexprs(Es, F, Opts)}. -lexprs(Es, Hook) -> - lexprs(Es, fun lexpr/2, Hook). +lexprs(Es, Opts) -> + lexprs(Es, fun lexpr/2, Opts). -lexprs(Es, F, Hook) -> - [F(E, Hook) || E <- Es]. +lexprs(Es, F, Opts) -> + [F(E, Opts) || E <- Es]. maybe_paren(P, Prec, Expr) when P < Prec -> [$(,Expr,$)]; @@ -781,13 +806,13 @@ leaf(S) -> %%% Do the formatting. Currently nothing fancy. Could probably have %%% done it in one single pass. -frmt(Item) -> - frmt(Item, 0). +frmt(Item, PP) -> + frmt(Item, 0, PP). -frmt(Item, I) -> +frmt(Item, I, PP) -> ST = spacetab(), WT = wordtable(), - {Chars,_Length} = f(Item, I, ST, WT), + {Chars,_Length} = f(Item, I, ST, WT, PP), [Chars]. %%% What the tags mean: @@ -803,6 +828,7 @@ frmt(Item, I) -> %%% - {force_nl,ExtraInfo,I}: fun-info (a comment) forces linebreak before I. %%% - {prefer_nl,Sep,IPs}: forces linebreak between Is unlesss negative %%% indentation. +%%% - {char,C}: a character %%% - {string,S}: a string. %%% - {hook,...}, {ehook,...}: hook expressions. %%% @@ -812,22 +838,22 @@ frmt(Item, I) -> %%% cstep works similarly, but no linebreak if the width of I1 is less %%% than the indentation (this is for "A = <expression over several lines>). -f([]=Nil, _I0, _ST, _WT) -> +f([]=Nil, _I0, _ST, _WT, _PP) -> {Nil,0}; -f(C, _I0, _ST, _WT) when is_integer(C) -> +f(C, _I0, _ST, _WT, _PP) when is_integer(C) -> {C,1}; -f({leaf,Length,Chars}, _I0, _ST, _WT) -> +f({leaf,Length,Chars}, _I0, _ST, _WT, _PP) -> {Chars,Length}; -f([Item|Items], I0, ST, WT) -> - consecutive(Items, f(Item, I0, ST, WT), I0, ST, WT); -f({list,Items}, I0, ST, WT) -> - f({seq,[],[],[[]],Items}, I0, ST, WT); -f({first,E,Item}, I0, ST, WT) -> - f({seq,E,[],[[]],[Item]}, I0, ST, WT); -f({seq,Before,After,Sep,LItems}, I0, ST, WT) -> - BCharsSize = f(Before, I0, ST, WT), +f([Item|Items], I0, ST, WT, PP) -> + consecutive(Items, f(Item, I0, ST, WT, PP), I0, ST, WT, PP); +f({list,Items}, I0, ST, WT, PP) -> + f({seq,[],[],[[]],Items}, I0, ST, WT, PP); +f({first,E,Item}, I0, ST, WT, PP) -> + f({seq,E,[],[[]],[Item]}, I0, ST, WT, PP); +f({seq,Before,After,Sep,LItems}, I0, ST, WT, PP) -> + BCharsSize = f(Before, I0, ST, WT, PP), I = indent(BCharsSize, I0), - CharsSizeL = fl(LItems, Sep, I, After, ST, WT), + CharsSizeL = fl(LItems, Sep, I, After, ST, WT, PP), {CharsL,SizeL} = unz(CharsSizeL), {BCharsL,BSizeL} = unz1([BCharsSize]), Sizes = BSizeL ++ SizeL, @@ -848,15 +874,15 @@ f({seq,Before,After,Sep,LItems}, I0, ST, WT) -> {BCharsL++insert_newlines(CharsSizeL, I, ST), nsz(lists:last(Sizes), I0)} end; -f({force_nl,_ExtraInfoItem,Item}, I, ST, WT) when I < 0 -> +f({force_nl,_ExtraInfoItem,Item}, I, ST, WT, PP) when I < 0 -> %% Extra info is a comment; cannot have that on the same line - f(Item, I, ST, WT); -f({force_nl,ExtraInfoItem,Item}, I, ST, WT) -> - f({prefer_nl,[],[ExtraInfoItem,Item]}, I, ST, WT); -f({prefer_nl,Sep,LItems}, I, ST, WT) when I < 0 -> - f({seq,[],[],Sep,LItems}, I, ST, WT); -f({prefer_nl,Sep,LItems}, I0, ST, WT) -> - CharsSize2L = fl(LItems, Sep, I0, [], ST, WT), + f(Item, I, ST, WT, PP); +f({force_nl,ExtraInfoItem,Item}, I, ST, WT, PP) -> + f({prefer_nl,[],[ExtraInfoItem,Item]}, I, ST, WT, PP); +f({prefer_nl,Sep,LItems}, I, ST, WT, PP) when I < 0 -> + f({seq,[],[],Sep,LItems}, I, ST, WT, PP); +f({prefer_nl,Sep,LItems}, I0, ST, WT, PP) -> + CharsSize2L = fl(LItems, Sep, I0, [], ST, WT, PP), {_CharsL,Sizes} = unz(CharsSize2L), if Sizes =:= [] -> @@ -864,37 +890,40 @@ f({prefer_nl,Sep,LItems}, I0, ST, WT) -> true -> {insert_newlines(CharsSize2L, I0, ST),nsz(lists:last(Sizes), I0)} end; -f({string,S}, I, ST, WT) -> - f(write_a_string(S, I), I, ST, WT); -f({hook,HookExpr,Precedence,Func}, I, _ST, _WT) -> - Chars = Func(HookExpr, I, Precedence, Func), +f({char,C}, I, ST, WT, PP) -> + f(write_a_char(C, PP), I, ST, WT, PP); +f({string,S}, I, ST, WT, PP) -> + f(write_a_string(S, I, PP), I, ST, WT, PP); +f({hook,HookExpr,Precedence,Func,Options}, I, _ST, _WT, _PP) -> + Chars = Func(HookExpr, I, Precedence, Options), {Chars,indentation(Chars, I)}; -f({ehook,HookExpr,Precedence,{Mod,Func,Eas}=ModFuncEas}, I, _ST, _WT) -> +f({ehook,HookExpr,Precedence,{Mod,Func,Eas}=ModFuncEas}, I, _ST, _WT, _PP) -> Chars = apply(Mod, Func, [HookExpr,I,Precedence,ModFuncEas|Eas]), {Chars,indentation(Chars, I)}; -f(WordName, _I, _ST, WT) -> % when is_atom(WordName) +f(WordName, _I, _ST, WT, _PP) -> % when is_atom(WordName) word(WordName, WT). -define(IND, 4). %% fl(ListItems, I0, ST, WT) -> [[CharsSize1,CharsSize2]] %% ListItems = [{Item,Items}|Item] -fl([], _Sep, I0, After, ST, WT) -> - [[f(After, I0, ST, WT),{[],0}]]; -fl(CItems, Sep0, I0, After, ST, WT) -> +fl([], _Sep, I0, After, ST, WT, PP) -> + [[f(After, I0, ST, WT, PP),{[],0}]]; +fl(CItems, Sep0, I0, After, ST, WT, PP) -> F = fun({step,Item1,Item2}, S) -> - [f(Item1, I0, ST, WT),f([Item2,S], incr(I0, ?IND), ST, WT)]; + [f(Item1, I0, ST, WT, PP), + f([Item2,S], incr(I0, ?IND), ST, WT, PP)]; ({cstep,Item1,Item2}, S) -> - {_,Sz1} = CharSize1 = f(Item1, I0, ST, WT), + {_,Sz1} = CharSize1 = f(Item1, I0, ST, WT, PP), if is_integer(Sz1), Sz1 < ?IND -> Item2p = [leaf("\s"),Item2,S], - [consecutive(Item2p, CharSize1, I0, ST, WT),{[],0}]; + [consecutive(Item2p, CharSize1, I0, ST, WT, PP),{[],0}]; true -> - [CharSize1,f([Item2,S], incr(I0, ?IND), ST, WT)] + [CharSize1,f([Item2,S], incr(I0, ?IND), ST, WT, PP)] end; (Item, S) -> - [f([Item,S], I0, ST, WT),{[],0}] + [f([Item,S], I0, ST, WT, PP),{[],0}] end, {Sep,LastSep} = case Sep0 of {_,_} -> Sep0; _ -> {Sep0,Sep0} end, fl1(CItems, F, Sep, LastSep, After). @@ -906,10 +935,10 @@ fl1([CItem1,CItem2], F, _Sep, LastSep, After) -> fl1([CItem|CItems], F, Sep, LastSep, After) -> [F(CItem, Sep)|fl1(CItems, F, Sep, LastSep, After)]. -consecutive(Items, CharSize1, I0, ST, WT) -> +consecutive(Items, CharSize1, I0, ST, WT, PP) -> {CharsSizes,_Length} = mapfoldl(fun(Item, Len) -> - CharsSize = f(Item, Len, ST, WT), + CharsSize = f(Item, Len, ST, WT, PP), {CharsSize,indent(CharsSize, Len)} end, indent(CharSize1, I0), Items), {CharsL,SizeL} = unz1([CharSize1|CharsSizes]), @@ -999,26 +1028,40 @@ has_nl([C|Cs]) -> has_nl([]) -> false. +write_a_char(C, PP) -> + flat_leaf(write_char(C, PP)). + -define(MIN_SUBSTRING, 5). -write_a_string(S, I) when I < 0; S =:= [] -> - leaf(write_string(S)); -write_a_string(S, I) -> +write_a_string(S, I, PP) when I < 0; S =:= [] -> + flat_leaf(write_string(S, PP)); +write_a_string(S, I, PP) -> Len = erlang:max(?MAXLINE-I, ?MIN_SUBSTRING), - {list,write_a_string(S, Len, Len)}. + {list,write_a_string(S, Len, Len, PP)}. -write_a_string([], _N, _Len) -> +write_a_string([], _N, _Len, _PP) -> []; -write_a_string(S, N, Len) -> +write_a_string(S, N, Len, PP) -> SS = string:sub_string(S, 1, N), - Sl = write_string(SS), - case (iolist_size(Sl) > Len) and (N > ?MIN_SUBSTRING) of + Sl = write_string(SS, PP), + case (length(Sl) > Len) and (N > ?MIN_SUBSTRING) of true -> - write_a_string(S, N-1, Len); + write_a_string(S, N-1, Len, PP); false -> - [leaf(Sl)|write_a_string(lists:nthtail(length(SS), S), Len, Len)] + [flat_leaf(Sl) | + write_a_string(lists:nthtail(length(SS), S), Len, Len, PP)] end. +flat_leaf(S) -> + L = lists:flatten(S), + {leaf,length(L),L}. + +write_string(S, PP) -> + lists:flatten((PP#pp.string_fun)(S)). + +write_char(C, PP) -> + lists:flatten((PP#pp.char_fun)(C)). + %% %% Utilities %% diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl index d880656565..818703284f 100644 --- a/lib/stdlib/src/erl_scan.erl +++ b/lib/stdlib/src/erl_scan.erl @@ -55,7 +55,14 @@ token_info/1,token_info/2, attributes_info/1,attributes_info/2,set_attribute/3]). --export_type([error_info/0, line/0, return_cont/0, tokens_result/0]). +%%% Private +-export([continuation_location/1]). + +-export_type([error_info/0, + line/0, + options/0, + return_cont/0, + tokens_result/0]). %%% %%% Defines and type definitions @@ -74,7 +81,8 @@ -type location() :: line() | {line(),column()}. -type resword_fun() :: fun((atom()) -> boolean()). -type option() :: 'return' | 'return_white_spaces' | 'return_comments' - | 'text' | {'reserved_word_fun', resword_fun()}. + | 'text' | {'reserved_word_fun', resword_fun()} + | 'unicode'. -type options() :: option() | [option()]. -type symbol() :: atom() | float() | integer() | string(). -type info_line() :: integer() | term(). @@ -95,7 +103,8 @@ {resword_fun = fun reserved_word/1 :: resword_fun(), ws = false :: boolean(), comment = false :: boolean(), - text = false :: boolean()}). + text = false :: boolean(), + unicode = false :: boolean()}). %%---------------------------------------------------------------------------- @@ -183,6 +192,11 @@ tokens({erl_scan_continuation,Cs,Col,Toks,Line,St,Any,Fun}, CharSpec, _Loc, _Opts) -> tokens1(Cs++CharSpec, St, Line, Col, Toks, Fun, Any). +continuation_location({erl_scan_continuation,_,no_col,_,Line,_,_,_}) -> + Line; +continuation_location({erl_scan_continuation,_,Col,_,Line,_,_,_}) -> + {Line,Col}. + -type attribute_item() :: 'column' | 'length' | 'line' | 'location' | 'text'. -type info_location() :: location() | term(). @@ -322,13 +336,20 @@ string_thing(_) -> "string". (C >= $\000 andalso C =< $\s orelse C >= $\200 andalso C =< $\240)). -define(DIGIT(C), C >= $0, C =< $9). -define(CHAR(C), is_integer(C), C >= 0). - -%% A workaround: Unicode strings are not returned as strings, but as -%% lists of integers. For instance, "b\x{aaa}c" => [98,2730,99]. This -%% is to protect the system from character codes greater than 255. To -%% be removed. Search for UNI to find workaround code. +-define(UNICODE(C), + (C >= 0 andalso C < 16#D800 orelse + C > 16#DFFF andalso C < 16#FFFE orelse + C > 16#FFFF andalso C =< 16#10FFFF)). + +%% When the option 'unicode' is false: return Unicode strings as lists +%% of integers and Unicode characters as integers. For instance, +%% erl_scan:string("\"b\x{aaa}c\".") is equivalent to +%% erl_scan:string("[98,2730,99]."). This is to protect the caller +%% from character codes greater than 255. Search for UNI to find code +%% implementing this "feature". The 'unicode' option is undocumented +%% and will probably be removed later. -define(NO_UNICODE, 0). --define(UNI255(C), (C) =< 16#ff). +-define(UNI255(C), (C =< 16#ff)). options(Opts0) when is_list(Opts0) -> Opts = lists:foldr(fun expand_opt/2, [], Opts0), @@ -342,10 +363,12 @@ options(Opts0) when is_list(Opts0) -> Comment = proplists:get_bool(return_comments, Opts), WS = proplists:get_bool(return_white_spaces, Opts), Txt = proplists:get_bool(text, Opts), + Unicode = proplists:get_bool(unicode, Opts), #erl_scan{resword_fun = RW_fun, comment = Comment, ws = WS, - text = Txt}; + text = Txt, + unicode = Unicode}; options(Opt) -> options([Opt]). @@ -626,15 +649,12 @@ scan1([$~|Cs], St, Line, Col, Toks) -> scan1([$&|Cs], St, Line, Col, Toks) -> tok2(Cs, St, Line, Col, Toks, "&", '&', 1); %% End of optimization. -scan1([C|Cs], St, Line, Col, Toks) when ?CHAR(C) -> +scan1([C|Cs], St, Line, Col, Toks) when ?CHAR(C), ?UNI255(C) -> Str = [C], - case catch list_to_atom(Str) of - Sym when is_atom(Sym) -> - tok2(Cs, St, Line, Col, Toks, Str, Sym, 1); - _ -> - Ncol = incr_column(Col, 1), - scan_error({illegal,character}, Line, Col, Line, Ncol, Cs) - end; + tok2(Cs, St, Line, Col, Toks, Str, list_to_atom(Str), 1); +scan1([C|Cs], _St, Line, Col, _Toks) when ?CHAR(C) -> + Ncol = incr_column(Col, 1), + scan_error({illegal,character}, Line, Col, Line, Ncol, Cs); scan1([]=Cs, _St, Line, Col, Toks) -> {more,{Cs,Col,Toks,Line,[],fun scan/6}}; scan1(eof=Cs, _St, Line, Col, Toks) -> @@ -832,32 +852,44 @@ scan_char([$\\|Cs]=Cs0, St, Line, Col, Toks) -> {eof,Ncol} -> scan_error(char, Line, Col, Line, Ncol, eof); {nl,Val,Str,Ncs,Ncol} -> - Attrs = attributes(Line, Col, St, "$\\"++Str), + Attrs = attributes(Line, Col, St, "$\\"++Str), %" Ntoks = [{char,Attrs,Val}|Toks], scan1(Ncs, St, Line+1, Ncol, Ntoks); {unicode,Val,Str,Ncs,Ncol} -> - Attrs = attributes(Line, Col, St, "$\\"++Str), - Ntoks = [{integer,Attrs,Val}|Toks], % UNI + Attrs = attributes(Line, Col, St, "$\\"++Str), %" + Tag = char_tag(Val, St), % UNI + Ntoks = [{Tag,Attrs,Val}|Toks], scan1(Ncs, St, Line, Ncol, Ntoks); {Val,Str,Ncs,Ncol} -> - Attrs = attributes(Line, Col, St, "$\\"++Str), + Attrs = attributes(Line, Col, St, "$\\"++Str), %" Ntoks = [{char,Attrs,Val}|Toks], scan1(Ncs, St, Line, Ncol, Ntoks) end; scan_char([$\n=C|Cs], St, Line, Col, Toks) -> Attrs = attributes(Line, Col, St, [$$,C]), scan1(Cs, St, Line+1, new_column(Col, 1), [{char,Attrs,C}|Toks]); -scan_char([C|Cs], St, Line, Col, Toks) when ?CHAR(C) -> - Tag = if ?UNI255(C) -> char; true -> integer end, % UNI +scan_char([C|Cs], St, Line, Col, Toks) when ?UNICODE(C) -> + Tag = char_tag(C, St), % UNI Attrs = attributes(Line, Col, St, [$$,C]), scan1(Cs, St, Line, incr_column(Col, 2), [{Tag,Attrs,C}|Toks]); +scan_char([C|_Cs], _St, Line, Col, _Toks) when ?CHAR(C) -> + scan_error({illegal,character}, Line, Col, Line, incr_column(Col, 1), eof); scan_char([], _St, Line, Col, Toks) -> {more,{[$$],Col,Toks,Line,[],fun scan/6}}; scan_char(eof, _St, Line, Col, _Toks) -> scan_error(char, Line, Col, Line, incr_column(Col, 1), eof). +-compile({inline,[char_tag/2]}). + +char_tag(C, _St) when ?UNI255(C) -> + char; +char_tag(_C, #erl_scan{unicode = true}) -> + char; +char_tag(_C, _St) -> + integer. + scan_string(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0,Uni0}) -> - case scan_string0(Cs, St, Line, Col, $\", Str, Wcs, Uni0) of + case scan_string0(Cs, St, Line, Col, $\", true, Str, Wcs, Uni0) of %" {more,Ncs,Nline,Ncol,Nstr,Nwcs,Uni} -> State = {Nwcs,Nstr,Line0,Col0,Uni}, {more,{Ncs,Ncol,Toks,Nline,State,fun scan_string/6}}; @@ -865,8 +897,9 @@ scan_string(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0,Uni0}) -> scan_error(Error, Nline, Ncol, Nline, EndCol, Ncs); {error,Nline,Ncol,Nwcs,Ncs} -> Estr = string:substr(Nwcs, 1, 16), % Expanded escape chars. - scan_error({string,$\",Estr}, Line0, Col0, Nline, Ncol, Ncs); - {Ncs,Nline,Ncol,Nstr,Nwcs,?NO_UNICODE} -> + scan_error({string,$\",Estr}, Line0, Col0, Nline, Ncol, Ncs); %" + {Ncs,Nline,Ncol,Nstr,Nwcs,Uni} when Uni =:= ?NO_UNICODE; + St#erl_scan.unicode -> Attrs = attributes(Line0, Col0, St, Nstr), scan1(Ncs, St, Nline, Ncol, [{string,Attrs,Nwcs}|Toks]); {Ncs,Nline,Ncol,Nstr,_Nwcs,_Uni} -> @@ -918,7 +951,8 @@ unicode_tokens(Line, Col, Str, Val, St, Toks, Cs, Cline, Ccol) -> [{',',attributes(Cline, Ccol, St, "")} || Cs =/= "\""] ++ [Token|Toks]. scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0,Uni0}) -> - case scan_string0(Cs, St, Line, Col, $\', Str, Wcs, Uni0) of + AllowUni = St#erl_scan.unicode, + case scan_string0(Cs, St, Line, Col, $\', AllowUni, Str, Wcs, Uni0) of %' {more,Ncs,Nline,Ncol,Nstr,Nwcs,Uni} -> State = {Nwcs,Nstr,Line0,Col0,Uni}, {more,{Ncs,Ncol,Toks,Nline,State,fun scan_qatom/6}}; @@ -926,8 +960,9 @@ scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0,Uni0}) -> scan_error(Error, Nline, Ncol, Nline, EndCol, Ncs); {error,Nline,Ncol,Nwcs,Ncs} -> Estr = string:substr(Nwcs, 1, 16), % Expanded escape chars. - scan_error({string,$\',Estr}, Line0, Col0, Nline, Ncol, Ncs); - {Ncs,Nline,Ncol,Nstr,Nwcs,?NO_UNICODE} -> + scan_error({string,$\',Estr}, Line0, Col0, Nline, Ncol, Ncs); %' + {Ncs,Nline,Ncol,Nstr,Nwcs,Uni} -> + true = Uni =:= ?NO_UNICODE orelse AllowUni, case catch list_to_atom(Nwcs) of A when is_atom(A) -> Attrs = attributes(Line0, Col0, St, Nstr), @@ -937,38 +972,40 @@ scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0,Uni0}) -> end end. -scan_string0(Cs, #erl_scan{text=false}, Line, no_col=Col, Q, [], Wcs, Uni) -> - scan_string_no_col(Cs, Line, Col, Q, Wcs, Uni); -scan_string0(Cs, #erl_scan{text=true}, Line, no_col=Col, Q, Str, Wcs, Uni) -> - scan_string1(Cs, Line, Col, Q, Str, Wcs, Uni); -scan_string0(Cs, _St, Line, Col, Q, [], Wcs, Uni) -> - scan_string_col(Cs, Line, Col, Q, Wcs, Uni); -scan_string0(Cs, _St, Line, Col, Q, Str, Wcs, Uni) -> - scan_string1(Cs, Line, Col, Q, Str, Wcs, Uni). +scan_string0(Cs, #erl_scan{text=false}, Line, no_col=Col, Q, U, [], Wcs, Uni) -> + scan_string_no_col(Cs, Line, Col, Q, U, Wcs, Uni); +scan_string0(Cs, #erl_scan{text=true}, Line, no_col=Col, Q, U, Str, Wcs, Uni) -> + scan_string1(Cs, Line, Col, Q, U, Str, Wcs, Uni); +scan_string0(Cs, _St, Line, Col, Q, U, [], Wcs, Uni) -> + scan_string_col(Cs, Line, Col, Q, U, Wcs, Uni); +scan_string0(Cs, _St, Line, Col, Q, U, Str, Wcs, Uni) -> + scan_string1(Cs, Line, Col, Q, U, Str, Wcs, Uni). %% Optimization. Col =:= no_col. -scan_string_no_col([Q|Cs], Line, Col, Q, Wcs, Uni) -> +scan_string_no_col([Q|Cs], Line, Col, Q, _U, Wcs, Uni) -> {Cs,Line,Col,_DontCare=[],lists:reverse(Wcs),Uni}; -scan_string_no_col([$\n=C|Cs], Line, Col, Q, Wcs, Uni) -> - scan_string_no_col(Cs, Line+1, Col, Q, [C|Wcs], Uni); -scan_string_no_col([C|Cs], Line, Col, Q, Wcs, Uni) when C =/= $\\, - ?CHAR(C), ?UNI255(C) -> - scan_string_no_col(Cs, Line, Col, Q, [C|Wcs], Uni); -scan_string_no_col(Cs, Line, Col, Q, Wcs, Uni) -> - scan_string1(Cs, Line, Col, Q, Wcs, Wcs, Uni). +scan_string_no_col([$\n=C|Cs], Line, Col, Q, U, Wcs, Uni) -> + scan_string_no_col(Cs, Line+1, Col, Q, U, [C|Wcs], Uni); +scan_string_no_col([C|Cs], Line, Col, Q, U, Wcs, Uni) when C =/= $\\, + ?CHAR(C), + ?UNI255(C) -> + scan_string_no_col(Cs, Line, Col, Q, U, [C|Wcs], Uni); +scan_string_no_col(Cs, Line, Col, Q, U, Wcs, Uni) -> + scan_string1(Cs, Line, Col, Q, U, Wcs, Wcs, Uni). %% Optimization. Col =/= no_col. -scan_string_col([Q|Cs], Line, Col, Q, Wcs0, Uni) -> +scan_string_col([Q|Cs], Line, Col, Q, _U, Wcs0, Uni) -> Wcs = lists:reverse(Wcs0), Str = [Q|Wcs++[Q]], {Cs,Line,Col+1,Str,Wcs,Uni}; -scan_string_col([$\n=C|Cs], Line, _xCol, Q, Wcs, Uni) -> - scan_string_col(Cs, Line+1, 1, Q, [C|Wcs], Uni); -scan_string_col([C|Cs], Line, Col, Q, Wcs, Uni) when C =/= $\\, - ?CHAR(C), ?UNI255(C) -> - scan_string_col(Cs, Line, Col+1, Q, [C|Wcs], Uni); -scan_string_col(Cs, Line, Col, Q, Wcs, Uni) -> - scan_string1(Cs, Line, Col, Q, Wcs, Wcs, Uni). +scan_string_col([$\n=C|Cs], Line, _xCol, Q, U, Wcs, Uni) -> + scan_string_col(Cs, Line+1, 1, Q, U, [C|Wcs], Uni); +scan_string_col([C|Cs], Line, Col, Q, U, Wcs, Uni) when C =/= $\\, + ?CHAR(C), + ?UNI255(C) -> + scan_string_col(Cs, Line, Col+1, Q, U, [C|Wcs], Uni); +scan_string_col(Cs, Line, Col, Q, U, Wcs, Uni) -> + scan_string1(Cs, Line, Col, Q, U, Wcs, Wcs, Uni). %% UNI_STR is to be replaced by STR when the Unicode-string-to-list %% workaround is eventually removed. @@ -979,14 +1016,14 @@ scan_string_col(Cs, Line, Col, Q, Wcs, Uni) -> %% but then the end location of the error tuple would not correspond %% to the start location of the returned Rest string. (Maybe the end %% location could be modified, but that too is ugly.) -scan_string1([Q|Cs], Line, Col, Q, Str0, Wcs0, Uni) -> +scan_string1([Q|Cs], Line, Col, Q, _U, Str0, Wcs0, Uni) -> Wcs = lists:reverse(Wcs0), Str = ?UNI_STR(Col, [Q|lists:reverse(Str0, [Q])]), {Cs,Line,incr_column(Col, 1),Str,Wcs,Uni}; -scan_string1([$\n=C|Cs], Line, Col, Q, Str, Wcs, Uni) -> +scan_string1([$\n=C|Cs], Line, Col, Q, U, Str, Wcs, Uni) -> Ncol = new_column(Col, 1), - scan_string1(Cs, Line+1, Ncol, Q, ?UNI_STR(Col, [C|Str]), [C|Wcs], Uni); -scan_string1([$\\|Cs]=Cs0, Line, Col, Q, Str, Wcs, Uni) -> + scan_string1(Cs, Line+1, Ncol, Q, U, ?UNI_STR(Col, [C|Str]), [C|Wcs], Uni); +scan_string1([$\\|Cs]=Cs0, Line, Col, Q, U, Str, Wcs, Uni) -> case scan_escape(Cs, Col) of more -> {more,Cs0,Line,Col,Str,Wcs,Uni}; @@ -997,31 +1034,33 @@ scan_string1([$\\|Cs]=Cs0, Line, Col, Q, Str, Wcs, Uni) -> {nl,Val,ValStr,Ncs,Ncol} -> Nstr = ?UNI_STR(Ncol, lists:reverse(ValStr, [$\\|Str])), Nwcs = [Val|Wcs], - scan_string1(Ncs, Line+1, Ncol, Q, Nstr, Nwcs, Uni); - {unicode,_Val,_ValStr,Ncs,Ncol} when Q =:= $' -> %' Emacs + scan_string1(Ncs, Line+1, Ncol, Q, U, Nstr, Nwcs, Uni); + {unicode,_Val,_ValStr,Ncs,Ncol} when not U -> %' Emacs {char_error,Ncs,{illegal,character},Line,Col,incr_column(Ncol, 1)}; {unicode,Val,ValStr,Ncs,Ncol} -> % UNI. Uni is set to Val. Nstr = ?UNI_STR(Ncol, lists:reverse(ValStr, [$\\|Str])), Nwcs = [Val|Wcs], % not used - scan_string1(Ncs, Line, incr_column(Ncol, 1), Q, Nstr, Nwcs, Val); + scan_string1(Ncs, Line, incr_column(Ncol, 1), Q, U, Nstr, Nwcs, Val); {Val,ValStr,Ncs,Ncol} -> Nstr = ?UNI_STR(Ncol, lists:reverse(ValStr, [$\\|Str])), Nwcs = [Val|Wcs], - scan_string1(Ncs, Line, incr_column(Ncol, 1), Q, Nstr, Nwcs, Uni) + scan_string1(Ncs, Line, incr_column(Ncol, 1), Q, U, Nstr, Nwcs, Uni) end; -scan_string1([C|Cs], Line, no_col=Col, Q, Str, Wcs, Uni) when ?CHAR(C), - ?UNI255(C) -> - %% scan_string1(Cs, Line, Col, Q, Str, [C|Wcs], Uni); - scan_string1(Cs, Line, Col, Q, [C|Str], [C|Wcs], Uni); % UNI -scan_string1([C|Cs], Line, Col, Q, Str, Wcs, Uni) when ?CHAR(C), ?UNI255(C) -> - scan_string1(Cs, Line, Col+1, Q, [C|Str], [C|Wcs], Uni); -scan_string1([C|Cs], Line, Col, $', _Str, _Wcs, _Uni) when ?CHAR(C) -> %' UNI +scan_string1([C|Cs], Line, no_col=Col, Q, U, Str, Wcs, Uni) when ?CHAR(C), + ?UNI255(C) -> + %% scan_string1(Cs, Line, Col, Q, U, Str, [C|Wcs], Uni); + scan_string1(Cs, Line, Col, Q, U, [C|Str], [C|Wcs], Uni); % UNI +scan_string1([C|Cs], Line, Col, Q, U, Str, Wcs, Uni) when ?CHAR(C), ?UNI255(C) -> + scan_string1(Cs, Line, Col+1, Q, U, [C|Str], [C|Wcs], Uni); +scan_string1([C|Cs], Line, Col, _Q, false, _Str, _Wcs, _Uni) when ?CHAR(C) -> %' UNI {char_error,Cs,{illegal,character},Line,Col,incr_column(Col, 1)}; -scan_string1([C|Cs], Line, Col, Q, Str, Wcs, _Uni) when ?CHAR(C) -> % UNI - scan_string1(Cs, Line, incr_column(Col, 1), Q, [C|Str], [C|Wcs], C); -scan_string1([]=Cs, Line, Col, _Q, Str, Wcs, Uni) -> +scan_string1([C|Cs], Line, Col, Q, U, Str, Wcs, _Uni) when ?UNICODE(C) -> + scan_string1(Cs, Line, incr_column(Col, 1), Q, U, [C|Str], [C|Wcs], C); +scan_string1([C|Cs], Line, Col, _Q, _U, _Str, _Wcs, _Uni) when ?CHAR(C) -> % UNI + {char_error,Cs,{illegal,character},Line,Col,incr_column(Col, 1)}; +scan_string1([]=Cs, Line, Col, _Q, _U, Str, Wcs, Uni) -> {more,Cs,Line,Col,Str,Wcs,Uni}; -scan_string1(eof, Line, Col, _Q, _Str, Wcs, _Uni) -> +scan_string1(eof, Line, Col, _Q, _U, _Str, Wcs, _Uni) -> {error,Line,Col,lists:reverse(Wcs),eof}. -define(OCT(C), C >= $0, C =< $7). @@ -1072,8 +1111,10 @@ scan_escape([$\n=C|Cs], Col) -> scan_escape([C0|Cs], Col) when ?CHAR(C0), ?UNI255(C0) -> C = escape_char(C0), {C,?UNI_STR(Col, [C0]),Cs,incr_column(Col, 1)}; -scan_escape([C|Cs], Col) when ?CHAR(C) -> % UNI +scan_escape([C|Cs], Col) when ?UNICODE(C) -> {unicode,C,?UNI_STR(Col, [C]),Cs,incr_column(Col, 1)}; +scan_escape([C|Cs], Col) when ?CHAR(C) -> % UNI + {error,Cs,{illegal,character},incr_column(Col, 1)}; scan_escape([], _Col) -> more; scan_escape(eof, Col) -> @@ -1091,7 +1132,7 @@ scan_esc_end([$}|Cs], Col, Wcs0, B, Str0) -> case catch erlang:list_to_integer(Wcs, B) of Val when Val =< 16#FF -> {Val,?UNI_STR(Col, Str0++Wcs++[$}]),Cs,incr_column(Col, 1)}; - Val when Val =< 16#10FFFF -> + Val when ?UNICODE(Val) -> {unicode,Val,?UNI_STR(Col, Str0++Wcs++[$}]),Cs,incr_column(Col,1)}; _ -> {error,Cs,{illegal,character},incr_column(Col, 1)} @@ -1197,18 +1238,36 @@ float_end(Cs, St, Line, Col, Toks, Ncs0) -> scan_error({illegal,float}, Line, Col, Line, Ncol, Cs) end. -skip_comment([C|Cs], St, Line, Col, Toks, N) when C =/= $\n, ?CHAR(C) -> - skip_comment(Cs, St, Line, Col, Toks, N+1); -skip_comment([]=Cs, _St, Line, Col, Toks, N) -> - {more,{Cs,Col,Toks,Line,N,fun skip_comment/6}}; skip_comment(Cs, St, Line, Col, Toks, N) -> + skip_comment(Cs, St, Line, Col, Toks, N, St#erl_scan.unicode). + +skip_comment([C|Cs], St, Line, Col, Toks, N, U) when C =/= $\n, ?CHAR(C) -> + case ?UNI255(C) orelse U andalso ?UNICODE(C) of + true -> + skip_comment(Cs, St, Line, Col, Toks, N+1, U); + false -> + Ncol = incr_column(Col, N+1), + scan_error({illegal,character}, Line, Col, Line, Ncol, Cs) + end; +skip_comment([]=Cs, _St, Line, Col, Toks, N, _U) -> + {more,{Cs,Col,Toks,Line,N,fun skip_comment/6}}; +skip_comment(Cs, St, Line, Col, Toks, N, _U) -> scan1(Cs, St, Line, incr_column(Col, N), Toks). -scan_comment([C|Cs], St, Line, Col, Toks, Ncs) when C =/= $\n, ?CHAR(C) -> - scan_comment(Cs, St, Line, Col, Toks, [C|Ncs]); -scan_comment([]=Cs, _St, Line, Col, Toks, Ncs) -> +scan_comment(Cs, St, Line, Col, Toks, Ncs) -> + scan_comment(Cs, St, Line, Col, Toks, Ncs, St#erl_scan.unicode). + +scan_comment([C|Cs], St, Line, Col, Toks, Ncs, U) when C =/= $\n, ?CHAR(C) -> + case ?UNI255(C) orelse U andalso ?UNICODE(C) of + true -> + scan_comment(Cs, St, Line, Col, Toks, [C|Ncs], U); + false -> + Ncol = incr_column(Col, length(Ncs)+1), + scan_error({illegal,character}, Line, Col, Line, Ncol, Cs) + end; +scan_comment([]=Cs, _St, Line, Col, Toks, Ncs, _U) -> {more,{Cs,Col,Toks,Line,Ncs,fun scan_comment/6}}; -scan_comment(Cs, St, Line, Col, Toks, Ncs0) -> +scan_comment(Cs, St, Line, Col, Toks, Ncs0, _U) -> Ncs = lists:reverse(Ncs0), tok3(Cs, St, Line, Col, Toks, comment, Ncs, Ncs). diff --git a/lib/stdlib/src/escript.erl b/lib/stdlib/src/escript.erl index 498d850df3..99a9d138ac 100644 --- a/lib/stdlib/src/escript.erl +++ b/lib/stdlib/src/escript.erl @@ -710,7 +710,7 @@ epp_parse_file2(Epp, S, Forms, Parsed) -> epp_parse_file(Epp, S, [Form | Forms]) end; {error,{Ln,Mod,Args}} = Form -> - io:format("~s:~w: ~s\n", + io:format("~s:~w: ~ts\n", [S#state.file,Ln,Mod:format_error(Args)]), epp_parse_file(Epp, S#state{n_errors = S#state.n_errors + 1}, [Form | Forms]); {eof, _LastLine} = Eof -> @@ -780,10 +780,10 @@ report_errors(Errors) -> Errors). list_errors(F, [{Line,Mod,E}|Es]) -> - io:fwrite("~s:~w: ~s\n", [F,Line,Mod:format_error(E)]), + io:fwrite("~s:~w: ~ts\n", [F,Line,Mod:format_error(E)]), list_errors(F, Es); list_errors(F, [{Mod,E}|Es]) -> - io:fwrite("~s: ~s\n", [F,Mod:format_error(E)]), + io:fwrite("~s: ~ts\n", [F,Mod:format_error(E)]), list_errors(F, Es); list_errors(_F, []) -> ok. @@ -795,10 +795,10 @@ report_warnings(Ws0) -> lists:foreach(fun({_,Str}) -> io:put_chars(Str) end, Ws). format_message(F, [{Line,Mod,E}|Es]) -> - M = {{F,Line},io_lib:format("~s:~w: Warning: ~s\n", [F,Line,Mod:format_error(E)])}, + M = {{F,Line},io_lib:format("~s:~w: Warning: ~ts\n", [F,Line,Mod:format_error(E)])}, [M|format_message(F, Es)]; format_message(F, [{Mod,E}|Es]) -> - M = {none,io_lib:format("~s: Warning: ~s\n", [F,Mod:format_error(E)])}, + M = {none,io_lib:format("~s: Warning: ~ts\n", [F,Mod:format_error(E)])}, [M|format_message(F, Es)]; format_message(_, []) -> []. @@ -851,12 +851,27 @@ eval_exprs([E|Es], Bs0, Lf, Ef, RBs) -> eval_exprs(Es, Bs, Lf, Ef, RBs). format_exception(Class, Reason) -> + Enc = encoding(), + P = case Enc of + latin1 -> "P"; + _ -> "tP" + end, PF = fun(Term, I) -> - io_lib:format("~." ++ integer_to_list(I) ++ "P", [Term, 50]) + io_lib:format("~." ++ integer_to_list(I) ++ P, [Term, 50]) end, StackTrace = erlang:get_stacktrace(), StackFun = fun(M, _F, _A) -> (M =:= erl_eval) or (M =:= ?MODULE) end, - lib:format_exception(1, Class, Reason, StackTrace, StackFun, PF). + lib:format_exception(1, Class, Reason, StackTrace, StackFun, PF, Enc). + +encoding() -> + [{encoding, Encoding}] = enc(), + Encoding. + +enc() -> + case lists:keyfind(encoding, 1, io:getopts()) of + false -> [{encoding,latin1}]; % should never happen + Enc -> [Enc] + end. fatal(Str) -> throw(Str). diff --git a/lib/stdlib/src/io.erl b/lib/stdlib/src/io.erl index 9f65bbfa3a..2644083733 100644 --- a/lib/stdlib/src/io.erl +++ b/lib/stdlib/src/io.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2011. All Rights Reserved. +%% Copyright Ericsson AB 1996-2012. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -22,14 +22,15 @@ get_chars/2,get_chars/3,get_line/1,get_line/2, get_password/0, get_password/1, setopts/1, setopts/2, getopts/0, getopts/1]). --export([write/1,write/2,read/1,read/2,read/3]). +-export([write/1,write/2,read/1,read/2,read/3,read/4]). -export([columns/0,columns/1,rows/0,rows/1]). -export([fwrite/1,fwrite/2,fwrite/3,fread/2,fread/3, format/1,format/2,format/3]). --export([scan_erl_exprs/1,scan_erl_exprs/2,scan_erl_exprs/3, - scan_erl_form/1,scan_erl_form/2,scan_erl_form/3, +-export([scan_erl_exprs/1,scan_erl_exprs/2,scan_erl_exprs/3,scan_erl_exprs/4, + scan_erl_form/1,scan_erl_form/2,scan_erl_form/3,scan_erl_form/4, parse_erl_exprs/1,parse_erl_exprs/2,parse_erl_exprs/3, - parse_erl_form/1,parse_erl_form/2,parse_erl_form/3]). + parse_erl_exprs/4,parse_erl_form/1,parse_erl_form/2, + parse_erl_form/3,parse_erl_form/4]). -export([request/1,request/2,requests/1,requests/2]). -export_type([device/0, format/0]). @@ -256,8 +257,21 @@ read(Io, Prompt) -> | {'eof', EndLine :: line()} | {'error', ErrorInfo :: erl_scan:error_info(), ErrorLine :: line()}. -read(Io, Prompt, StartLine) when is_integer(StartLine) -> - case request(Io, {get_until,unicode,Prompt,erl_scan,tokens,[StartLine]}) of +read(Io, Prompt, Pos0) -> + read(Io, Prompt, Pos0, []). + +-spec read(IoDevice, Prompt, StartLine, Options) -> Result when + IoDevice :: device(), + Prompt :: prompt(), + StartLine :: line(), + Options :: erl_scan:options(), + Result :: {'ok', Term :: term(), EndLine :: line()} + | {'eof', EndLine :: line()} + | {'error', ErrorInfo :: erl_scan:error_info(), ErrorLine :: line()}. + +read(Io, Prompt, Pos0, Options) when is_integer(Pos0), is_list(Options) -> + Args = [Pos0,Options], + case request(Io, {get_until,unicode,Prompt,erl_scan,tokens,Args}) of {ok,Toks,EndLine} -> case erl_parse:parse_term(Toks) of {ok,Term} -> {ok,Term,EndLine}; @@ -368,7 +382,17 @@ scan_erl_exprs(Io, Prompt) -> Result :: erl_scan:tokens_result() | request_error(). scan_erl_exprs(Io, Prompt, Pos0) -> - request(Io, {get_until,unicode,Prompt,erl_scan,tokens,[Pos0]}). + scan_erl_exprs(Io, Prompt, Pos0, []). + +-spec scan_erl_exprs(Device, Prompt, StartLine, Options) -> Result when + Device :: device(), + Prompt :: prompt(), + StartLine :: line(), + Options :: erl_scan:options(), + Result :: erl_scan:tokens_result() | request_error(). + +scan_erl_exprs(Io, Prompt, Pos0, Options) -> + request(Io, {get_until,unicode,Prompt,erl_scan,tokens,[Pos0,Options]}). -spec scan_erl_form(Prompt) -> Result when Prompt :: prompt(), @@ -392,7 +416,17 @@ scan_erl_form(Io, Prompt) -> Result :: erl_scan:tokens_result() | request_error(). scan_erl_form(Io, Prompt, Pos0) -> - request(Io, {get_until,unicode,Prompt,erl_scan,tokens,[Pos0]}). + scan_erl_form(Io, Prompt, Pos0, []). + +-spec scan_erl_form(IoDevice, Prompt, StartLine, Options) -> Result when + IoDevice :: device(), + Prompt :: prompt(), + StartLine :: line(), + Options :: erl_scan:options(), + Result :: erl_scan:tokens_result() | request_error(). + +scan_erl_form(Io, Prompt, Pos0, Options) -> + request(Io, {get_until,unicode,Prompt,erl_scan,tokens,[Pos0,Options]}). %% Parsing Erlang code. @@ -423,7 +457,17 @@ parse_erl_exprs(Io, Prompt) -> Result :: parse_ret(). parse_erl_exprs(Io, Prompt, Pos0) -> - case request(Io, {get_until,unicode,Prompt,erl_scan,tokens,[Pos0]}) of + parse_erl_exprs(Io, Prompt, Pos0, []). + +-spec parse_erl_exprs(IoDevice, Prompt, StartLine, Options) -> Result when + IoDevice :: device(), + Prompt :: prompt(), + StartLine :: line(), + Options :: erl_scan:options(), + Result :: parse_ret(). + +parse_erl_exprs(Io, Prompt, Pos0, Options) -> + case request(Io, {get_until,unicode,Prompt,erl_scan,tokens,[Pos0,Options]}) of {ok,Toks,EndPos} -> case erl_parse:parse_exprs(Toks) of {ok,Exprs} -> {ok,Exprs,EndPos}; @@ -460,7 +504,18 @@ parse_erl_form(Io, Prompt) -> Result :: parse_form_ret(). parse_erl_form(Io, Prompt, Pos0) -> - case request(Io, {get_until,unicode,Prompt,erl_scan,tokens,[Pos0]}) of + parse_erl_form(Io, Prompt, Pos0, []). + +-spec parse_erl_form(IoDevice, Prompt, StartLine, Options) -> Result when + IoDevice :: device(), + Prompt :: prompt(), + StartLine :: line(), + Options :: erl_scan:options(), + Result :: parse_form_ret(). + +parse_erl_form(Io, Prompt, Pos0, Options) -> + Args = [Pos0, Options], + case request(Io, {get_until,unicode,Prompt,erl_scan,tokens,Args}) of {ok,Toks,EndPos} -> case erl_parse:parse_form(Toks) of {ok,Exprs} -> {ok,Exprs,EndPos}; diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl index 513d904c39..5f57a2fa42 100644 --- a/lib/stdlib/src/io_lib.erl +++ b/lib/stdlib/src/io_lib.erl @@ -64,7 +64,10 @@ -export([write/1,write/2,write/3,nl/0,format_prompt/1]). -export([write_atom/1,write_string/1,write_string/2,write_unicode_string/1, - write_unicode_string/2, write_char/1, write_unicode_char/1]). + write_unicode_string/2, write_char/1, write_unicode_char/1]). + +-export([write_unicode_string_as_latin1/1, write_unicode_string_as_latin1/2, + write_unicode_char_as_latin1/1]). -export([quote_atom/2, char_list/1, unicode_char_list/1, deep_char_list/1, deep_unicode_char_list/1, @@ -75,11 +78,13 @@ collect_line/2, collect_line/3, collect_line/4, get_until/3, get_until/4]). --export_type([chars/0, continuation/0]). +-export_type([chars/0, unicode_chars/0, unicode_string/0, continuation/0]). %%---------------------------------------------------------------------- -type chars() :: [char() | chars()]. +-type unicode_chars() :: [unicode:unicode_char() | unicode_chars()]. +-type unicode_string() :: [unicode:unicode_char()]. -type depth() :: -1 | non_neg_integer(). -opaque continuation() :: {Format :: string(), @@ -330,11 +335,32 @@ write_string(S) -> write_string(S, Q) -> [Q|write_string1(latin1, S, Q)]. +%%% There are two functions to write Unicode strings: +%%% - they both escape control characters < 160; +%%% - write_unicode_string() never escapes characters >= 160; +%%% - write_unicode_string_as_latin1() also escapes characters >= 255. + +-spec write_unicode_string(UnicodeString) -> unicode_string() when + UnicodeString :: unicode_string(). + write_unicode_string(S) -> write_unicode_string(S, $"). %" +-spec write_unicode_string(unicode_string(), char()) -> unicode_string(). + write_unicode_string(S, Q) -> - [Q|write_string1(unicode, S, Q)]. + [Q|write_string1(unicode_as_unicode, S, Q)]. + +-spec write_unicode_string_as_latin1(UnicodeString) -> string() when + UnicodeString :: unicode_string(). + +write_unicode_string_as_latin1(S) -> + write_unicode_string_as_latin1(S, $"). %" + +-spec write_unicode_string_as_latin1(unicode_string(), char()) -> string(). + +write_unicode_string_as_latin1(S, Q) -> + [Q|write_string1(unicode_as_latin1, S, Q)]. write_string1(_,[], Q) -> [Q]; @@ -347,7 +373,11 @@ string_char(_,C, _, Tail) when C >= $\s, C =< $~ -> [C|Tail]; string_char(latin1,C, _, Tail) when C >= $\240, C =< $\377 -> [C|Tail]; -string_char(unicode,C, _, Tail) when C >= $\240 -> +string_char(unicode_as_unicode,C, _, Tail) when C >= $\240 -> + [C|Tail]; +string_char(unicode_as_latin1,C, _, Tail) when C >= $\240, C =< $\377 -> + [C|Tail]; +string_char(unicode_as_latin1,C, _, Tail) when C >= $\377 -> "\\x{"++erlang:integer_to_list(C, 16)++"}"++Tail; string_char(_,$\n, _, Tail) -> [$\\,$n|Tail]; %\n = LF string_char(_,$\r, _, Tail) -> [$\\,$r|Tail]; %\r = CR @@ -374,10 +404,22 @@ write_char($\s) -> "$\\s"; %Must special case this. write_char(C) when is_integer(C), C >= $\000, C =< $\377 -> [$$|string_char(latin1,C, -1, [])]. -write_unicode_char(Ch) when Ch =< 255 -> - write_char(Ch); -write_unicode_char(Uni) -> - [$$|string_char(unicode,Uni, -1, [])]. +%%% There are two functions to write a Unicode character: +%%% - they both escape control characters < 160; +%%% - write_unicode_char() never escapes characters >= 160; +%%% - write_unicode_char_as_latin1() also escapes characters >= 255. + +-spec write_unicode_char(UnicodeChar) -> unicode_string() when + UnicodeChar :: unicode:unicode_char(). + +write_unicode_char(Uni) when is_integer(Uni), Uni >= $\000 -> + [$$|string_char(unicode_as_unicode,Uni, -1, [])]. + +-spec write_unicode_char_as_latin1(UnicodeChar) -> string() when + UnicodeChar :: unicode:unicode_char(). + +write_unicode_char_as_latin1(Uni) when is_integer(Uni), Uni >= $\000 -> + [$$|string_char(unicode_as_latin1,Uni, -1, [])]. %% char_list(CharList) %% deep_char_list(CharList) @@ -392,7 +434,8 @@ char_list([C|Cs]) when is_integer(C), C >= $\000, C =< $\377 -> char_list([]) -> true; char_list(_) -> false. %Everything else is false --spec unicode_char_list(term()) -> boolean(). +-spec unicode_char_list(Term) -> boolean() when + Term :: term(). unicode_char_list([C|Cs]) when is_integer(C), C >= 0, C < 16#D800; is_integer(C), C > 16#DFFF, C < 16#FFFE; @@ -417,7 +460,8 @@ deep_char_list([], []) -> true; deep_char_list(_, _More) -> %Everything else is false false. --spec deep_unicode_char_list(term()) -> boolean(). +-spec deep_unicode_char_list(Term) -> boolean() when + Term :: term(). deep_unicode_char_list(Cs) -> deep_unicode_char_list(Cs, []). @@ -462,7 +506,8 @@ printable_list(_) -> false. %Everything else is false %% Everything that is not a control character and not invalid unicode %% will be considered printable. --spec printable_unicode_list(term()) -> boolean(). +-spec printable_unicode_list(Term) -> boolean() when + Term :: term(). printable_unicode_list([C|Cs]) when is_integer(C), C >= $\040, C =< $\176 -> printable_unicode_list(Cs); diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl index 49a00a4ec7..5680f83ab6 100644 --- a/lib/stdlib/src/io_lib_format.erl +++ b/lib/stdlib/src/io_lib_format.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2011. All Rights Reserved. +%% Copyright Ericsson AB 1996-2012. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -22,7 +22,7 @@ -export([fwrite/2,fwrite_g/1,indentation/2]). -%% fwrite(Format, ArgList) -> [Char]. +%% fwrite(Format, ArgList) -> [unicode:unicode:char()]. %% Format the arguments in ArgList after string Format. Just generate %% an error if there is an error in the arguments. %% @@ -133,7 +133,7 @@ pcount([{$P,_As,_F,_Ad,_P,_Pad,_Enc}|Cs], Acc) -> pcount(Cs, Acc+1); pcount([_|Cs], Acc) -> pcount(Cs, Acc); pcount([], Acc) -> Acc. -%% build([Control], Pc, Indentation) -> [Char]. +%% build([Control], Pc, Indentation) -> [unicode:unicode_char()]. %% Interpret the control structures. Count the number of print %% remaining and only calculate indentation when necessary. Must also %% be smart when calculating indentation for characters in format. @@ -154,7 +154,7 @@ decr_pc($p, Pc) -> Pc - 1; decr_pc($P, Pc) -> Pc - 1; decr_pc(_, Pc) -> Pc. -%% indentation([Char], Indentation) -> Indentation. +%% indentation([unicode:unicode_char()], Indentation) -> Indentation. %% Calculate the indentation of the end of a string given its start %% indentation. We assume tabs at 8 cols. @@ -167,19 +167,19 @@ indentation([C|Cs], I) -> indentation([], I) -> I. %% control(FormatChar, [Argument], FieldWidth, Adjust, Precision, PadChar, -%% Indentation) -> -%% [Char] +%% Encoding, Indentation) -> +%% [unicode:unicode_char()] %% This is the main dispatch function for the various formatting commands. %% Field widths and precisions have already been calculated. control($w, [A], F, Adj, P, Pad, _Enc,_I) -> term(io_lib:write(A, -1), F, Adj, P, Pad); -control($p, [A], F, Adj, P, Pad, _Enc, I) -> - print(A, -1, F, Adj, P, Pad, I); +control($p, [A], F, Adj, P, Pad, Enc, I) -> + print(A, -1, F, Adj, P, Pad, Enc, I); control($W, [A,Depth], F, Adj, P, Pad, _Enc, _I) when is_integer(Depth) -> term(io_lib:write(A, Depth), F, Adj, P, Pad); -control($P, [A,Depth], F, Adj, P, Pad, _Enc, I) when is_integer(Depth) -> - print(A, Depth, F, Adj, P, Pad, I); +control($P, [A,Depth], F, Adj, P, Pad, Enc, I) when is_integer(Depth) -> + print(A, Depth, F, Adj, P, Pad, Enc, I); control($s, [A], F, Adj, P, Pad, _Enc, _I) when is_atom(A) -> string(atom_to_list(A), F, Adj, P, Pad); control($s, [L0], F, Adj, P, Pad, latin1, _I) -> @@ -187,6 +187,7 @@ control($s, [L0], F, Adj, P, Pad, latin1, _I) -> string(L, F, Adj, P, Pad); control($s, [L0], F, Adj, P, Pad, unicode, _I) -> L = unicode:characters_to_list(L0), + true = is_list(L), uniconv(string(L, F, Adj, P, Pad)); control($e, [A], F, Adj, P, Pad, _Enc, _I) when is_float(A) -> fwrite_e(A, F, Adj, P, Pad); @@ -256,13 +257,17 @@ term(T, F, Adj, P0, Pad) -> adjust(T, chars(Pad, F-L), Adj) end. -%% print(Term, Depth, Field, Adjust, Precision, PadChar, Indentation) +%% print(Term, Depth, Field, Adjust, Precision, PadChar, Encoding, +%% Indentation) %% Print a term. -print(T, D, none, Adj, P, Pad, I) -> print(T, D, 80, Adj, P, Pad, I); -print(T, D, F, Adj, none, Pad, I) -> print(T, D, F, Adj, I+1, Pad, I); -print(T, D, F, right, P, _Pad, _I) -> - io_lib_pretty:print(T, P, F, D). +print(T, D, none, Adj, P, Pad, E, I) -> print(T, D, 80, Adj, P, Pad, E, I); +print(T, D, F, Adj, none, Pad, E, I) -> print(T, D, F, Adj, I+1, Pad, E, I); +print(T, D, F, right, P, _Pad, latin1, _I) -> + io_lib_pretty:print(T, P, F, D); +print(T, D, F, right, P, _Pad, Enc, _I) -> + Options = [{column, P}, {line_length, F}, {depth, D}, {encoding, Enc}], + io_lib_pretty:print(T, Options). %% fwrite_e(Float, Field, Adjust, Precision, PadChar) @@ -608,7 +613,7 @@ prefixed_integer(Int, F, Adj, Base, Pad, Prefix, Lowercase) term([Prefix|S], F, Adj, none, Pad) end. -%% char(Char, Field, Adjust, Precision, PadChar) -> [Char]. +%% char(Char, Field, Adjust, Precision, PadChar) -> [unicode:unicode_char()]. char(C, none, _Adj, none, _Pad) -> [C]; char(C, F, _Adj, none, _Pad) -> chars(C, F); diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl index 169410796b..99ad281a9b 100644 --- a/lib/stdlib/src/io_lib_pretty.erl +++ b/lib/stdlib/src/io_lib_pretty.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% Copyright Ericsson AB 1996-2012. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -33,43 +33,76 @@ %% print(Term, Column, LineLength, Depth) -> [Chars] %% Depth = -1 gives unlimited print depth. Use io_lib:write for atomic terms. +-spec print(term()) -> io_lib:chars(). + print(Term) -> print(Term, 1, 80, -1). %% print(Term, RecDefFun) -> [Chars] %% print(Term, Depth, RecDefFun) -> [Chars] %% RecDefFun = fun(Tag, NoFields) -> [FieldTag] | no -%% Used by the shell for printing records. +%% Used by the shell for printing records and for Unicode. + +-type rec_print_fun() :: fun((Tag :: atom(), NFields :: non_neg_integer()) -> + no | [FieldName :: atom()]). +-type column() :: integer(). +-type line_length() :: pos_integer(). +-type depth() :: integer(). +-type max_chars() :: integer(). + +-type chars() :: io_lib:chars(). +-type unicode_chars() :: io_lib:unicode_chars(). +-type option() :: {column, column()} + | {line_length, line_length()} + | {depth, depth()} + | {max_chars, max_chars()} + | {record_print_fun, rec_print_fun()} + | {encoding, latin1 | utf8 | unicode}. +-type options() :: [option()]. + +-spec print(term(), rec_print_fun()) -> chars() | unicode_chars(); + (term(), options()) -> chars() | unicode_chars(). + +print(Term, Options) when is_list(Options) -> + Col = proplists:get_value(column, Options, 1), + Ll = proplists:get_value(line_length, Options, 80), + D = proplists:get_value(depth, Options, -1), + M = proplists:get_value(max_chars, Options, -1), + RecDefFun = proplists:get_value(record_print_fun, Options, no_fun), + Encoding = proplists:get_value(encoding, Options, epp:default_encoding()), + print(Term, Col, Ll, D, M, RecDefFun, Encoding); print(Term, RecDefFun) -> print(Term, -1, RecDefFun). +-spec print(term(), depth(), rec_print_fun()) -> chars() | unicode_chars(). + print(Term, Depth, RecDefFun) -> print(Term, 1, 80, Depth, RecDefFun). +-spec print(term(), column(), line_length(), depth()) -> + chars() | unicode_chars(). + print(Term, Col, Ll, D) -> - print(Term, Col, Ll, D, _M=-1, no_fun). + print(Term, Col, Ll, D, _M=-1, no_fun, latin1). +-spec print(term(), column(), line_length(), depth(), rec_print_fun()) -> + chars() | unicode_chars(). print(Term, Col, Ll, D, RecDefFun) -> print(Term, Col, Ll, D, _M=-1, RecDefFun). -print(_, _, _, 0, _M, _RF) -> "..."; -print(Term, Col, Ll, D, M, RecDefFun) when Col =< 0 -> - print(Term, 1, Ll, D, M, RecDefFun); -print(Term, Col, Ll, D, M0, RecDefFun) when is_tuple(Term); - is_list(Term) -> - If = {_S, Len} = print_length(Term, D, RecDefFun), - M = max_cs(M0, Len), - if - Len < Ll - Col, Len =< M -> - write(If); - true -> - TInd = while_fail([-1, 4], - fun(I) -> cind(If, Col, Ll, M, I, 0, 0) end, - 1), - pp(If, Col, Ll, M, TInd, indent(Col), 0, 0) - end; -print(<<_/bitstring>>=Term, Col, Ll, D, M0, RecDefFun) -> - If = {_S, Len} = print_length(Term, D, RecDefFun), +-spec print(term(), column(), line_length(), depth(), max_chars(), + rec_print_fun()) -> chars() | unicode_chars(). + +print(Term, Col, Ll, D, M, RecDefFun) -> + print(Term, Col, Ll, D, M, RecDefFun, latin1). + +print(_, _, _, 0, _M, _RF, _Enc) -> "..."; +print(Term, Col, Ll, D, M, RecDefFun, Enc) when Col =< 0 -> + print(Term, 1, Ll, D, M, RecDefFun, Enc); +print(Term, Col, Ll, D, M0, RecDefFun, Enc) when is_tuple(Term); + is_list(Term); + is_bitstring(Term) -> + If = {_S, Len} = print_length(Term, D, RecDefFun, Enc), M = max_cs(M0, Len), if Len < Ll - Col, Len =< M -> @@ -80,7 +113,7 @@ print(<<_/bitstring>>=Term, Col, Ll, D, M0, RecDefFun) -> 1), pp(If, Col, Ll, M, TInd, indent(Col), 0, 0) end; -print(Term, _Col, _Ll, _D, _M, _RF) -> +print(Term, _Col, _Ll, _D, _M, _RF, _Enc) -> io_lib:write(Term). %%% @@ -294,50 +327,56 @@ write_tail(E, S) -> %% counted but need to be added later. %% D =/= 0 -print_length([], _D, _RF) -> +print_length([], _D, _RF, _Enc) -> {"[]", 2}; -print_length({}, _D, _RF) -> +print_length({}, _D, _RF, _Enc) -> {"{}", 2}; -print_length(List, D, RF) when is_list(List) -> - case printable_list(List, D) of +print_length(List, D, RF, Enc) when is_list(List) -> + case printable_list(List, D, Enc) of true -> - S = io_lib:write_string(List, $"), %" + S = write_string(List, Enc), {S, length(S)}; %% Truncated lists could break some existing code. % {true, Prefix} -> - % S = io_lib:write_string(Prefix, $"), %" + % S = write_string(Prefix, Enc), % {[S | "..."], 3 + length(S)}; false -> - print_length_list(List, D, RF) + print_length_list(List, D, RF, Enc) end; -print_length(Fun, _D, _RF) when is_function(Fun) -> +print_length(Fun, _D, _RF, _Enc) when is_function(Fun) -> S = io_lib:write(Fun), {S, iolist_size(S)}; -print_length(R, D, RF) when is_atom(element(1, R)), - is_function(RF) -> +print_length(R, D, RF, Enc) when is_atom(element(1, R)), + is_function(RF) -> case RF(element(1, R), tuple_size(R) - 1) of no -> - print_length_tuple(R, D, RF); + print_length_tuple(R, D, RF, Enc); RDefs -> - print_length_record(R, D, RF, RDefs) + print_length_record(R, D, RF, RDefs, Enc) end; -print_length(Tuple, D, RF) when is_tuple(Tuple) -> - print_length_tuple(Tuple, D, RF); -print_length(<<>>, _D, _RF) -> +print_length(Tuple, D, RF, Enc) when is_tuple(Tuple) -> + print_length_tuple(Tuple, D, RF, Enc); +print_length(<<>>, _D, _RF, _Enc) -> {"<<>>", 4}; -print_length(<<_/bitstring>>, 1, _RF) -> +print_length(<<_/bitstring>>, 1, _RF, _Enc) -> {"<<...>>", 7}; -print_length(<<_/bitstring>>=Bin, D, _RF) -> +print_length(<<_/bitstring>>=Bin, D, _RF, Enc) -> case bit_size(Bin) rem 8 of 0 -> D1 = D - 1, - case printable_bin(Bin, D1) of - List when is_list(List) -> - S = io_lib:write_string(List, $"), + case printable_bin(Bin, D1, Enc) of + {true, List} when is_list(List) -> + S = io_lib:write_string(List, $"), %" {[$<,$<,S,$>,$>], 4 + length(S)}; - {true, Prefix} -> - S = io_lib:write_string(Prefix, $"), - {[$<,$<, S | "...>>"], 4 + length(S)}; + {false, List} when is_list(List) -> + S = io_lib:write_unicode_string(List, $"), %" + {[$<,$<,S,"/utf8>>"], 9 + length(S)}; + {true, true, Prefix} -> + S = io_lib:write_string(Prefix, $"), %" + {[$<,$<, S | "...>>"], 7 + length(S)}; + {false, true, Prefix} -> + S = io_lib:write_unicode_string(Prefix, $"), %" + {[$<,$<, S | "/utf8...>>"], 12 + length(S)}; false -> S = io_lib:write(Bin, D), {{bin,S}, iolist_size(S)} @@ -346,51 +385,51 @@ print_length(<<_/bitstring>>=Bin, D, _RF) -> S = io_lib:write(Bin, D), {{bin,S}, iolist_size(S)} end; -print_length(Term, _D, _RF) -> +print_length(Term, _D, _RF, _Enc) -> S = io_lib:write(Term), {S, iolist_size(S)}. -print_length_tuple(_Tuple, 1, _RF) -> +print_length_tuple(_Tuple, 1, _RF, _Enc) -> {"{...}", 5}; -print_length_tuple(Tuple, D, RF) -> - L = print_length_list1(tuple_to_list(Tuple), D, RF), +print_length_tuple(Tuple, D, RF, Enc) -> + L = print_length_list1(tuple_to_list(Tuple), D, RF, Enc), IsTagged = is_atom(element(1, Tuple)) and (tuple_size(Tuple) > 1), {{tuple,IsTagged,L}, list_length(L, 2)}. -print_length_record(_Tuple, 1, _RF, _RDefs) -> +print_length_record(_Tuple, 1, _RF, _RDefs, _Enc) -> {"{...}", 5}; -print_length_record(Tuple, D, RF, RDefs) -> +print_length_record(Tuple, D, RF, RDefs, Enc) -> Name = [$# | io_lib:write_atom(element(1, Tuple))], NameL = length(Name), - L = print_length_fields(RDefs, D - 1, tl(tuple_to_list(Tuple)), RF), + L = print_length_fields(RDefs, D - 1, tl(tuple_to_list(Tuple)), RF, Enc), {{record, [{Name,NameL} | L]}, list_length(L, NameL + 2)}. -print_length_fields([], _D, [], _RF) -> +print_length_fields([], _D, [], _RF, _Enc) -> []; -print_length_fields(_, 1, _, _RF) -> +print_length_fields(_, 1, _, _RF, _Enc) -> {dots, 3}; -print_length_fields([Def | Defs], D, [E | Es], RF) -> - [print_length_field(Def, D - 1, E, RF) | - print_length_fields(Defs, D - 1, Es, RF)]. +print_length_fields([Def | Defs], D, [E | Es], RF, Enc) -> + [print_length_field(Def, D - 1, E, RF, Enc) | + print_length_fields(Defs, D - 1, Es, RF, Enc)]. -print_length_field(Def, D, E, RF) -> +print_length_field(Def, D, E, RF, Enc) -> Name = io_lib:write_atom(Def), - {S, L} = print_length(E, D, RF), + {S, L} = print_length(E, D, RF, Enc), NameL = length(Name) + 3, {{field, Name, NameL, {S, L}}, NameL + L}. -print_length_list(List, D, RF) -> - L = print_length_list1(List, D, RF), +print_length_list(List, D, RF, Enc) -> + L = print_length_list1(List, D, RF, Enc), {{list, L}, list_length(L, 2)}. -print_length_list1([], _D, _RF) -> +print_length_list1([], _D, _RF, _Enc) -> []; -print_length_list1(_, 1, _RF) -> +print_length_list1(_, 1, _RF, _Enc) -> {dots, 3}; -print_length_list1([E | Es], D, RF) -> - [print_length(E, D - 1, RF) | print_length_list1(Es, D - 1, RF)]; -print_length_list1(E, D, RF) -> - print_length(E, D - 1, RF). +print_length_list1([E | Es], D, RF, Enc) -> + [print_length(E, D - 1, RF, Enc) | print_length_list1(Es, D - 1, RF, Enc)]; +print_length_list1(E, D, RF, Enc) -> + print_length(E, D - 1, RF, Enc). list_length([], Acc) -> Acc; @@ -409,16 +448,16 @@ list_length_tail({_, Len}, Acc) -> %% ?CHARS printable characters has depth 1. -define(CHARS, 4). -printable_list(L, D) when D < 0 -> - io_lib:printable_list(L); -printable_list(_L, 1) -> +printable_list(_L, 1, _Enc) -> false; -printable_list(L, _D) -> - io_lib:printable_list(L). +printable_list(L, _D, latin1) -> + io_lib:printable_list(L); +printable_list(L, _D, _Uni) -> + io_lib:printable_unicode_list(L). %% Truncated lists could break some existing code. -% printable_list(L, D) -> +% printable_list(L, D, Enc) when D >= 0 -> % Len = ?CHARS * (D - 1), -% case printable_list1(L, Len) of +% case printable_list1(L, Len, Enc) of % all -> % true; % N when is_integer(N), Len - N >= D - 1 -> @@ -428,32 +467,41 @@ printable_list(L, _D) -> % false % end. -printable_bin(Bin, D) when D >= 0, ?CHARS * D =< byte_size(Bin) -> - printable_bin(Bin, erlang:min(?CHARS * D, byte_size(Bin)), D); -printable_bin(Bin, D) -> - printable_bin(Bin, byte_size(Bin), D). +printable_bin(Bin, D, Enc) when D >= 0, ?CHARS * D =< byte_size(Bin) -> + printable_bin(Bin, erlang:min(?CHARS * D, byte_size(Bin)), D, Enc); +printable_bin(Bin, D, Enc) -> + printable_bin(Bin, byte_size(Bin), D, Enc). -printable_bin(Bin, Len, D) -> +printable_bin(Bin, Len, D, latin1) -> N = erlang:min(20, Len), L = binary_to_list(Bin, 1, N), case printable_list1(L, N) of all when N =:= byte_size(Bin) -> - L; - all when N =:= Len -> % N < byte_size(Bin) {true, L}; + all when N =:= Len -> % N < byte_size(Bin) + {true, true, L}; all -> case printable_bin1(Bin, 1 + N, Len - N) of 0 when byte_size(Bin) =:= Len -> - binary_to_list(Bin); + {true, binary_to_list(Bin)}; NC when D > 0, Len - NC >= D -> - {true, binary_to_list(Bin, 1, Len - NC)}; + {true, true, binary_to_list(Bin, 1, Len - NC)}; NC when is_integer(NC) -> false end; NC when is_integer(NC), D > 0, N - NC >= D -> - {true, binary_to_list(Bin, 1, N - NC)}; + {true, true, binary_to_list(Bin, 1, N - NC)}; NC when is_integer(NC) -> false + end; +printable_bin(Bin, Len, D, _Uni) -> + case printable_unicode(Bin, Len, []) of + {_, <<>>, L} -> + {byte_size(Bin) =:= length(L), L}; + {NC, Bin1, L} when D > 0, Len - NC >= D -> + {byte_size(Bin)-byte_size(Bin1) =:= length(L), true, L}; + {_NC, _Bin, _L} -> + false end. printable_bin1(_Bin, _Start, 0) -> @@ -484,6 +532,16 @@ printable_list1([$\e | Cs], N) -> printable_list1(Cs, N - 1); printable_list1([], _) -> all; printable_list1(_, N) -> N. +printable_unicode(<<C/utf8, R/binary>>, I, L) when I > 0 -> + printable_unicode(R, I - 1, [C | L]); +printable_unicode(Bin, I, L) -> + {I, Bin, lists:reverse(L)}. + +write_string(S, latin1) -> + io_lib:write_string(S, $"); %" +write_string(S, _Uni) -> + io_lib:write_unicode_string(S, $"). %" + %% Throw 'no_good' if the indentation exceeds half the line length %% unless there is room for M characters on the line. diff --git a/lib/stdlib/src/lib.erl b/lib/stdlib/src/lib.erl index cf4b87d7eb..b2ce2a5a8f 100644 --- a/lib/stdlib/src/lib.erl +++ b/lib/stdlib/src/lib.erl @@ -21,8 +21,9 @@ -export([flush_receive/0, error_message/2, progname/0, nonl/1, send/2, sendw/2, eval_str/1]). --export([format_exception/6, format_stacktrace/4, - format_call/4, format_fun/1]). +-export([format_exception/6, format_exception/7, + format_stacktrace/4, format_stacktrace/5, + format_call/4, format_call/5, format_fun/1]). -spec flush_receive() -> 'ok'. @@ -128,32 +129,49 @@ all_white(_) -> false. %% as indentation whenever newline has been inserted); %% Class, Reason and StackTrace are the exception; %% FormatFun = fun(Term, I) -> iolist() formats terms; -%% StackFun = fun(Mod, Fun, Arity) -> bool() is used for trimming the +%% StackFun = fun(Mod, Fun, Arity) -> boolean() is used for trimming the %% end of the stack (typically calls to erl_eval are skipped). -format_exception(I, Class, Reason, StackTrace, StackFun, FormatFun) +format_exception(I, Class, Reason, StackTrace, StackFun, FormatFun) -> + format_exception(I, Class, Reason, StackTrace, StackFun, FormatFun, + latin1). + +%% -> iolist() | unicode:charlist() (no \n at end) +%% FormatFun = fun(Term, I) -> iolist() | unicode:charlist(). +format_exception(I, Class, Reason, StackTrace, StackFun, FormatFun, Encoding) when is_integer(I), I >= 1, is_function(StackFun, 3), is_function(FormatFun, 2) -> S = n_spaces(I-1), {Term,Trace1,Trace} = analyze_exception(Class, Reason, StackTrace), - Expl0 = explain_reason(Term, Class, Trace1, FormatFun, S), - Expl = io_lib:fwrite(<<"~s~s">>, [exited(Class), Expl0]), - case format_stacktrace1(S, Trace, FormatFun, StackFun) of + Expl0 = explain_reason(Term, Class, Trace1, FormatFun, S, Encoding), + FormatString = case Encoding of + latin1 -> "~s~s"; + _ -> "~s~ts" + end, + Expl = io_lib:fwrite(FormatString, [exited(Class), Expl0]), + case format_stacktrace1(S, Trace, FormatFun, StackFun, Encoding) of [] -> Expl; Stack -> [Expl, $\n, Stack] end. %% -> iolist() (no \n at end) -format_stacktrace(I, StackTrace, StackFun, FormatFun) +format_stacktrace(I, StackTrace, StackFun, FormatFun) -> + format_stacktrace(I, StackTrace, StackFun, FormatFun, latin1). + +%% -> iolist() | unicode:charlist() (no \n at end) +format_stacktrace(I, StackTrace, StackFun, FormatFun, Encoding) when is_integer(I), I >= 1, is_function(StackFun, 3), is_function(FormatFun, 2) -> S = n_spaces(I-1), - format_stacktrace1(S, StackTrace, FormatFun, StackFun). + format_stacktrace1(S, StackTrace, FormatFun, StackFun, Encoding). %% -> iolist() (no \n at end) -format_call(I, ForMForFun, As, FormatFun) when is_integer(I), I >= 1, - is_list(As), - is_function(FormatFun, 2) -> - format_call("", n_spaces(I-1), ForMForFun, As, FormatFun). +format_call(I, ForMForFun, As, FormatFun) -> + format_call(I, ForMForFun, As, FormatFun, latin1). + +%% -> iolist() | unicode:charlist() (no \n at end) +format_call(I, ForMForFun, As, FormatFun, Enc) + when is_integer(I), I >= 1, is_list(As), is_function(FormatFun, 2) -> + format_call("", n_spaces(I-1), ForMForFun, As, FormatFun, Enc). %% -> iolist() (no \n at end) format_fun(Fun) when is_function(Fun) -> @@ -204,79 +222,80 @@ is_stacktrace(_) -> false. %% ERTS exit codes (some of them are also returned by erl_eval): -explain_reason(badarg, error, [], _PF, _S) -> +explain_reason(badarg, error, [], _PF, _S, _Enc) -> <<"bad argument">>; -explain_reason({badarg,V}, error=Cl, [], PF, S) -> % orelse, andalso +explain_reason({badarg,V}, error=Cl, [], PF, S, _Enc) -> % orelse, andalso format_value(V, <<"bad argument: ">>, Cl, PF, S); -explain_reason(badarith, error, [], _PF, _S) -> +explain_reason(badarith, error, [], _PF, _S, _Enc) -> <<"an error occurred when evaluating an arithmetic expression">>; -explain_reason({badarity,{Fun,As}}, error, [], _PF, _S) +explain_reason({badarity,{Fun,As}}, error, [], _PF, _S, _Enc) when is_function(Fun) -> %% Only the arity is displayed, not the arguments As. io_lib:fwrite(<<"~s called with ~s">>, [format_fun(Fun), argss(length(As))]); -explain_reason({badfun,Term}, error=Cl, [], PF, S) -> +explain_reason({badfun,Term}, error=Cl, [], PF, S, _Enc) -> format_value(Term, <<"bad function ">>, Cl, PF, S); -explain_reason({badmatch,Term}, error=Cl, [], PF, S) -> - format_value(Term, <<"no match of right hand side value ">>, Cl, PF, S); -explain_reason({case_clause,V}, error=Cl, [], PF, S) -> +explain_reason({badmatch,Term}, error=Cl, [], PF, S, _Enc) -> + Str = <<"no match of right hand side value ">>, + format_value(Term, Str, Cl, PF, S); +explain_reason({case_clause,V}, error=Cl, [], PF, S, _Enc) -> %% "there is no case clause with a true guard sequence and a %% pattern matching..." format_value(V, <<"no case clause matching ">>, Cl, PF, S); -explain_reason(function_clause, error, [{F,A}], _PF, _S) -> +explain_reason(function_clause, error, [{F,A}], _PF, _S, _Enc) -> %% Shell commands FAs = io_lib:fwrite(<<"~w/~w">>, [F, A]), [<<"no function clause matching call to ">> | FAs]; -explain_reason(function_clause, error=Cl, [{M,F,As,Loc}], PF, S) -> +explain_reason(function_clause, error=Cl, [{M,F,As,Loc}], PF, S, Enc) -> Str = <<"no function clause matching ">>, - [format_errstr_call(Str, Cl, {M,F}, As, PF, S),$\s|location(Loc)]; -explain_reason(if_clause, error, [], _PF, _S) -> + [format_errstr_call(Str, Cl, {M,F}, As, PF, S, Enc),$\s|location(Loc)]; +explain_reason(if_clause, error, [], _PF, _S, _Enc) -> <<"no true branch found when evaluating an if expression">>; -explain_reason(noproc, error, [], _PF, _S) -> +explain_reason(noproc, error, [], _PF, _S, _Enc) -> <<"no such process or port">>; -explain_reason(notalive, error, [], _PF, _S) -> +explain_reason(notalive, error, [], _PF, _S, _Enc) -> <<"the node cannot be part of a distributed system">>; -explain_reason(system_limit, error, [], _PF, _S) -> +explain_reason(system_limit, error, [], _PF, _S, _Enc) -> <<"a system limit has been reached">>; -explain_reason(timeout_value, error, [], _PF, _S) -> +explain_reason(timeout_value, error, [], _PF, _S, _Enc) -> <<"bad receive timeout value">>; -explain_reason({try_clause,V}, error=Cl, [], PF, S) -> +explain_reason({try_clause,V}, error=Cl, [], PF, S, _Enc) -> %% "there is no try clause with a true guard sequence and a %% pattern matching..." format_value(V, <<"no try clause matching ">>, Cl, PF, S); -explain_reason(undef, error, [{M,F,A,_}], _PF, _S) -> +explain_reason(undef, error, [{M,F,A,_}], _PF, _S, _Enc) -> %% Only the arity is displayed, not the arguments, if there are any. io_lib:fwrite(<<"undefined function ~s">>, [mfa_to_string(M, F, n_args(A))]); -explain_reason({shell_undef,F,A,_}, error, [], _PF, _S) -> +explain_reason({shell_undef,F,A,_}, error, [], _PF, _S, _Enc) -> %% Give nicer reports for undefined shell functions %% (but not when the user actively calls shell_default:F(...)). io_lib:fwrite(<<"undefined shell command ~s/~w">>, [F, n_args(A)]); %% Exit codes returned by erl_eval only: -explain_reason({argument_limit,_Fun}, error, [], _PF, _S) -> +explain_reason({argument_limit,_Fun}, error, [], _PF, _S, _Enc) -> io_lib:fwrite(<<"limit of number of arguments to interpreted function" " exceeded">>, []); -explain_reason({bad_filter,V}, error=Cl, [], PF, S) -> +explain_reason({bad_filter,V}, error=Cl, [], PF, S, _Enc) -> format_value(V, <<"bad filter ">>, Cl, PF, S); -explain_reason({bad_generator,V}, error=Cl, [], PF, S) -> +explain_reason({bad_generator,V}, error=Cl, [], PF, S, _Enc) -> format_value(V, <<"bad generator ">>, Cl, PF, S); -explain_reason({unbound,V}, error, [], _PF, _S) -> +explain_reason({unbound,V}, error, [], _PF, _S, _Enc) -> io_lib:fwrite(<<"variable ~w is unbound">>, [V]); %% Exit codes local to the shell module (restricted shell): -explain_reason({restricted_shell_bad_return, V}, exit=Cl, [], PF, S) -> +explain_reason({restricted_shell_bad_return, V}, exit=Cl, [], PF, S, _Enc) -> Str = <<"restricted shell module returned bad value ">>, format_value(V, Str, Cl, PF, S); explain_reason({restricted_shell_disallowed,{ForMF,As}}, - exit=Cl, [], PF, S) -> + exit=Cl, [], PF, S, Enc) -> %% ForMF can be a fun, but not a shell fun. Str = <<"restricted shell does not allow ">>, - format_errstr_call(Str, Cl, ForMF, As, PF, S); -explain_reason(restricted_shell_started, exit, [], _PF, _S) -> + format_errstr_call(Str, Cl, ForMF, As, PF, S, Enc); +explain_reason(restricted_shell_started, exit, [], _PF, _S, _Enc) -> <<"restricted shell starts now">>; -explain_reason(restricted_shell_stopped, exit, [], _PF, _S) -> +explain_reason(restricted_shell_stopped, exit, [], _PF, _S, _Enc) -> <<"restricted shell stopped">>; %% Other exit code: -explain_reason(Reason, Class, [], PF, S) -> +explain_reason(Reason, Class, [], PF, S, _Enc) -> PF(Reason, (iolist_size(S)+1) + exited_size(Class)). n_args(A) when is_integer(A) -> @@ -293,28 +312,28 @@ argss(2) -> argss(I) -> io_lib:fwrite(<<"~w arguments">>, [I]). -format_stacktrace1(S0, Stack0, PF, SF) -> +format_stacktrace1(S0, Stack0, PF, SF, Enc) -> Stack1 = lists:dropwhile(fun({M,F,A,_}) -> SF(M, F, A) end, lists:reverse(Stack0)), S = [" " | S0], Stack = lists:reverse(Stack1), - format_stacktrace2(S, Stack, 1, PF). + format_stacktrace2(S, Stack, 1, PF, Enc). -format_stacktrace2(S, [{M,F,A,L}|Fs], N, PF) when is_integer(A) -> +format_stacktrace2(S, [{M,F,A,L}|Fs], N, PF, Enc) when is_integer(A) -> [io_lib:fwrite(<<"~s~s ~s ~s">>, [sep(N, S), origin(N, M, F, A), mfa_to_string(M, F, A), location(L)]) - | format_stacktrace2(S, Fs, N + 1, PF)]; -format_stacktrace2(S, [{M,F,As,_}|Fs], N, PF) when is_list(As) -> + | format_stacktrace2(S, Fs, N + 1, PF, Enc)]; +format_stacktrace2(S, [{M,F,As,_}|Fs], N, PF, Enc) when is_list(As) -> A = length(As), CalledAs = [S,<<" called as ">>], - C = format_call("", CalledAs, {M,F}, As, PF), - [io_lib:fwrite(<<"~s~s ~s\n~s~s">>, + C = format_call("", CalledAs, {M,F}, As, PF, Enc), + [io_lib:fwrite(<<"~s~s ~s\n~s~ts">>, [sep(N, S), origin(N, M, F, A), mfa_to_string(M, F, A), CalledAs, C]) - | format_stacktrace2(S, Fs, N + 1, PF)]; -format_stacktrace2(_S, [], _N, _PF) -> + | format_stacktrace2(S, Fs, N + 1, PF, Enc)]; +format_stacktrace2(_S, [], _N, _PF, _Enc) -> "". location(L) -> @@ -338,22 +357,22 @@ origin(1, M, F, A) -> origin(_N, _M, _F, _A) -> <<"in call from">>. -format_errstr_call(ErrStr, Class, ForMForFun, As, PF, Pre0) -> +format_errstr_call(ErrStr, Class, ForMForFun, As, PF, Pre0, Enc) -> Pre1 = [Pre0 | n_spaces(exited_size(Class))], - format_call(ErrStr, Pre1, ForMForFun, As, PF). + format_call(ErrStr, Pre1, ForMForFun, As, PF, Enc). -format_call(ErrStr, Pre1, ForMForFun, As, PF) -> +format_call(ErrStr, Pre1, ForMForFun, As, PF, Enc) -> Arity = length(As), [ErrStr | case is_op(ForMForFun, Arity) of {yes,Op} -> - format_op(ErrStr, Pre1, Op, As, PF); + format_op(ErrStr, Pre1, Op, As, PF, Enc); no -> MFs = mf_to_string(ForMForFun, Arity), I1 = iolist_size([Pre1,ErrStr|MFs]), - S1 = pp_arguments(PF, As, I1), - S2 = pp_arguments(PF, As, iolist_size([Pre1|MFs])), - Long = count_nl(pp_arguments(PF, [a2345,b2345], I1)) > 0, + S1 = pp_arguments(PF, As, I1, Enc), + S2 = pp_arguments(PF, As, iolist_size([Pre1|MFs]), Enc), + Long = count_nl(pp_arguments(PF, [a2345,b2345], I1, Enc)) > 0, case Long or (count_nl(S2) < count_nl(S1)) of true -> [$\n, Pre1, MFs, S2]; @@ -362,11 +381,11 @@ format_call(ErrStr, Pre1, ForMForFun, As, PF) -> end end]. -format_op(ErrStr, Pre, Op, [A1], PF) -> +format_op(ErrStr, Pre, Op, [A1], PF, _Enc) -> OpS = io_lib:fwrite(<<"~s ">>, [Op]), I1 = iolist_size([ErrStr,Pre,OpS]), [OpS | PF(A1, I1+1)]; -format_op(ErrStr, Pre, Op, [A1, A2], PF) -> +format_op(ErrStr, Pre, Op, [A1, A2], PF, Enc) -> I1 = iolist_size([ErrStr,Pre]), S1 = PF(A1, I1+1), S2 = PF(A2, I1+1), @@ -377,33 +396,40 @@ format_op(ErrStr, Pre, Op, [A1, A2], PF) -> [S1,Pre1,OpS,Pre1|S2]; false -> OpS2 = io_lib:fwrite(<<" ~s ">>, [Op]), - S2_2 = PF(A2, iolist_size([ErrStr,Pre,S1|OpS2])+1), + Size1 = iolist_size([ErrStr,Pre|OpS2]), + {Size2,S1_2} = size(Enc, S1), + S2_2 = PF(A2, Size1+Size2+1), case count_nl(S2) < count_nl(S2_2) of true -> - [S1,Pre1,OpS,Pre1|S2]; + [S1_2,Pre1,OpS,Pre1|S2]; false -> - [S1,OpS2|S2_2] + [S1_2,OpS2|S2_2] end end. -pp_arguments(PF, As, I) -> - case {As, io_lib:printable_list(As)} of +pp_arguments(PF, As, I, Enc) -> + case {As, printable_list(Enc, As)} of {[Int | T], true} -> L = integer_to_list(Int), Ll = length(L), A = list_to_atom(lists:duplicate(Ll, $a)), - S0 = binary_to_list(iolist_to_binary(PF([A | T], I+1))), - brackets_to_parens([$[,L,string:sub_string(S0, 2+Ll)]); + S0 = unicode:characters_to_list(PF([A | T], I+1), Enc), + brackets_to_parens([$[,L,string:sub_string(S0, 2+Ll)], Enc); _ -> - brackets_to_parens(PF(As, I+1)) + brackets_to_parens(PF(As, I+1), Enc) end. -brackets_to_parens(S) -> - B = iolist_to_binary(S), +brackets_to_parens(S, Enc) -> + B = unicode:characters_to_binary(S, Enc), Sz = byte_size(B) - 2, <<$[,R:Sz/binary,$]>> = B, [$(,R,$)]. +printable_list(latin1, As) -> + io_lib:printable_list(As); +printable_list(_, As) -> + io_lib:printable_unicode_list(As). + mfa_to_string(M, F, A) -> io_lib:fwrite(<<"~s/~w">>, [mf_to_string({M, F}, A), A]). @@ -472,3 +498,10 @@ exited(exit) -> <<"exception exit: ">>; exited(throw) -> <<"exception throw: ">>. + +size(latin1, S) -> + {iolist_size(S),S}; +size(_, S0) -> + S = unicode:characters_to_list(S0, unicode), + true = is_list(S), + {length(S),S}. diff --git a/lib/stdlib/src/shell.erl b/lib/stdlib/src/shell.erl index dc450f0ee6..424650b8b3 100644 --- a/lib/stdlib/src/shell.erl +++ b/lib/stdlib/src/shell.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2011. All Rights Reserved. +%% Copyright Ericsson AB 1996-2012. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -128,7 +128,7 @@ start_restricted(RShMod) when is_atom(RShMod) -> error_logger:error_report( lists:flatten( io_lib:fwrite( - <<"Restricted shell module ~w not found: ~p\n">>, + "Restricted shell module ~w not found: ~"++cs_p() ++"\n", [RShMod,What]))), Error end. @@ -192,7 +192,6 @@ server(StartSync) -> end, Bs0, default_packages()), default_modules()), - %% io:fwrite("Imported modules: ~p.\n", [erl_eval:bindings(Bs)]), %% Use an Ets table for record definitions. It takes too long to %% send a huge term to and from the evaluator. Ets makes it @@ -230,9 +229,10 @@ server(StartSync) -> ok; {RShMod2,What2} -> io:fwrite( - <<"Warning! Restricted shell module ~w not found: ~p.\n" - "Only the commands q() and init:stop() will be allowed!\n">>, - [RShMod2,What2]), + ("Warning! Restricted shell module ~w not found: ~" + ++cs_p()++".\n" + "Only the commands q() and init:stop() will be allowed!\n"), + [RShMod2,What2]), application:set_env(stdlib, restricted_shell, ?MODULE) end, @@ -263,11 +263,11 @@ server_loop(N0, Eval_0, Bs00, RT, Ds00, History0, Results0) -> end, server_loop(N, Eval, Bs, RT, Ds, History, Results); {error,E} -> - fwrite_severity(benign, <<"~s">>, [E]), + fwrite_severity(benign, <<"~ts">>, [E]), server_loop(N0, Eval0, Bs0, RT, Ds0, History0, Results0) end; {error,{Line,Mod,What},_EndLine} -> - fwrite_severity(benign, <<"~w: ~s">>, + fwrite_severity(benign, <<"~w: ~ts">>, [Line, Mod:format_error(What)]), server_loop(N0, Eval0, Bs0, RT, Ds0, History0, Results0); {error,terminated} -> %Io process terminated @@ -277,7 +277,7 @@ server_loop(N0, Eval_0, Bs00, RT, Ds00, History0, Results0) -> exit(Eval0, kill), {_,Eval,_,_} = shell_rep(Eval0, Bs0, RT, Ds0), server_loop(N0, Eval, Bs0, RT, Ds0, History0, Results0); - {error,tokens} -> %Most probably unicode > 255 + {error,tokens} -> %Most probably character > 255 fwrite_severity(benign, <<"~w: Invalid tokens.">>, [N]), server_loop(N0, Eval0, Bs0, RT, Ds0, History0, Results0); @@ -290,7 +290,10 @@ server_loop(N0, Eval_0, Bs00, RT, Ds00, History0, Results0) -> end. get_command(Prompt, Eval, Bs, RT, Ds) -> - Parse = fun() -> exit(io:parse_erl_exprs(Prompt)) end, + Parse = + fun() -> + exit(io:parse_erl_exprs(group_leader(), Prompt, 1, [unicode])) + end, Pid = spawn_link(Parse), get_command1(Pid, Eval, Bs, RT, Ds). @@ -337,7 +340,7 @@ get_prompt_func() -> end. bad_prompt_func(M) -> - fwrite_severity(benign, <<"Bad prompt function: ~p">>, [M]). + fwrite_severity(benign, "Bad prompt function: ~"++cs_p(), [M]). default_prompt(N) -> %% Don't bother flattening the list irrespective of what the @@ -453,7 +456,8 @@ expand_bin_elements([{bin_element,L,E,Sz,Ts}|Fs], C) -> no_command(N) -> throw({error, - io_lib:fwrite(<<"~s: command not found">>, [erl_pp:expr(N)])}). + io_lib:fwrite(<<"~ts: command not found">>, + [erl_pp:expr(N, enc())])}). %% add_cmd(Number, Expressions, Value) %% get_cmd(Number, CurrentCommand) @@ -518,7 +522,7 @@ shell_rep(Ev, Bs0, RT, Ds0) -> {shell_rep,Ev,{value,V,Bs,Ds}} -> {V,Ev,Bs,Ds}; {shell_rep,Ev,{command_error,{Line,M,Error}}} -> - fwrite_severity(benign, <<"~w: ~s">>, + fwrite_severity(benign, <<"~w: ~ts">>, [Line, M:format_error(Error)]), {{'EXIT',Error},Ev,Bs0,Ds0}; {shell_req,Ev,get_cmd} -> @@ -570,9 +574,10 @@ report_exception(Class, Severity, {Reason,Stacktrace}, RT) -> I = iolist_size(Tag) + 1, PF = fun(Term, I1) -> pp(Term, I1, RT) end, SF = fun(M, _F, _A) -> (M =:= erl_eval) or (M =:= ?MODULE) end, - io:requests([{put_chars, Tag}, - {put_chars, - lib:format_exception(I, Class, Reason, Stacktrace, SF, PF)}, + Enc = encoding(), + Str = lib:format_exception(I, Class, Reason, Stacktrace, SF, PF, Enc), + io:requests([{put_chars, latin1, Tag}, + {put_chars, unicode, Str}, nl]). start_eval(Bs, RT, Ds) -> @@ -671,7 +676,8 @@ exprs([E0|Es], Bs1, RT, Lf, Ef, Bs0, W) -> if Es =:= [] -> VS = pp(V0, 1, RT), - [io:requests([{put_chars, VS}, nl]) || W =:= cmd], + [io:requests([{put_chars, unicode, VS}, nl]) || + W =:= cmd], %% Don't send the result back if it will be %% discarded anyway. V = if @@ -753,7 +759,7 @@ used_records(E) -> {expr, E}. fwrite_severity(Severity, S, As) -> - io:fwrite(<<"~s\n">>, [format_severity(Severity, S, As)]). + io:fwrite(<<"~ts\n">>, [format_severity(Severity, S, As)]). format_severity(Severity, S, As) -> add_severity(Severity, io_lib:fwrite(S, As)). @@ -958,13 +964,13 @@ local_func(rd, [{atom,_,RecName},RecDef0], Bs, _Shell, RT, _Lf, _Ef) -> RecDef = expand_value(RecDef0), RDs = lists:flatten(erl_pp:expr(RecDef)), Attr = lists:concat(["-record('", RecName, "',", RDs, ")."]), - {ok, Tokens, _} = erl_scan:string(Attr), + {ok, Tokens, _} = erl_scan:string(Attr, 1, [unicode]), case erl_parse:parse_form(Tokens) of {ok,AttrForm} -> [RN] = add_records([AttrForm], Bs, RT), {value,RN,Bs}; {error,{_Line,M,ErrDesc}} -> - ErrStr = io_lib:fwrite(<<"~s">>, [M:format_error(ErrDesc)]), + ErrStr = io_lib:fwrite(<<"~ts">>, [M:format_error(ErrDesc)]), exit(lists:flatten(ErrStr)) end; local_func(rd, [_,_], _Bs, _Shell, _RT, _Lf, _Ef) -> @@ -988,11 +994,13 @@ local_func(rl, [A], Bs0, _Shell, RT, Lf, Ef) -> {value,list_records(record_defs(RT, listify(Recs))),Bs}; local_func(rp, [A], Bs0, _Shell, RT, Lf, Ef) -> {[V],Bs} = expr_list([A], Bs0, Lf, Ef), - W = columns(), - io:requests([{put_chars, - io_lib_pretty:print(V, 1, W, -1, ?CHAR_MAX, - record_print_fun(RT))}, - nl]), + Cs = io_lib_pretty:print(V, ([{column, 1}, + {line_length, columns()}, + {depth, -1}, + {max_chars, ?CHAR_MAX}, + {record_print_fun, record_print_fun(RT)}] + ++ enc())), + io:requests([{put_chars, unicode, Cs}, nl]), {value,ok,Bs}; local_func(rr, [A], Bs0, _Shell, RT, Lf, Ef) -> {[File],Bs} = expr_list([A], Bs0, Lf, Ef), @@ -1166,7 +1174,7 @@ add_records(RAs, Bs0, RT) -> case check_command([], Bs1) of {error,{_Line,M,ErrDesc}} -> %% A source file that has not been compiled. - ErrStr = io_lib:fwrite(<<"~s">>, [M:format_error(ErrDesc)]), + ErrStr = io_lib:fwrite(<<"~ts">>, [M:format_error(ErrDesc)]), exit(lists:flatten(ErrStr)); ok -> true = ets:insert(RT, Recs), @@ -1343,25 +1351,25 @@ list_commands([{{N,command},Es0}, {{N,result}, V} |Ds], RT) -> VS = pp(V, 4, RT), Ns = io_lib:fwrite(<<"~w: ">>, [N]), I = iolist_size(Ns), - io:requests([{put_chars, Ns}, - {format,<<"~s\n">>,[erl_pp:exprs(Es, I, none)]}, + io:requests([{put_chars, latin1, Ns}, + {format,<<"~ts\n">>,[erl_pp:exprs(Es, I, enc())]}, {format,<<"-> ">>,[]}, - {put_chars, VS}, + {put_chars, unicode, VS}, nl]), list_commands(Ds, RT); list_commands([{{N,command},Es0} |Ds], RT) -> Es = prep_list_commands(Es0), Ns = io_lib:fwrite(<<"~w: ">>, [N]), I = iolist_size(Ns), - io:requests([{put_chars, Ns}, - {format,<<"~s\n">>,[erl_pp:exprs(Es, I, none)]}]), + io:requests([{put_chars, latin1, Ns}, + {format,<<"~ts\n">>,[erl_pp:exprs(Es, I, enc())]}]), list_commands(Ds, RT); list_commands([_D|Ds], RT) -> list_commands(Ds, RT); list_commands([], _RT) -> ok. list_bindings([{{module,M},Val}|Bs], RT) -> - io:fwrite(<<"~p is ~p\n">>, [M,Val]), + io:fwrite(<<"~w is ~w\n">>, [M,Val]), list_bindings(Bs, RT); list_bindings([{Name,Val}|Bs], RT) -> case erl_eval:fun_data(Val) of @@ -1369,13 +1377,13 @@ list_bindings([{Name,Val}|Bs], RT) -> FCs = expand_value(FCs0), % looks nicer F = {'fun',0,{clauses,FCs}}, M = {match,0,{var,0,Name},F}, - io:fwrite(<<"~s\n">>, [erl_pp:expr(M)]); + io:fwrite(<<"~ts\n">>, [erl_pp:expr(M, enc())]); false -> Namel = io_lib:fwrite(<<"~s = ">>, [Name]), Nl = iolist_size(Namel)+1, ValS = pp(Val, Nl, RT), - io:requests([{put_chars, Namel}, - {put_chars, ValS}, + io:requests([{put_chars, latin1, Namel}, + {put_chars, unicode, ValS}, nl]) end, list_bindings(Bs, RT); @@ -1384,7 +1392,7 @@ list_bindings([], _RT) -> list_records(Records) -> lists:foreach(fun({_Name,Attr}) -> - io:fwrite(<<"~s">>, [erl_pp:attribute(Attr)]) + io:fwrite(<<"~ts">>, [erl_pp:attribute(Attr, enc())]) end, Records). record_defs(RT, Names) -> @@ -1427,8 +1435,20 @@ get_history_and_results() -> {History, erlang:min(Results, History)}. pp(V, I, RT) -> - io_lib_pretty:print(V, I, columns(), ?LINEMAX, ?CHAR_MAX, - record_print_fun(RT)). + pp(V, I, RT, enc()). + +pp(V, I, RT, Enc) -> + io_lib_pretty:print(V, ([{column, I}, {line_length, columns()}, + {depth, ?LINEMAX}, {max_chars, ?CHAR_MAX}, + {record_print_fun, record_print_fun(RT)}] + ++ Enc)). + +%% Control sequence 'p' possibly with Unicode translation modifier +cs_p() -> + case encoding() of + latin1 -> "p"; + unicode -> "tp" + end. columns() -> case io:columns() of @@ -1436,6 +1456,16 @@ columns() -> _ -> 80 end. +encoding() -> + [{encoding, Encoding}] = enc(), + Encoding. + +enc() -> + case lists:keyfind(encoding, 1, io:getopts()) of + false -> [{encoding,latin1}]; % should never happen + Enc -> [Enc] + end. + garb(Shell) -> erlang:garbage_collect(Shell), catch erlang:garbage_collect(whereis(user)), @@ -1458,7 +1488,8 @@ check_env(V) -> ok; {ok, Val} -> Txt = io_lib:fwrite( - <<"Invalid value of STDLIB configuration parameter ~p: ~p\n">>, + ("Invalid value of STDLIB configuration parameter ~w: ~" + ++cs_p()++"\n"), [V, Val]), error_logger:info_report(lists:flatten(Txt)) end. |