From cfb22ab07199e68aae1a69203a67ea1e4520f9bf Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Thu, 2 May 2019 09:46:50 +0200 Subject: stdlib: Optimize pretty printing of strings with chars_limit Avoid traversing all of string arguments when limiting the output with the 'chars_limit' option. --- lib/stdlib/src/io_lib_format.erl | 110 +++++++++++++++++++++++++++------------ lib/stdlib/test/io_SUITE.erl | 36 +++++++++++-- 2 files changed, 108 insertions(+), 38 deletions(-) diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl index d1aa4cd157..157cc07e19 100644 --- a/lib/stdlib/src/io_lib_format.erl +++ b/lib/stdlib/src/io_lib_format.erl @@ -327,11 +327,11 @@ indentation([], I) -> I. %% PadChar, Encoding, StringP, ChrsLim, Indentation) -> String %% These are the dispatch functions for the various formatting controls. -control_small($s, [A], F, Adj, P, Pad, latin1) when is_atom(A) -> +control_small($s, [A], F, Adj, P, Pad, latin1=Enc) when is_atom(A) -> L = iolist_to_chars(atom_to_list(A)), - string(L, F, Adj, P, Pad); -control_small($s, [A], F, Adj, P, Pad, unicode) when is_atom(A) -> - string(atom_to_list(A), F, Adj, P, Pad); + string(L, F, Adj, P, Pad, Enc); +control_small($s, [A], F, Adj, P, Pad, unicode=Enc) when is_atom(A) -> + string(atom_to_list(A), F, Adj, P, Pad, Enc); control_small($e, [A], F, Adj, P, Pad, _Enc) when is_float(A) -> fwrite_e(A, F, Adj, P, Pad); control_small($f, [A], F, Adj, P, Pad, _Enc) when is_float(A) -> @@ -371,12 +371,12 @@ control_small($n, [], F, Adj, P, Pad, _Enc) -> newline(F, Adj, P, Pad); control_small($i, [_A], _F, _Adj, _P, _Pad, _Enc) -> []; control_small(_C, _As, _F, _Adj, _P, _Pad, _Enc) -> not_small. 
-control_limited($s, [L0], F, Adj, P, Pad, latin1, _Str, CL, _I) -> - L = iolist_to_chars(L0), - string(limit_string(L, F, CL), limit_field(F, CL), Adj, P, Pad); -control_limited($s, [L0], F, Adj, P, Pad, unicode, _Str, CL, _I) -> - L = cdata_to_chars(L0), - uniconv(string(limit_string(L, F, CL), limit_field(F, CL), Adj, P, Pad)); +control_limited($s, [L0], F, Adj, P, Pad, latin1=Enc, _Str, CL, _I) -> + L = iolist_to_chars(L0, F, CL), + string(L, limit_field(F, CL), Adj, P, Pad, Enc); +control_limited($s, [L0], F, Adj, P, Pad, unicode=Enc, _Str, CL, _I) -> + L = cdata_to_chars(L0, F, CL), + uniconv(string(L, limit_field(F, CL), Adj, P, Pad, Enc)); control_limited($w, [A], F, Adj, P, Pad, Enc, _Str, CL, _I) -> Chars = io_lib:write(A, [{depth, -1}, {encoding, Enc}, {chars_limit, CL}]), term(Chars, F, Adj, P, Pad); @@ -718,7 +718,10 @@ fwrite_g(Fl, F, Adj, P, Pad) when P >= 1 -> end. -%% iolist_to_chars(iolist()) -> io_lib:chars() +iolist_to_chars(Cs, F, CharsLimit) when CharsLimit < 0; CharsLimit >= F -> + iolist_to_chars(Cs); +iolist_to_chars(Cs, _, CharsLimit) -> + limit_iolist_to_chars(Cs, sub(CharsLimit, 3), [], normal). % three dots iolist_to_chars([C|Cs]) when is_integer(C), C >= $\000, C =< $\377 -> [C | iolist_to_chars(Cs)]; @@ -729,12 +732,34 @@ iolist_to_chars([]) -> iolist_to_chars(B) when is_binary(B) -> binary_to_list(B). -%% cdata() :: clist() | cbinary() -%% clist() :: maybe_improper_list(char() | cbinary() | clist(), -%% cbinary() | nil()) -%% cbinary() :: unicode:unicode_binary() | unicode:latin1_binary() +limit_iolist_to_chars(Cs, 0, S, normal) -> + L = limit_iolist_to_chars(Cs, 4, S, final), + case iolist_size(L) of + N when N < 4 -> L; + 4 -> "..." 
+ end; +limit_iolist_to_chars(_Cs, 0, _S, final) -> []; +limit_iolist_to_chars([C|Cs], Limit, S, Mode) when C >= $\000, C =< $\377 -> + [C | limit_iolist_to_chars(Cs, Limit - 1, S, Mode)]; +limit_iolist_to_chars([I|Cs], Limit, S, Mode) -> + limit_iolist_to_chars(I, Limit, [Cs|S], Mode); +limit_iolist_to_chars([], _Limit, [], _Mode) -> + []; +limit_iolist_to_chars([], Limit, [Cs|S], Mode) -> + limit_iolist_to_chars(Cs, Limit, S, Mode); +limit_iolist_to_chars(B, Limit, S, Mode) when is_binary(B) -> + case byte_size(B) of + Sz when Sz > Limit -> + {B1, B2} = split_binary(B, Limit), + [binary_to_list(B1) | limit_iolist_to_chars(B2, 0, S, Mode)]; + Sz -> + [binary_to_list(B) | limit_iolist_to_chars([], Limit-Sz, S, Mode)] + end. -%% cdata_to_chars(cdata()) -> io_lib:chars() +cdata_to_chars(Cs, F, CharsLimit) when CharsLimit < 0; CharsLimit >= F -> + cdata_to_chars(Cs); +cdata_to_chars(Cs, _, CharsLimit) -> + limit_cdata_to_chars(Cs, sub(CharsLimit, 3), normal). % three dots cdata_to_chars([C|Cs]) when is_integer(C), C >= $\000 -> [C | cdata_to_chars(Cs)]; @@ -748,11 +773,25 @@ cdata_to_chars(B) when is_binary(B) -> _ -> binary_to_list(B) end. -limit_string(S, F, CharsLimit) when CharsLimit < 0; CharsLimit >= F -> S; -limit_string(S, _F, CharsLimit) -> - case io_lib:chars_length(S) =< CharsLimit of - true -> S; - false -> [string:slice(S, 0, sub(CharsLimit, 3)), "..."] +limit_cdata_to_chars(Cs, 0, normal) -> + L = limit_cdata_to_chars(Cs, 4, final), + case string:length(L) of + N when N < 4 -> L; + 4 -> "..." + end; +limit_cdata_to_chars(_Cs, 0, final) -> []; +limit_cdata_to_chars(Cs, Limit, Mode) -> + case string:next_grapheme(Cs) of + {error, <<C,Cs1/binary>>} -> + %% This is how ~ts handles Latin1 binaries with option + %% chars_limit. 
+ [C | limit_cdata_to_chars(Cs1, Limit - 1, Mode)]; + {error, [C|Cs1]} -> % not all versions of module string return this + [C | limit_cdata_to_chars(Cs1, Limit - 1, Mode)]; + [] -> + []; + [GC|Cs1] -> + [GC | limit_cdata_to_chars(Cs1, Limit - 1, Mode)] end. limit_field(F, CharsLimit) when CharsLimit < 0; F =:= none -> @@ -762,30 +801,30 @@ limit_field(F, CharsLimit) -> %% string(String, Field, Adjust, Precision, PadChar) -string(S, none, _Adj, none, _Pad) -> S; -string(S, F, Adj, none, Pad) -> - string_field(S, F, Adj, io_lib:chars_length(S), Pad); -string(S, none, _Adj, P, Pad) -> - string_field(S, P, left, io_lib:chars_length(S), Pad); -string(S, F, Adj, P, Pad) when F >= P -> +string(S, none, _Adj, none, _Pad, _Enc) -> S; +string(S, F, Adj, none, Pad, Enc) -> + string_field(S, F, Adj, io_lib:chars_length(S), Pad, Enc); +string(S, none, _Adj, P, Pad, Enc) -> + string_field(S, P, left, io_lib:chars_length(S), Pad, Enc); +string(S, F, Adj, P, Pad, Enc) when F >= P -> N = io_lib:chars_length(S), if F > P -> if N > P -> - adjust(flat_trunc(S, P), chars(Pad, F-P), Adj); + adjust(flat_trunc(S, P, Enc), chars(Pad, F-P), Adj); N < P -> adjust([S|chars(Pad, P-N)], chars(Pad, F-P), Adj); true -> % N == P adjust(S, chars(Pad, F-P), Adj) end; true -> % F == P - string_field(S, F, Adj, N, Pad) + string_field(S, F, Adj, N, Pad, Enc) end. -string_field(S, F, _Adj, N, _Pad) when N > F -> - flat_trunc(S, F); -string_field(S, F, Adj, N, Pad) when N < F -> +string_field(S, F, _Adj, N, _Pad, Enc) when N > F -> + flat_trunc(S, F, Enc); +string_field(S, F, Adj, N, Pad, _Enc) when N < F -> adjust(S, chars(Pad, F-N), Adj); -string_field(S, _, _, _, _) -> % N == F +string_field(S, _, _, _, _, _) -> % N == F S. %% unprefixed_integer(Int, Field, Adjust, Base, PadChar, Lowercase) @@ -837,7 +876,10 @@ adjust(Data, Pad, right) -> [Pad|Data]. %% Flatten and truncate a deep list to at most N elements. 
-flat_trunc(List, N) when is_integer(N), N >= 0 -> +flat_trunc(List, N, latin1) when is_integer(N), N >= 0 -> + {S, _} = lists:split(N, lists:flatten(List)), + S; +flat_trunc(List, N, unicode) when is_integer(N), N >= 0 -> string:slice(List, 0, N). %% A deep version of lists:duplicate/2 diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl index 824f5d19f2..9b6d8d7401 100644 --- a/lib/stdlib/test/io_SUITE.erl +++ b/lib/stdlib/test/io_SUITE.erl @@ -32,7 +32,7 @@ io_with_huge_message_queue/1, format_string/1, maps/1, coverage/1, otp_14178_unicode_atoms/1, otp_14175/1, otp_14285/1, limit_term/1, otp_14983/1, otp_15103/1, otp_15076/1, - otp_15159/1, otp_15639/1]). + otp_15159/1, otp_15639/1, otp_15705/1]). -export([pretty/2, trf/3]). @@ -65,7 +65,7 @@ all() -> io_lib_width_too_small, io_with_huge_message_queue, format_string, maps, coverage, otp_14178_unicode_atoms, otp_14175, otp_14285, limit_term, otp_14983, otp_15103, otp_15076, otp_15159, - otp_15639]. + otp_15639, otp_15705]. %% Error cases for output. error_1(Config) when is_list(Config) -> @@ -2504,9 +2504,11 @@ otp_14983(_Config) -> trunc_string() -> "str " = trf("str ", [], 10), - "str ..." = trf("str ~s", ["str"], 6), + "str str" = trf("str ~s", ["str"], 6), + "str ..." = trf("str ~s", ["str1"], 6), "str str" = trf("str ~s", ["str"], 7), - "str ..." = trf("str ~8s", ["str"], 6), + "str str" = trf("str ~8s", ["str"], 6), + "str ..." = trf("str ~8s", ["str1"], 6), Pa = filename:dirname(code:which(?MODULE)), {ok, UNode} = test_server:start_node(printable_range_unicode, slave, [{args, " +pc unicode -pa " ++ Pa}]), @@ -2680,3 +2682,29 @@ otp_15639(_Config) -> "\"12345678\"..." = pretty("123456789"++[x], UOpts), "[[...]|...]" = pretty(["1","2","3","4","5","6","7","8"], UOpts), ok. + +otp_15705(_Config) -> + L = [<<"an">>,["at"],[["om"]]], + "..." = trf("~s", [L], 0), + "..." = trf("~s", [L], 1), + "..." = trf("~s", [L], 2), + "..." = trf("~s", [L], 3), + "a..." = trf("~s", [L], 4), + "an..." 
= trf("~s", [L], 5), + "anatom" = trf("~s", [L], 6), + L2 = ["a",[<<"na">>],[["tom"]]], + "..." = trf("~s", [L2], 3), + "a..." = trf("~s", [L2], 4), + "an..." = trf("~s", [L2], 5), + "anatom" = trf("~s", [L2], 6), + + A = [[<<"äpple"/utf8>>, "plus", <<"äpple">>]], + "äp..." = trf("~ts", [A], 5), + "äppleplusäpple" = trf("~ts", [A], 14), + U = [["ки"],"рилл","и́ческий атом"], + "ки..." = trf("~ts", [U], 5), + "кирилли́ческий..." = trf("~ts", [U], 16), + "кирилли́ческий атом" = trf("~ts", [U], 20), + + "|кирилли́чес|" = trf("|~10ts|", [U], -1), + ok. -- cgit v1.2.3