aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Hans Bolinder <[email protected]> 2019-05-02 09:46:50 +0200
committer Hans Bolinder <[email protected]> 2019-05-03 09:57:17 +0200
commit cfb22ab07199e68aae1a69203a67ea1e4520f9bf (patch)
tree ac7e40f456f4a2c988e5213b1556e9642e28188d
parent 7ac51a967ecf75d692b40d6fac62f838d4ae510c (diff)
download otp-cfb22ab07199e68aae1a69203a67ea1e4520f9bf.tar.gz
otp-cfb22ab07199e68aae1a69203a67ea1e4520f9bf.tar.bz2
otp-cfb22ab07199e68aae1a69203a67ea1e4520f9bf.zip
stdlib: Optimize pretty printing of strings with chars_limit
Avoid traversing entire string arguments when the output is limited by the 'chars_limit' option.
-rw-r--r-- lib/stdlib/src/io_lib_format.erl 110
-rw-r--r-- lib/stdlib/test/io_SUITE.erl 36
2 files changed, 108 insertions, 38 deletions
diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl
index d1aa4cd157..157cc07e19 100644
--- a/lib/stdlib/src/io_lib_format.erl
+++ b/lib/stdlib/src/io_lib_format.erl
@@ -327,11 +327,11 @@ indentation([], I) -> I.
%% PadChar, Encoding, StringP, ChrsLim, Indentation) -> String
%% These are the dispatch functions for the various formatting controls.
-control_small($s, [A], F, Adj, P, Pad, latin1) when is_atom(A) ->
+control_small($s, [A], F, Adj, P, Pad, latin1=Enc) when is_atom(A) ->
L = iolist_to_chars(atom_to_list(A)),
- string(L, F, Adj, P, Pad);
-control_small($s, [A], F, Adj, P, Pad, unicode) when is_atom(A) ->
- string(atom_to_list(A), F, Adj, P, Pad);
+ string(L, F, Adj, P, Pad, Enc);
+control_small($s, [A], F, Adj, P, Pad, unicode=Enc) when is_atom(A) ->
+ string(atom_to_list(A), F, Adj, P, Pad, Enc);
control_small($e, [A], F, Adj, P, Pad, _Enc) when is_float(A) ->
fwrite_e(A, F, Adj, P, Pad);
control_small($f, [A], F, Adj, P, Pad, _Enc) when is_float(A) ->
@@ -371,12 +371,12 @@ control_small($n, [], F, Adj, P, Pad, _Enc) -> newline(F, Adj, P, Pad);
control_small($i, [_A], _F, _Adj, _P, _Pad, _Enc) -> [];
control_small(_C, _As, _F, _Adj, _P, _Pad, _Enc) -> not_small.
-control_limited($s, [L0], F, Adj, P, Pad, latin1, _Str, CL, _I) ->
- L = iolist_to_chars(L0),
- string(limit_string(L, F, CL), limit_field(F, CL), Adj, P, Pad);
-control_limited($s, [L0], F, Adj, P, Pad, unicode, _Str, CL, _I) ->
- L = cdata_to_chars(L0),
- uniconv(string(limit_string(L, F, CL), limit_field(F, CL), Adj, P, Pad));
+control_limited($s, [L0], F, Adj, P, Pad, latin1=Enc, _Str, CL, _I) ->
+ L = iolist_to_chars(L0, F, CL),
+ string(L, limit_field(F, CL), Adj, P, Pad, Enc);
+control_limited($s, [L0], F, Adj, P, Pad, unicode=Enc, _Str, CL, _I) ->
+ L = cdata_to_chars(L0, F, CL),
+ uniconv(string(L, limit_field(F, CL), Adj, P, Pad, Enc));
control_limited($w, [A], F, Adj, P, Pad, Enc, _Str, CL, _I) ->
Chars = io_lib:write(A, [{depth, -1}, {encoding, Enc}, {chars_limit, CL}]),
term(Chars, F, Adj, P, Pad);
@@ -718,7 +718,10 @@ fwrite_g(Fl, F, Adj, P, Pad) when P >= 1 ->
end.
-%% iolist_to_chars(iolist()) -> io_lib:chars()
+iolist_to_chars(Cs, F, CharsLimit) when CharsLimit < 0; CharsLimit >= F ->
+ iolist_to_chars(Cs);
+iolist_to_chars(Cs, _, CharsLimit) ->
+ limit_iolist_to_chars(Cs, sub(CharsLimit, 3), [], normal). % three dots
iolist_to_chars([C|Cs]) when is_integer(C), C >= $\000, C =< $\377 ->
[C | iolist_to_chars(Cs)];
@@ -729,12 +732,34 @@ iolist_to_chars([]) ->
iolist_to_chars(B) when is_binary(B) ->
binary_to_list(B).
-%% cdata() :: clist() | cbinary()
-%% clist() :: maybe_improper_list(char() | cbinary() | clist(),
-%% cbinary() | nil())
-%% cbinary() :: unicode:unicode_binary() | unicode:latin1_binary()
+limit_iolist_to_chars(Cs, 0, S, normal) ->
+ L = limit_iolist_to_chars(Cs, 4, S, final),
+ case iolist_size(L) of
+ N when N < 4 -> L;
+ 4 -> "..."
+ end;
+limit_iolist_to_chars(_Cs, 0, _S, final) -> [];
+limit_iolist_to_chars([C|Cs], Limit, S, Mode) when C >= $\000, C =< $\377 ->
+ [C | limit_iolist_to_chars(Cs, Limit - 1, S, Mode)];
+limit_iolist_to_chars([I|Cs], Limit, S, Mode) ->
+ limit_iolist_to_chars(I, Limit, [Cs|S], Mode);
+limit_iolist_to_chars([], _Limit, [], _Mode) ->
+ [];
+limit_iolist_to_chars([], Limit, [Cs|S], Mode) ->
+ limit_iolist_to_chars(Cs, Limit, S, Mode);
+limit_iolist_to_chars(B, Limit, S, Mode) when is_binary(B) ->
+ case byte_size(B) of
+ Sz when Sz > Limit ->
+ {B1, B2} = split_binary(B, Limit),
+ [binary_to_list(B1) | limit_iolist_to_chars(B2, 0, S, Mode)];
+ Sz ->
+ [binary_to_list(B) | limit_iolist_to_chars([], Limit-Sz, S, Mode)]
+ end.
-%% cdata_to_chars(cdata()) -> io_lib:chars()
+cdata_to_chars(Cs, F, CharsLimit) when CharsLimit < 0; CharsLimit >= F ->
+ cdata_to_chars(Cs);
+cdata_to_chars(Cs, _, CharsLimit) ->
+ limit_cdata_to_chars(Cs, sub(CharsLimit, 3), normal). % three dots
cdata_to_chars([C|Cs]) when is_integer(C), C >= $\000 ->
[C | cdata_to_chars(Cs)];
@@ -748,11 +773,25 @@ cdata_to_chars(B) when is_binary(B) ->
_ -> binary_to_list(B)
end.
-limit_string(S, F, CharsLimit) when CharsLimit < 0; CharsLimit >= F -> S;
-limit_string(S, _F, CharsLimit) ->
- case io_lib:chars_length(S) =< CharsLimit of
- true -> S;
- false -> [string:slice(S, 0, sub(CharsLimit, 3)), "..."]
+limit_cdata_to_chars(Cs, 0, normal) ->
+ L = limit_cdata_to_chars(Cs, 4, final),
+ case string:length(L) of
+ N when N < 4 -> L;
+ 4 -> "..."
+ end;
+limit_cdata_to_chars(_Cs, 0, final) -> [];
+limit_cdata_to_chars(Cs, Limit, Mode) ->
+ case string:next_grapheme(Cs) of
+ {error, <<C,Cs1/binary>>} ->
+ %% This is how ~ts handles Latin1 binaries with option
+ %% chars_limit.
+ [C | limit_cdata_to_chars(Cs1, Limit - 1, Mode)];
+ {error, [C|Cs1]} -> % not all versions of module string return this
+ [C | limit_cdata_to_chars(Cs1, Limit - 1, Mode)];
+ [] ->
+ [];
+ [GC|Cs1] ->
+ [GC | limit_cdata_to_chars(Cs1, Limit - 1, Mode)]
end.
limit_field(F, CharsLimit) when CharsLimit < 0; F =:= none ->
@@ -762,30 +801,30 @@ limit_field(F, CharsLimit) ->
%% string(String, Field, Adjust, Precision, PadChar)
-string(S, none, _Adj, none, _Pad) -> S;
-string(S, F, Adj, none, Pad) ->
- string_field(S, F, Adj, io_lib:chars_length(S), Pad);
-string(S, none, _Adj, P, Pad) ->
- string_field(S, P, left, io_lib:chars_length(S), Pad);
-string(S, F, Adj, P, Pad) when F >= P ->
+string(S, none, _Adj, none, _Pad, _Enc) -> S;
+string(S, F, Adj, none, Pad, Enc) ->
+ string_field(S, F, Adj, io_lib:chars_length(S), Pad, Enc);
+string(S, none, _Adj, P, Pad, Enc) ->
+ string_field(S, P, left, io_lib:chars_length(S), Pad, Enc);
+string(S, F, Adj, P, Pad, Enc) when F >= P ->
N = io_lib:chars_length(S),
if F > P ->
if N > P ->
- adjust(flat_trunc(S, P), chars(Pad, F-P), Adj);
+ adjust(flat_trunc(S, P, Enc), chars(Pad, F-P), Adj);
N < P ->
adjust([S|chars(Pad, P-N)], chars(Pad, F-P), Adj);
true -> % N == P
adjust(S, chars(Pad, F-P), Adj)
end;
true -> % F == P
- string_field(S, F, Adj, N, Pad)
+ string_field(S, F, Adj, N, Pad, Enc)
end.
-string_field(S, F, _Adj, N, _Pad) when N > F ->
- flat_trunc(S, F);
-string_field(S, F, Adj, N, Pad) when N < F ->
+string_field(S, F, _Adj, N, _Pad, Enc) when N > F ->
+ flat_trunc(S, F, Enc);
+string_field(S, F, Adj, N, Pad, _Enc) when N < F ->
adjust(S, chars(Pad, F-N), Adj);
-string_field(S, _, _, _, _) -> % N == F
+string_field(S, _, _, _, _, _) -> % N == F
S.
%% unprefixed_integer(Int, Field, Adjust, Base, PadChar, Lowercase)
@@ -837,7 +876,10 @@ adjust(Data, Pad, right) -> [Pad|Data].
%% Flatten and truncate a deep list to at most N elements.
-flat_trunc(List, N) when is_integer(N), N >= 0 ->
+flat_trunc(List, N, latin1) when is_integer(N), N >= 0 ->
+ {S, _} = lists:split(N, lists:flatten(List)),
+ S;
+flat_trunc(List, N, unicode) when is_integer(N), N >= 0 ->
string:slice(List, 0, N).
%% A deep version of lists:duplicate/2
diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl
index 824f5d19f2..9b6d8d7401 100644
--- a/lib/stdlib/test/io_SUITE.erl
+++ b/lib/stdlib/test/io_SUITE.erl
@@ -32,7 +32,7 @@
io_with_huge_message_queue/1, format_string/1,
maps/1, coverage/1, otp_14178_unicode_atoms/1, otp_14175/1,
otp_14285/1, limit_term/1, otp_14983/1, otp_15103/1, otp_15076/1,
- otp_15159/1, otp_15639/1]).
+ otp_15159/1, otp_15639/1, otp_15705/1]).
-export([pretty/2, trf/3]).
@@ -65,7 +65,7 @@ all() ->
io_lib_width_too_small, io_with_huge_message_queue,
format_string, maps, coverage, otp_14178_unicode_atoms, otp_14175,
otp_14285, limit_term, otp_14983, otp_15103, otp_15076, otp_15159,
- otp_15639].
+ otp_15639, otp_15705].
%% Error cases for output.
error_1(Config) when is_list(Config) ->
@@ -2504,9 +2504,11 @@ otp_14983(_Config) ->
trunc_string() ->
"str " = trf("str ", [], 10),
- "str ..." = trf("str ~s", ["str"], 6),
+ "str str" = trf("str ~s", ["str"], 6),
+ "str ..." = trf("str ~s", ["str1"], 6),
"str str" = trf("str ~s", ["str"], 7),
- "str ..." = trf("str ~8s", ["str"], 6),
+ "str str" = trf("str ~8s", ["str"], 6),
+ "str ..." = trf("str ~8s", ["str1"], 6),
Pa = filename:dirname(code:which(?MODULE)),
{ok, UNode} = test_server:start_node(printable_range_unicode, slave,
[{args, " +pc unicode -pa " ++ Pa}]),
@@ -2680,3 +2682,29 @@ otp_15639(_Config) ->
"\"12345678\"..." = pretty("123456789"++[x], UOpts),
"[[...]|...]" = pretty(["1","2","3","4","5","6","7","8"], UOpts),
ok.
+
+otp_15705(_Config) ->
+ L = [<<"an">>,["at"],[["om"]]],
+ "..." = trf("~s", [L], 0),
+ "..." = trf("~s", [L], 1),
+ "..." = trf("~s", [L], 2),
+ "..." = trf("~s", [L], 3),
+ "a..." = trf("~s", [L], 4),
+ "an..." = trf("~s", [L], 5),
+ "anatom" = trf("~s", [L], 6),
+ L2 = ["a",[<<"na">>],[["tom"]]],
+ "..." = trf("~s", [L2], 3),
+ "a..." = trf("~s", [L2], 4),
+ "an..." = trf("~s", [L2], 5),
+ "anatom" = trf("~s", [L2], 6),
+
+ A = [[<<"äpple"/utf8>>, "plus", <<"äpple">>]],
+ "äp..." = trf("~ts", [A], 5),
+ "äppleplusäpple" = trf("~ts", [A], 14),
+ U = [["ки"],"рилл","и́ческий атом"],
+ "ки..." = trf("~ts", [U], 5),
+ "кирилли́ческий..." = trf("~ts", [U], 16),
+ "кирилли́ческий атом" = trf("~ts", [U], 20),
+
+ "|кирилли́чес|" = trf("|~10ts|", [U], -1),
+ ok.