From cfb22ab07199e68aae1a69203a67ea1e4520f9bf Mon Sep 17 00:00:00 2001
From: Hans Bolinder <hasse@erlang.org>
Date: Thu, 2 May 2019 09:46:50 +0200
Subject: stdlib: Optimize pretty printing of strings with chars_limit

Avoid traversing string arguments in their entirety when limiting the
output with the 'chars_limit' option.
---
 lib/stdlib/src/io_lib_format.erl | 110 +++++++++++++++++++++++++++------------
 lib/stdlib/test/io_SUITE.erl     |  36 +++++++++++--
 2 files changed, 108 insertions(+), 38 deletions(-)

diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl
index d1aa4cd157..157cc07e19 100644
--- a/lib/stdlib/src/io_lib_format.erl
+++ b/lib/stdlib/src/io_lib_format.erl
@@ -327,11 +327,11 @@ indentation([], I) -> I.
 %%                 PadChar, Encoding, StringP, ChrsLim, Indentation) -> String
 %%  These are the dispatch functions for the various formatting controls.
 
-control_small($s, [A], F, Adj, P, Pad, latin1) when is_atom(A) ->
+control_small($s, [A], F, Adj, P, Pad, latin1=Enc) when is_atom(A) ->
     L = iolist_to_chars(atom_to_list(A)),
-    string(L, F, Adj, P, Pad);
-control_small($s, [A], F, Adj, P, Pad, unicode) when is_atom(A) ->
-    string(atom_to_list(A), F, Adj, P, Pad);
+    string(L, F, Adj, P, Pad, Enc);
+control_small($s, [A], F, Adj, P, Pad, unicode=Enc) when is_atom(A) ->
+    string(atom_to_list(A), F, Adj, P, Pad, Enc);
 control_small($e, [A], F, Adj, P, Pad, _Enc) when is_float(A) ->
     fwrite_e(A, F, Adj, P, Pad);
 control_small($f, [A], F, Adj, P, Pad, _Enc) when is_float(A) ->
@@ -371,12 +371,12 @@ control_small($n, [], F, Adj, P, Pad, _Enc) -> newline(F, Adj, P, Pad);
 control_small($i, [_A], _F, _Adj, _P, _Pad, _Enc) -> [];
 control_small(_C, _As, _F, _Adj, _P, _Pad, _Enc) -> not_small.
 
-control_limited($s, [L0], F, Adj, P, Pad, latin1, _Str, CL, _I) ->
-    L = iolist_to_chars(L0),
-    string(limit_string(L, F, CL), limit_field(F, CL), Adj, P, Pad);
-control_limited($s, [L0], F, Adj, P, Pad, unicode, _Str, CL, _I) ->
-    L = cdata_to_chars(L0),
-    uniconv(string(limit_string(L, F, CL), limit_field(F, CL), Adj, P, Pad));
+control_limited($s, [L0], F, Adj, P, Pad, latin1=Enc, _Str, CL, _I) ->
+    L = iolist_to_chars(L0, F, CL),
+    string(L, limit_field(F, CL), Adj, P, Pad, Enc);
+control_limited($s, [L0], F, Adj, P, Pad, unicode=Enc, _Str, CL, _I) ->
+    L = cdata_to_chars(L0, F, CL),
+    uniconv(string(L, limit_field(F, CL), Adj, P, Pad, Enc));
 control_limited($w, [A], F, Adj, P, Pad, Enc, _Str, CL, _I) ->
     Chars = io_lib:write(A, [{depth, -1}, {encoding, Enc}, {chars_limit, CL}]),
     term(Chars, F, Adj, P, Pad);
@@ -718,7 +718,10 @@ fwrite_g(Fl, F, Adj, P, Pad) when P >= 1 ->
     end.
 
 
-%% iolist_to_chars(iolist()) -> io_lib:chars()
+iolist_to_chars(Cs, F, CharsLimit) when CharsLimit < 0; CharsLimit >= F ->
+    iolist_to_chars(Cs);
+iolist_to_chars(Cs, _, CharsLimit) ->
+    limit_iolist_to_chars(Cs, sub(CharsLimit, 3), [], normal). % three dots
 
 iolist_to_chars([C|Cs]) when is_integer(C), C >= $\000, C =< $\377 ->
     [C | iolist_to_chars(Cs)];
@@ -729,12 +732,34 @@ iolist_to_chars([]) ->
 iolist_to_chars(B) when is_binary(B) ->
     binary_to_list(B).
 
-%% cdata() :: clist() | cbinary()
-%% clist() ::  maybe_improper_list(char() | cbinary() | clist(),
-%%                                 cbinary() | nil())
-%% cbinary() :: unicode:unicode_binary() | unicode:latin1_binary()
+limit_iolist_to_chars(Cs, 0, S, normal) ->
+    L = limit_iolist_to_chars(Cs, 4, S, final),
+    case iolist_size(L) of
+        N when N < 4 -> L;
+        4 -> "..."
+    end;
+limit_iolist_to_chars(_Cs, 0, _S, final) -> [];
+limit_iolist_to_chars([C|Cs], Limit, S, Mode) when C >= $\000, C =< $\377 ->
+    [C | limit_iolist_to_chars(Cs, Limit - 1, S, Mode)];
+limit_iolist_to_chars([I|Cs], Limit, S, Mode) ->
+    limit_iolist_to_chars(I, Limit, [Cs|S], Mode);
+limit_iolist_to_chars([], _Limit, [], _Mode) ->
+    [];
+limit_iolist_to_chars([], Limit, [Cs|S], Mode) ->
+    limit_iolist_to_chars(Cs, Limit, S, Mode);
+limit_iolist_to_chars(B, Limit, S, Mode) when is_binary(B) ->
+    case byte_size(B) of
+        Sz when Sz > Limit ->
+            {B1, B2} = split_binary(B, Limit),
+            [binary_to_list(B1) | limit_iolist_to_chars(B2, 0, S, Mode)];
+        Sz ->
+            [binary_to_list(B) | limit_iolist_to_chars([], Limit-Sz, S, Mode)]
+    end.
 
-%% cdata_to_chars(cdata()) -> io_lib:chars()
+cdata_to_chars(Cs, F, CharsLimit) when CharsLimit < 0; CharsLimit >= F ->
+    cdata_to_chars(Cs);
+cdata_to_chars(Cs, _, CharsLimit) ->
+    limit_cdata_to_chars(Cs, sub(CharsLimit, 3), normal). % three dots
 
 cdata_to_chars([C|Cs]) when is_integer(C), C >= $\000 ->
     [C | cdata_to_chars(Cs)];
@@ -748,11 +773,25 @@ cdata_to_chars(B) when is_binary(B) ->
         _ -> binary_to_list(B)
     end.
 
-limit_string(S, F, CharsLimit) when CharsLimit < 0; CharsLimit >= F -> S;
-limit_string(S, _F, CharsLimit) ->
-    case io_lib:chars_length(S) =< CharsLimit of
-        true -> S;
-        false -> [string:slice(S, 0, sub(CharsLimit, 3)), "..."]
+limit_cdata_to_chars(Cs, 0, normal) ->
+    L = limit_cdata_to_chars(Cs, 4, final),
+    case string:length(L) of
+        N when N < 4 -> L;
+        4 -> "..."
+    end;
+limit_cdata_to_chars(_Cs, 0, final) -> [];
+limit_cdata_to_chars(Cs, Limit, Mode) ->
+    case string:next_grapheme(Cs) of
+        {error, <<C,Cs1/binary>>} ->
+            %% This is how ~ts handles Latin1 binaries with option
+            %% chars_limit.
+            [C | limit_cdata_to_chars(Cs1, Limit - 1, Mode)];
+        {error, [C|Cs1]} -> % not all versions of module string return this
+            [C | limit_cdata_to_chars(Cs1, Limit - 1, Mode)];
+        [] ->
+            [];
+        [GC|Cs1] ->
+            [GC | limit_cdata_to_chars(Cs1, Limit - 1, Mode)]
     end.
 
 limit_field(F, CharsLimit) when CharsLimit < 0; F =:= none ->
@@ -762,30 +801,30 @@ limit_field(F, CharsLimit) ->
 
 %% string(String, Field, Adjust, Precision, PadChar)
 
-string(S, none, _Adj, none, _Pad) -> S;
-string(S, F, Adj, none, Pad) ->
-    string_field(S, F, Adj, io_lib:chars_length(S), Pad);
-string(S, none, _Adj, P, Pad) ->
-    string_field(S, P, left, io_lib:chars_length(S), Pad);
-string(S, F, Adj, P, Pad) when F >= P ->
+string(S, none, _Adj, none, _Pad, _Enc) -> S;
+string(S, F, Adj, none, Pad, Enc) ->
+    string_field(S, F, Adj, io_lib:chars_length(S), Pad, Enc);
+string(S, none, _Adj, P, Pad, Enc) ->
+    string_field(S, P, left, io_lib:chars_length(S), Pad, Enc);
+string(S, F, Adj, P, Pad, Enc) when F >= P ->
     N = io_lib:chars_length(S),
     if F > P ->
 	    if N > P ->
-		    adjust(flat_trunc(S, P), chars(Pad, F-P), Adj);
+		    adjust(flat_trunc(S, P, Enc), chars(Pad, F-P), Adj);
 	       N < P ->
 		    adjust([S|chars(Pad, P-N)], chars(Pad, F-P), Adj);
 	       true -> % N == P
 		    adjust(S, chars(Pad, F-P), Adj)
 	    end;
        true -> % F == P
-	    string_field(S, F, Adj, N, Pad)
+	    string_field(S, F, Adj, N, Pad, Enc)
     end.
 
-string_field(S, F, _Adj, N, _Pad) when N > F ->
-    flat_trunc(S, F);
-string_field(S, F, Adj, N, Pad) when N < F ->
+string_field(S, F, _Adj, N, _Pad, Enc) when N > F ->
+    flat_trunc(S, F, Enc);
+string_field(S, F, Adj, N, Pad, _Enc) when N < F ->
     adjust(S, chars(Pad, F-N), Adj);
-string_field(S, _, _, _, _) -> % N == F
+string_field(S, _, _, _, _, _) -> % N == F
     S.
 
 %% unprefixed_integer(Int, Field, Adjust, Base, PadChar, Lowercase)
@@ -837,7 +876,10 @@ adjust(Data, Pad, right) -> [Pad|Data].
 
 %% Flatten and truncate a deep list to at most N elements.
 
-flat_trunc(List, N) when is_integer(N), N >= 0 ->
+flat_trunc(List, N, latin1) when is_integer(N), N >= 0 ->
+    {S, _} = lists:split(N, lists:flatten(List)),
+    S;
+flat_trunc(List, N, unicode) when is_integer(N), N >= 0 ->
     string:slice(List, 0, N).
 
 %% A deep version of lists:duplicate/2
diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl
index 824f5d19f2..9b6d8d7401 100644
--- a/lib/stdlib/test/io_SUITE.erl
+++ b/lib/stdlib/test/io_SUITE.erl
@@ -32,7 +32,7 @@
          io_with_huge_message_queue/1, format_string/1,
 	 maps/1, coverage/1, otp_14178_unicode_atoms/1, otp_14175/1,
          otp_14285/1, limit_term/1, otp_14983/1, otp_15103/1, otp_15076/1,
-         otp_15159/1, otp_15639/1]).
+         otp_15159/1, otp_15639/1, otp_15705/1]).
 
 -export([pretty/2, trf/3]).
 
@@ -65,7 +65,7 @@ all() ->
      io_lib_width_too_small, io_with_huge_message_queue,
      format_string, maps, coverage, otp_14178_unicode_atoms, otp_14175,
      otp_14285, limit_term, otp_14983, otp_15103, otp_15076, otp_15159,
-     otp_15639].
+     otp_15639, otp_15705].
 
 %% Error cases for output.
 error_1(Config) when is_list(Config) ->
@@ -2504,9 +2504,11 @@ otp_14983(_Config) ->
 
 trunc_string() ->
     "str " = trf("str ", [], 10),
-    "str ..." = trf("str ~s", ["str"], 6),
+    "str str" = trf("str ~s", ["str"], 6),
+    "str ..." = trf("str ~s", ["str1"], 6),
     "str str" = trf("str ~s", ["str"], 7),
-    "str ..." = trf("str ~8s", ["str"], 6),
+    "str str" = trf("str ~8s", ["str"], 6),
+    "str ..." = trf("str ~8s", ["str1"], 6),
     Pa = filename:dirname(code:which(?MODULE)),
     {ok, UNode} = test_server:start_node(printable_range_unicode, slave,
 					 [{args, " +pc unicode -pa " ++ Pa}]),
@@ -2680,3 +2682,29 @@ otp_15639(_Config) ->
     "\"12345678\"..." = pretty("123456789"++[x], UOpts),
     "[[...]|...]" = pretty(["1","2","3","4","5","6","7","8"], UOpts),
     ok.
+
+otp_15705(_Config) ->
+    L = [<<"an">>,["at"],[["om"]]],
+    "..." = trf("~s", [L], 0),
+    "..." = trf("~s", [L], 1),
+    "..." = trf("~s", [L], 2),
+    "..." = trf("~s", [L], 3),
+    "a..." = trf("~s", [L], 4),
+    "an..." = trf("~s", [L], 5),
+    "anatom" = trf("~s", [L], 6),
+    L2 = ["a",[<<"na">>],[["tom"]]],
+    "..." = trf("~s", [L2], 3),
+    "a..." = trf("~s", [L2], 4),
+    "an..." = trf("~s", [L2], 5),
+    "anatom" = trf("~s", [L2], 6),
+
+    A = [[<<"äpple"/utf8>>, "plus", <<"äpple">>]],
+    "äp..." = trf("~ts", [A], 5),
+    "äppleplusäpple" = trf("~ts", [A], 14),
+    U = [["ки"],"рилл","и́ческий атом"],
+    "ки..." = trf("~ts", [U], 5),
+    "кирилли́ческий..." = trf("~ts", [U], 16),
+    "кирилли́ческий атом" = trf("~ts", [U], 20),
+
+    "|кирилли́чес|" = trf("|~10ts|", [U], -1),
+    ok.
-- 
cgit v1.2.3