diff options
author | Richard Carlsson <[email protected]> | 2014-05-06 10:34:07 +0200 |
---|---|---|
committer | Hans Bolinder <[email protected]> | 2015-03-10 10:21:41 +0100 |
commit | 8e7441c27485bd416abdb0253c9c92620d671065 (patch) | |
tree | 6d722b99dd0f001346450b14903e7e6f79379ecc | |
parent | 4037296294670e7a9bb2a3c1cdbd0236cbc15807 (diff) | |
download | otp-8e7441c27485bd416abdb0253c9c92620d671065.tar.gz otp-8e7441c27485bd416abdb0253c9c92620d671065.tar.bz2 otp-8e7441c27485bd416abdb0253c9c92620d671065.zip |
Make the scanned form of the io_lib format strings available for processing
This adds three new functions to io_lib - scan_format/2, unscan_format/1,
and build_text/1 - which expose the parsed form of the format control
sequences to make it possible to easily modify or filter the input to
io_lib:format/2. This can e.g. be used in order to replace unbounded-size
control sequences like ~w or ~p with corresponding depth-limited ~W and ~P
before doing the actual formatting.
-rw-r--r-- | lib/stdlib/doc/src/io.xml | 3 | ||||
-rw-r--r-- | lib/stdlib/doc/src/io_lib.xml | 70 | ||||
-rw-r--r-- | lib/stdlib/src/io_lib.erl | 42 | ||||
-rw-r--r-- | lib/stdlib/src/io_lib_format.erl | 112 | ||||
-rw-r--r-- | lib/stdlib/test/io_SUITE.erl | 22 |
5 files changed, 224 insertions, 25 deletions
diff --git a/lib/stdlib/doc/src/io.xml b/lib/stdlib/doc/src/io.xml index a28180b42a..8ebfdb2e7f 100644 --- a/lib/stdlib/doc/src/io.xml +++ b/lib/stdlib/doc/src/io.xml @@ -505,7 +505,8 @@ ok <p>Writes the data with standard syntax in the same way as <c>~w</c>, but breaks terms whose printed representation is longer than one line into many lines and indents each - line sensibly. It also tries to detect lists of + line sensibly. Left justification is not supported. + It also tries to detect lists of printable characters and to output these as strings. The Unicode translation modifier is used for determining what characters are printable. For example:</p> diff --git a/lib/stdlib/doc/src/io_lib.xml b/lib/stdlib/doc/src/io_lib.xml index 3312b08064..2117d66381 100644 --- a/lib/stdlib/doc/src/io_lib.xml +++ b/lib/stdlib/doc/src/io_lib.xml @@ -4,7 +4,7 @@ <erlref> <header> <copyright> - <year>1996</year><year>2013</year> + <year>1996</year><year>2014</year> <holder>Ericsson AB. All Rights Reserved.</holder> </copyright> <legalnotice> @@ -59,6 +59,35 @@ <datatype> <name name="latin1_string"/> </datatype> + <datatype> + <name name="format_spec"/> + <desc><p>Description:</p> + <list type="bulleted"> + <item><p><c>control_char</c> is the type of control + sequence: <c>$P</c>, <c>$w</c>, and so on;</p> + </item> + <item><p><c>args</c> is a list of the arguments used by the + control sequence, or an empty list if the control sequence + does not take any arguments;</p> + </item> + <item><p><c>width</c> is the field width;</p> + </item> + <item><p><c>adjust</c> is the adjustment;</p> + </item> + <item><p><c>precision</c> is the precision of the printed + argument;</p> + </item> + <item><p><c>pad_char</c> is the padding character;</p> + </item> + <item><p><c>encoding</c> is set to <c>true</c> if the translation + modifier <c>t</c> is present;</p> + </item> + <item><p><c>strings</c> is set to <c>false</c> if the modifier + <c>l</c> is present.</p> + </item> + </list> + </desc> + </datatype> </datatypes> <funcs> <func> @@ -260,6 +289,45 @@ </desc> </func> <func> + <name name="scan_format" arity="2"/> + <fsummary>Parse all control sequences in the format string</fsummary> + <desc> + <p>Returns a list corresponding to the given format string, + where control sequences have been replaced with + corresponding tuples. This list can be passed to <seealso + marker="#build_text/1">io_lib:build_text/1</seealso> to have + the same effect as <c>io_lib:format(Format, Args)</c>, or to + <seealso + marker="#unscan_format/1">io_lib:unscan_format/1</seealso> + in order to get the corresponding pair of <c>Format</c> and + <c>Args</c> (with every <c>*</c> and corresponding argument + expanded to numeric values).</p> + <p>A typical use of this function is to replace unbounded-size + control sequences like <c>~w</c> and <c>~p</c> with the + depth-limited variants <c>~W</c> and <c>~P</c> before + formatting to text, e.g. in a logger.</p> + </desc> + </func> + <func> + <name name="unscan_format" arity="1"/> + <fsummary>Revert a pre-parsed format list to a plain character list + and a list of arguments</fsummary> + <desc> + <p>See <seealso + marker="#scan_format/2">io_lib:scan_format/2</seealso> for + details.</p> + </desc> + </func> + <func> + <name name="build_text" arity="1"/> + <fsummary>Build the output text for a pre-parsed format list</fsummary> + <desc> + <p>See <seealso + marker="#scan_format/2">io_lib:scan_format/2</seealso> for + details.</p> + </desc> + </func> + <func> <name name="indentation" arity="2"/> <fsummary>Indentation after printing string</fsummary> <desc> diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl index adc9a0cf5f..e90cda0533 100644 --- a/lib/stdlib/src/io_lib.erl +++ b/lib/stdlib/src/io_lib.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2013. All Rights Reserved. +%% Copyright Ericsson AB 1996-2014. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -60,6 +60,7 @@ -module(io_lib). -export([fwrite/2,fread/2,fread/3,format/2]). +-export([scan_format/2,unscan_format/1,build_text/1]). -export([print/1,print/4,indentation/2]). -export([write/1,write/2,write/3,nl/0,format_prompt/1,format_prompt/2]). @@ -83,7 +84,7 @@ deep_unicode_char_list/1]). -export_type([chars/0, latin1_string/0, continuation/0, - fread_error/0, fread_item/0]). + fread_error/0, fread_item/0, format_spec/0]). %%---------------------------------------------------------------------- @@ -108,6 +109,18 @@ -type fread_item() :: string() | atom() | integer() | float(). +-type format_spec() :: + #{ + control_char => char(), + args => [any()], + width => 'none' | integer(), + adjust => 'left' | 'right', + precision => 'none' | integer(), + pad_char => char(), + encoding => 'unicode' | 'latin1', + strings => boolean() + }. + %%---------------------------------------------------------------------- %% Interface calls to sub-modules. @@ -156,6 +169,31 @@ format(Format, Args) -> Other end. +-spec scan_format(Format, Data) -> FormatList when + Format :: io:format(), + Data :: [term()], + FormatList :: [char() | format_spec()]. + +scan_format(Format, Args) -> + try io_lib_format:scan(Format, Args) + catch + _:_ -> erlang:error(badarg, [Format, Args]) + end. + +-spec unscan_format(FormatList) -> {Format, Data} when + FormatList :: [char() | format_spec()], + Format :: io:format(), + Data :: [term()]. + +unscan_format(FormatList) -> + io_lib_format:unscan(FormatList). + +-spec build_text(FormatList) -> chars() when + FormatList :: [char() | format_spec()]. + +build_text(FormatList) -> + io_lib_format:build(FormatList). + -spec print(Term) -> chars() when Term :: term(). diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl index 89ae6fb187..015afb317a 100644 --- a/lib/stdlib/src/io_lib_format.erl +++ b/lib/stdlib/src/io_lib_format.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2013. All Rights Reserved. +%% Copyright Ericsson AB 1996-2014. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -20,10 +20,9 @@ %% Formatting functions of io library. --export([fwrite/2,fwrite_g/1,indentation/2]). +-export([fwrite/2,fwrite_g/1,indentation/2,scan/2,unscan/1,build/1]). -%% fwrite(Format, ArgList) -> string(). -%% Format the arguments in ArgList after string Format. Just generate +%% Format the arguments in Args after string Format. Just generate %% an error if there is an error in the arguments. %% %% To do the printing command correctly we need to calculate the @@ -37,15 +36,84 @@ %% and it also splits the handling of the control characters into two %% parts. -fwrite(Format, Args) when is_atom(Format) -> - fwrite(atom_to_list(Format), Args); -fwrite(Format, Args) when is_binary(Format) -> - fwrite(binary_to_list(Format), Args); +-spec fwrite(Format, Data) -> FormatList when + Format :: io:format(), + Data :: [term()], + FormatList :: [char() | io_lib:format_spec()]. + fwrite(Format, Args) -> - Cs = collect(Format, Args), + build(scan(Format, Args)). + +%% Build the output text for a pre-parsed format list. + +-spec build(FormatList) -> io_lib:chars() when + FormatList :: [char() | io_lib:format_spec()]. + +build(Cs) -> Pc = pcount(Cs), build(Cs, Pc, 0). +%% Parse all control sequences in the format string. + +-spec scan(Format, Data) -> FormatList when + Format :: io:format(), + Data :: [term()], + FormatList :: [char() | io_lib:format_spec()]. + +scan(Format, Args) when is_atom(Format) -> + scan(atom_to_list(Format), Args); +scan(Format, Args) when is_binary(Format) -> + scan(binary_to_list(Format), Args); +scan(Format, Args) -> + collect(Format, Args). + +%% Revert a pre-parsed format list to a plain character list and a +%% list of arguments. + +-spec unscan(FormatList) -> {Format, Data} when + FormatList :: [char() | io_lib:format_spec()], + Format :: io:format(), + Data :: [term()]. + +unscan(Cs) -> + {print(Cs), args(Cs)}. + +args([#{args := As} | Cs]) -> + As ++ args(Cs); +args([_C | Cs]) -> + args(Cs); +args([]) -> + []. + +print([#{control_char := C, width := F, adjust := Ad, precision := P, + pad_char := Pad, encoding := Encoding, strings := Strings} | Cs]) -> + print(C, F, Ad, P, Pad, Encoding, Strings) ++ print(Cs); +print([C | Cs]) -> + [C | print(Cs)]; +print([]) -> + []. + +print(C, F, Ad, P, Pad, Encoding, Strings) -> + [$~] ++ print_field_width(F, Ad) ++ print_precision(P) ++ + print_pad_char(Pad) ++ print_encoding(Encoding) ++ + print_strings(Strings) ++ [C]. + +print_field_width(none, _Ad) -> ""; +print_field_width(F, left) -> integer_to_list(-F); +print_field_width(F, right) -> integer_to_list(F). + +print_precision(none) -> ""; +print_precision(P) -> [$. | integer_to_list(P)]. + +print_pad_char($\s) -> ""; % default, no need to make explicit +print_pad_char(Pad) -> [$., Pad]. + +print_encoding(unicode) -> "t"; +print_encoding(latin1) -> "". + +print_strings(false) -> "l"; +print_strings(true) -> "". + collect([$~|Fmt0], Args0) -> {C,Fmt1,Args1} = collect_cseq(Fmt0, Args0), [C|collect(Fmt1, Args1)]; @@ -60,7 +128,10 @@ collect_cseq(Fmt0, Args0) -> {Encoding,Fmt4,Args4} = encoding(Fmt3, Args3), {Strings,Fmt5,Args5} = strings(Fmt4, Args4), {C,As,Fmt6,Args6} = collect_cc(Fmt5, Args5), - {{C,As,F,Ad,P,Pad,Encoding,Strings},Fmt6,Args6}. + FormatSpec = #{control_char => C, args => As, width => F, adjust => Ad, + precision => P, pad_char => Pad, encoding => Encoding, + strings => Strings}, + {FormatSpec,Fmt6,Args6}. encoding([$t|Fmt],Args) -> true = hd(Fmt) =/= $l, @@ -136,17 +207,19 @@ collect_cc([$i|Fmt], [A|Args]) -> {$i,[A],Fmt,Args}. pcount(Cs) -> pcount(Cs, 0). -pcount([{$p,_As,_F,_Ad,_P,_Pad,_Enc,_Str}|Cs], Acc) -> pcount(Cs, Acc+1); -pcount([{$P,_As,_F,_Ad,_P,_Pad,_Enc,_Str}|Cs], Acc) -> pcount(Cs, Acc+1); +pcount([#{control_char := $p}|Cs], Acc) -> pcount(Cs, Acc+1); +pcount([#{control_char := $P}|Cs], Acc) -> pcount(Cs, Acc+1); pcount([_|Cs], Acc) -> pcount(Cs, Acc); pcount([], Acc) -> Acc. -%% build([Control], Pc, Indentation) -> string(). +%% build([Control], Pc, Indentation) -> io_lib:chars(). %% Interpret the control structures. Count the number of print %% remaining and only calculate indentation when necessary. Must also %% be smart when calculating indentation for characters in format. -build([{C,As,F,Ad,P,Pad,Enc,Str}|Cs], Pc0, I) -> +build([#{control_char := C, args := As, width := F, adjust := Ad, + precision := P, pad_char := Pad, encoding := Enc, + strings := Str} | Cs], Pc0, I) -> S = control(C, As, F, Ad, P, Pad, Enc, Str, I), Pc1 = decr_pc(C, Pc0), if @@ -162,10 +235,14 @@ decr_pc($p, Pc) -> Pc - 1; decr_pc($P, Pc) -> Pc - 1; decr_pc(_, Pc) -> Pc. -%% indentation(String, Indentation) -> Indentation. + %% Calculate the indentation of the end of a string given its start %% indentation. We assume tabs at 8 cols. +-spec indentation(String, StartIndent) -> integer() when + String :: io_lib:chars(), + StartIndent :: integer(). + indentation([$\n|Cs], _I) -> indentation(Cs, 0); indentation([$\t|Cs], I) -> indentation(Cs, ((I + 8) div 8) * 8); indentation([C|Cs], I) when is_integer(C) -> @@ -366,7 +443,6 @@ float_data([D|Cs], Ds) when D >= $0, D =< $9 -> float_data([_|Cs], Ds) -> float_data(Cs, Ds). -%% fwrite_g(Float) %% Writes the shortest, correctly rounded string that converts %% to Float when read back with list_to_float/1. %% @@ -374,6 +450,8 @@ float_data([_|Cs], Ds) -> %% in Proceedings of the SIGPLAN '96 Conference on Programming %% Language Design and Implementation. +-spec fwrite_g(float()) -> string(). + fwrite_g(0.0) -> "0.0"; fwrite_g(Float) when is_float(Float) -> @@ -642,7 +720,7 @@ prefixed_integer(Int, F, Adj, Base, Pad, Prefix, Lowercase) term([Prefix|S], F, Adj, none, Pad) end. -%% char(Char, Field, Adjust, Precision, PadChar) -> string(). +%% char(Char, Field, Adjust, Precision, PadChar) -> chars(). char(C, none, _Adj, none, _Pad) -> [C]; char(C, F, _Adj, none, _Pad) -> chars(C, F); diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl index 2203dd8f51..8d53949c40 100644 --- a/lib/stdlib/test/io_SUITE.erl +++ b/lib/stdlib/test/io_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1999-2013. All Rights Reserved. +%% Copyright Ericsson AB 1999-2014. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -31,7 +31,7 @@ printable_range/1, io_lib_print_binary_depth_one/1, otp_10302/1, otp_10755/1, otp_10836/1, io_lib_width_too_small/1, - io_with_huge_message_queue/1]). + io_with_huge_message_queue/1, format_string/1]). -export([pretty/2]). @@ -71,7 +71,8 @@ all() -> io_fread_newlines, otp_8989, io_lib_fread_literal, printable_range, io_lib_print_binary_depth_one, otp_10302, otp_10755, otp_10836, - io_lib_width_too_small, io_with_huge_message_queue]. + io_lib_width_too_small, io_with_huge_message_queue, + format_string]. groups() -> []. @@ -1035,7 +1036,14 @@ rp(Term, Col, Ll, D, M, RF) -> lists:flatten(io_lib:format("~s", [R])). fmt(Fmt, Args) -> - lists:flatten(io_lib:format(Fmt, Args)). + FormatList = io_lib:scan_format(Fmt, Args), + {Fmt2, Args2} = io_lib:unscan_format(FormatList), + Chars1 = lists:flatten(io_lib:build_text(FormatList)), + Chars2 = lists:flatten(io_lib:format(Fmt2, Args2)), + Chars3 = lists:flatten(io_lib:format(Fmt, Args)), + Chars1 = Chars2, + Chars2 = Chars3, + Chars3. rfd(a, 0) -> []; @@ -2261,3 +2269,9 @@ writes(0, _) -> ok; writes(N, F1) -> file:write(F1, "hello\n"), writes(N - 1, F1). + +format_string(Config) -> + %% All but padding is tested by fmt/2. + "xxxxxxsssx" = fmt("~10.4.xs", ["sss"]), + "xxxxxxsssx" = fmt("~10.4.*s", [$x, "sss"]), + ok. |