From 9c1cb443a22679f038451b4491c4c50ec53f77a2 Mon Sep 17 00:00:00 2001 From: Richard Carlsson Date: Thu, 8 Mar 2018 12:31:49 +0100 Subject: Allow multiple modifier characters in io:format control sequences This makes it possible to print unicode atoms at the same time as suppressing detection of printable lists. --- lib/stdlib/doc/src/io.xml | 33 ++++++++++++++++------- lib/stdlib/src/erl_lint.erl | 8 ++++++ lib/stdlib/src/io_lib_format.erl | 36 ++++++++++++-------------- lib/stdlib/test/io_SUITE.erl | 56 +++++++++++++++++++++++++++++++--------- 4 files changed, 92 insertions(+), 41 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/doc/src/io.xml b/lib/stdlib/doc/src/io.xml index 6a7c06188b..f1037ec76b 100644 --- a/lib/stdlib/doc/src/io.xml +++ b/lib/stdlib/doc/src/io.xml @@ -137,11 +137,11 @@ Hello world! ok

The general format of a control sequence is ~F.P.PadModC.

-

Character C determines the type of control sequence - to be used, F and P are optional numeric - arguments. If F, P, or Pad is *, - the next argument in Data is used as the numeric value - of F or P.

+

The character C determines the type of control sequence + to be used. It is the only required field. All of F, + P, Pad, and Mod are optional. For example, + to use a # for Pad but use the default values for + F and P, you can write ~..#C.

F is the field width of the printed argument. A @@ -167,13 +167,26 @@ ok The default padding character is ' ' (space).

-

Mod is the control sequence modifier. It is a - single character that changes the interpretation of - Data. This can be t, for Unicode translation, - or l, for stopping p and P from - detecting printable characters.

+

Mod is the control sequence modifier. This is + one or more characters that change the interpretation of + Data. The current modifiers are t, for Unicode + translation, and l, for stopping p and P + from detecting printable characters.

+

If F, P, or Pad is a * character, + the next argument in Data is used as the value. + For example:

+
+1> io:fwrite("~*.*.0f~n",[9, 5, 3.14159265]).
+003.14159
+ok
+

To use a literal * character as Pad, it must be + passed as an argument:

+
+2> io:fwrite("~*.*.*f~n",[9, 5, $*, 3.14159265]).
+**3.14159
+ok

Available control sequences:

~ diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl index 1930c462e8..9a62d21d34 100644 --- a/lib/stdlib/src/erl_lint.erl +++ b/lib/stdlib/src/erl_lint.erl @@ -3971,6 +3971,8 @@ extract_sequence(3, [$.,_|Fmt], Need) -> extract_sequence(4, Fmt, Need); extract_sequence(3, Fmt, Need) -> extract_sequence(4, Fmt, Need); +extract_sequence(4, [$t, $l | Fmt], Need) -> + extract_sequence(4, [$l, $t | Fmt], Need); extract_sequence(4, [$t, $c | Fmt], Need) -> extract_sequence(5, [$c|Fmt], Need); extract_sequence(4, [$t, $s | Fmt], Need) -> @@ -3987,8 +3989,14 @@ extract_sequence(4, [$t, C | _Fmt], _Need) -> {error,"invalid control ~t" ++ [C]}; extract_sequence(4, [$l, $p | Fmt], Need) -> extract_sequence(5, [$p|Fmt], Need); +extract_sequence(4, [$l, $t, $p | Fmt], Need) -> + extract_sequence(5, [$p|Fmt], Need); extract_sequence(4, [$l, $P | Fmt], Need) -> extract_sequence(5, [$P|Fmt], Need); +extract_sequence(4, [$l, $t, $P | Fmt], Need) -> + extract_sequence(5, [$P|Fmt], Need); +extract_sequence(4, [$l, $t, C | _Fmt], _Need) -> + {error,"invalid control ~lt" ++ [C]}; extract_sequence(4, [$l, C | _Fmt], _Need) -> {error,"invalid control ~l" ++ [C]}; extract_sequence(4, Fmt, Need) -> diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl index e345810ca0..3e1f90ac24 100644 --- a/lib/stdlib/src/io_lib_format.erl +++ b/lib/stdlib/src/io_lib_format.erl @@ -126,25 +126,23 @@ collect_cseq(Fmt0, Args0) -> {F,Ad,Fmt1,Args1} = field_width(Fmt0, Args0), {P,Fmt2,Args2} = precision(Fmt1, Args1), {Pad,Fmt3,Args3} = pad_char(Fmt2, Args2), - {Encoding,Fmt4,Args4} = encoding(Fmt3, Args3), - {Strings,Fmt5,Args5} = strings(Fmt4, Args4), - {C,As,Fmt6,Args6} = collect_cc(Fmt5, Args5), - FormatSpec = #{control_char => C, args => As, width => F, adjust => Ad, - precision => P, pad_char => Pad, encoding => Encoding, - strings => Strings}, - {FormatSpec,Fmt6,Args6}. - -encoding([$t|Fmt],Args) -> - true = hd(Fmt) =/= $l, - {unicode,Fmt,Args}; -encoding(Fmt,Args) -> - {latin1,Fmt,Args}. - -strings([$l|Fmt],Args) -> - true = hd(Fmt) =/= $t, - {false,Fmt,Args}; -strings(Fmt,Args) -> - {true,Fmt,Args}. + Spec0 = #{width => F, + adjust => Ad, + precision => P, + pad_char => Pad, + encoding => latin1, + strings => true}, + {Spec1,Fmt4} = modifiers(Fmt3, Spec0), + {C,As,Fmt5,Args4} = collect_cc(Fmt4, Args3), + Spec2 = Spec1#{control_char => C, args => As}, + {Spec2,Fmt5,Args4}. + +modifiers([$t|Fmt], Spec) -> + modifiers(Fmt, Spec#{encoding => unicode}); +modifiers([$l|Fmt], Spec) -> + modifiers(Fmt, Spec#{strings => false}); +modifiers(Fmt, Spec) -> + {Spec, Fmt}. field_width([$-|Fmt0], Args0) -> {F,Fmt,Args} = field_value(Fmt0, Args0), diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl index 45363c0592..46e96a8284 100644 --- a/lib/stdlib/test/io_SUITE.erl +++ b/lib/stdlib/test/io_SUITE.erl @@ -1905,29 +1905,61 @@ otp_10836(Suite) when is_list(Suite) -> %% OTP-10755. The 'l' modifier otp_10755(Suite) when is_list(Suite) -> + %% printing plain ascii characters S = "string", "\"string\"" = fmt("~p", [S]), "[115,116,114,105,110,103]" = fmt("~lp", [S]), "\"string\"" = fmt("~P", [S, 2]), "[115|...]" = fmt("~lP", [S, 2]), - {'EXIT',{badarg,_}} = (catch fmt("~ltp", [S])), - {'EXIT',{badarg,_}} = (catch fmt("~tlp", [S])), - {'EXIT',{badarg,_}} = (catch fmt("~ltP", [S])), - {'EXIT',{badarg,_}} = (catch fmt("~tlP", [S])), + %% printing latin1 chars, with and without modifiers + T = {[255],list_to_atom([255]),[a,b,c]}, + "{\"ÿ\",ÿ,[a,b,c]}" = fmt("~p", [T]), + "{\"ÿ\",ÿ,[a,b,c]}" = fmt("~tp", [T]), + "{[255],ÿ,[a,b,c]}" = fmt("~lp", [T]), + "{[255],ÿ,[a,b,c]}" = fmt("~ltp", [T]), + "{[255],ÿ,[a,b,c]}" = fmt("~tlp", [T]), + "{\"ÿ\",ÿ,...}" = fmt("~P", [T,3]), + "{\"ÿ\",ÿ,...}" = fmt("~tP", [T,3]), + "{[255],ÿ,...}" = fmt("~lP", [T,3]), + "{[255],ÿ,...}" = fmt("~ltP", [T,3]), + "{[255],ÿ,...}" = fmt("~tlP", [T,3]), + %% printing unicode chars, with and without modifiers + U = {[666],list_to_atom([666]),[a,b,c]}, + "{[666],'\\x{29A}',[a,b,c]}" = fmt("~p", [U]), + case io:printable_range() of + unicode -> + "{\"ʚ\",'ʚ',[a,b,c]}" = fmt("~tp", [U]), + "{\"ʚ\",'ʚ',...}" = fmt("~tP", [U,3]); + latin1 -> + "{[666],'ʚ',[a,b,c]}" = fmt("~tp", [U]), + "{[666],'ʚ',...}" = fmt("~tP", [U,3]) + end, + "{[666],'\\x{29A}',[a,b,c]}" = fmt("~lp", [U]), + "{[666],'ʚ',[a,b,c]}" = fmt("~ltp", [U]), + "{[666],'ʚ',[a,b,c]}" = fmt("~tlp", [U]), + "{[666],'\\x{29A}',...}" = fmt("~P", [U,3]), + "{[666],'\\x{29A}',...}" = fmt("~lP", [U,3]), + "{[666],'ʚ',...}" = fmt("~ltP", [U,3]), + "{[666],'ʚ',...}" = fmt("~tlP", [U,3]), + %% the compiler should catch uses of ~l with other than pP Text = "-module(l_mod).\n" "-export([t/0]).\n" "t() ->\n" " S = \"string\",\n" - " io:format(\"~ltp\", [S]),\n" - " io:format(\"~tlp\", [S]),\n" - " io:format(\"~ltP\", [S, 1]),\n" - " io:format(\"~tlP\", [S, 1]).\n", + " io:format(\"~lw\", [S]),\n" + " io:format(\"~lW\", [S, 1]),\n" + " io:format(\"~ltw\", [S]),\n" + " io:format(\"~tlw\", [S]),\n" + " io:format(\"~ltW\", [S, 1]),\n" + " io:format(\"~tlW\", [S, 1]).\n", {ok,l_mod,[{_File,Ws}]} = compile_file("l_mod.erl", Text, Suite), - ["format string invalid (invalid control ~lt)", - "format string invalid (invalid control ~tl)", - "format string invalid (invalid control ~lt)", - "format string invalid (invalid control ~tl)"] = + ["format string invalid (invalid control ~lw)", + "format string invalid (invalid control ~lW)", + "format string invalid (invalid control ~ltw)", + "format string invalid (invalid control ~ltw)", + "format string invalid (invalid control ~ltW)", + "format string invalid (invalid control ~ltW)"] = [lists:flatten(M:format_error(E)) || {_L,M,E} <- Ws], ok. -- cgit v1.2.3