From 83b6daef8d52f69c3b583bcc67c0c11fb8dcba0b Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Fri, 21 Mar 2014 15:34:18 +0100 Subject: stdlib: Generalize erl_parse:abstract/2 The 'encoding' option of erl_parse:abstract/2 has been extended to include 'none' and a callback function (a predicate). The rationale is that a more general means of determining what integer lists are to be represented as strings may help readability when generating Erlang code given input in some other encoding than Latin-1 or UTF-8. --- lib/stdlib/doc/src/erl_parse.xml | 10 ++++-- lib/stdlib/src/erl_parse.yrl | 64 ++++++++++++++++++++------------------ lib/stdlib/test/erl_scan_SUITE.erl | 26 ++++++++++++++-- 3 files changed, 65 insertions(+), 35 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/doc/src/erl_parse.xml b/lib/stdlib/doc/src/erl_parse.xml index 2d5aff3c6c..cf0bff48cd 100644 --- a/lib/stdlib/doc/src/erl_parse.xml +++ b/lib/stdlib/doc/src/erl_parse.xml @@ -4,7 +4,7 @@
- 19962013 + 19962014 Ericsson AB. All Rights Reserved. @@ -173,6 +173,7 @@ + Convert an Erlang term into an abstract form

Converts the Erlang data structure Data into an @@ -183,7 +184,12 @@ selecting which integer lists will be considered as strings. The default is to use the encoding returned by - epp:default_encoding/0

+ epp:default_encoding/0. + The value none means that no integer lists will be + considered as strings. The encoding_func() will be + called with one integer of a list at a time, and if it + returns true for every integer, the list will be + considered a string.

diff --git a/lib/stdlib/src/erl_parse.yrl b/lib/stdlib/src/erl_parse.yrl index 6316db7054..1dc5fc52a7 100644 --- a/lib/stdlib/src/erl_parse.yrl +++ b/lib/stdlib/src/erl_parse.yrl @@ -2,7 +2,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2013. All Rights Reserved. +%% Copyright Ericsson AB 1996-2014. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -919,59 +919,63 @@ normalise_list([]) -> Data :: term(), AbsTerm :: abstract_expr(). abstract(T) -> - abstract(T, 0, epp:default_encoding()). + abstract(T, 0, enc_func(epp:default_encoding())). + +-type encoding_func() :: fun((non_neg_integer()) -> boolean()). %%% abstract/2 takes line and encoding options -spec abstract(Data, Options) -> AbsTerm when Data :: term(), Options :: Line | [Option], Option :: {line, Line} | {encoding, Encoding}, - Encoding :: latin1 | unicode | utf8, + Encoding :: 'latin1' | 'unicode' | 'utf8' | 'none' | encoding_func(), Line :: erl_scan:line(), AbsTerm :: abstract_expr(). abstract(T, Line) when is_integer(Line) -> - abstract(T, Line, epp:default_encoding()); + abstract(T, Line, enc_func(epp:default_encoding())); abstract(T, Options) when is_list(Options) -> Line = proplists:get_value(line, Options, 0), Encoding = proplists:get_value(encoding, Options,epp:default_encoding()), - abstract(T, Line, Encoding). + EncFunc = enc_func(Encoding), + abstract(T, Line, EncFunc). -define(UNICODE(C), - is_integer(C) andalso - (C >= 0 andalso C < 16#D800 orelse + (C < 16#D800 orelse C > 16#DFFF andalso C < 16#FFFE orelse C > 16#FFFF andalso C =< 16#10FFFF)). +enc_func(latin1) -> fun(C) -> C < 256 end; +enc_func(unicode) -> fun(C) -> ?UNICODE(C) end; +enc_func(utf8) -> fun(C) -> ?UNICODE(C) end; +enc_func(none) -> none; +enc_func(Fun) when is_function(Fun, 1) -> Fun; +enc_func(Term) -> erlang:error({badarg, Term}). 
+ abstract(T, L, _E) when is_integer(T) -> {integer,L,T}; abstract(T, L, _E) when is_float(T) -> {float,L,T}; abstract(T, L, _E) when is_atom(T) -> {atom,L,T}; abstract([], L, _E) -> {nil,L}; abstract(B, L, _E) when is_bitstring(B) -> {bin, L, [abstract_byte(Byte, L) || Byte <- bitstring_to_list(B)]}; -abstract([C|T], L, unicode=E) when ?UNICODE(C) -> - abstract_unicode_string(T, [C], L, E); -abstract([C|T], L, utf8=E) when ?UNICODE(C) -> - abstract_unicode_string(T, [C], L, E); -abstract([C|T], L, latin1=E) when is_integer(C), 0 =< C, C < 256 -> - abstract_string(T, [C], L, E); -abstract([H|T], L, E) -> +abstract([H|T], L, none=E) -> {cons,L,abstract(H, L, E),abstract(T, L, E)}; +abstract(List, L, E) when is_list(List) -> + abstract_list(List, [], L, E); abstract(Tuple, L, E) when is_tuple(Tuple) -> - {tuple,L,abstract_list(tuple_to_list(Tuple), L, E)}. - -abstract_string([C|T], String, L, E) when is_integer(C), 0 =< C, C < 256 -> - abstract_string(T, [C|String], L, E); -abstract_string([], String, L, _E) -> - {string, L, lists:reverse(String)}; -abstract_string(T, String, L, E) -> - not_string(String, abstract(T, L, E), L, E). - -abstract_unicode_string([C|T], String, L, E) when ?UNICODE(C) -> - abstract_unicode_string(T, [C|String], L, E); -abstract_unicode_string([], String, L, _E) -> + {tuple,L,abstract_tuple_list(tuple_to_list(Tuple), L, E)}. + +abstract_list([H|T], String, L, E) -> + case is_integer(H) andalso H >= 0 andalso E(H) of + true -> + abstract_list(T, [H|String], L, E); + false -> + AbstrList = {cons,L,abstract(H, L, E),abstract(T, L, E)}, + not_string(String, AbstrList, L, E) + end; +abstract_list([], String, L, _E) -> {string, L, lists:reverse(String)}; -abstract_unicode_string(T, String, L, E) -> +abstract_list(T, String, L, E) -> not_string(String, abstract(T, L, E), L, E). not_string([C|T], Result, L, E) -> @@ -979,9 +983,9 @@ not_string([C|T], Result, L, E) -> not_string([], Result, _L, _E) -> Result. 
-abstract_list([H|T], L, E) -> - [abstract(H, L, E)|abstract_list(T, L, E)]; -abstract_list([], _L, _E) -> +abstract_tuple_list([H|T], L, E) -> + [abstract(H, L, E)|abstract_tuple_list(T, L, E)]; +abstract_tuple_list([], _L, _E) -> []. abstract_byte(Byte, L) when is_integer(Byte) -> diff --git a/lib/stdlib/test/erl_scan_SUITE.erl b/lib/stdlib/test/erl_scan_SUITE.erl index 447e159cd4..35067e8116 100644 --- a/lib/stdlib/test/erl_scan_SUITE.erl +++ b/lib/stdlib/test/erl_scan_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1998-2013. All Rights Reserved. +%% Copyright Ericsson AB 1998-2014. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -21,7 +21,7 @@ init_per_group/2,end_per_group/2]). -export([ error_1/1, error_2/1, iso88591/1, otp_7810/1, otp_10302/1, - otp_10990/1, otp_10992/1]). + otp_10990/1, otp_10992/1, otp_11807/1]). -import(lists, [nth/2,flatten/1]). -import(io_lib, [print/1]). @@ -60,7 +60,8 @@ end_per_testcase(_Case, Config) -> suite() -> [{ct_hooks,[ts_install_cth]}]. all() -> - [{group, error}, iso88591, otp_7810, otp_10302, otp_10990, otp_10992]. + [{group, error}, iso88591, otp_7810, otp_10302, otp_10990, otp_10992, + otp_11807]. groups() -> [{error, [], [error_1, error_2]}]. @@ -1144,6 +1145,25 @@ otp_10992(Config) when is_list(Config) -> erl_parse:abstract([$A,42.0], [{encoding,utf8}]), ok. +otp_11807(doc) -> + "OTP-11807. 
Generalize erl_parse:abstract/2."; +otp_11807(suite) -> + []; +otp_11807(Config) when is_list(Config) -> + {cons,0,{integer,0,97},{cons,0,{integer,0,98},{nil,0}}} = + erl_parse:abstract("ab", [{encoding,none}]), + {cons,0,{integer,0,-1},{nil,0}} = + erl_parse:abstract([-1], [{encoding,latin1}]), + ASCII = fun(I) -> I >= 0 andalso I < 128 end, + {string,0,"xyz"} = erl_parse:abstract("xyz", [{encoding,ASCII}]), + {cons,0,{integer,0,228},{nil,0}} = + erl_parse:abstract([228], [{encoding,ASCII}]), + {cons,0,{integer,0,97},{atom,0,a}} = + erl_parse:abstract("a"++a, [{encoding,latin1}]), + {'EXIT', {{badarg,bad},_}} = % minor backward incompatibility + (catch erl_parse:abstract("string", [{encoding,bad}])), + ok. + test_string(String, Expected) -> {ok, Expected, _End} = erl_scan:string(String), test(String). -- cgit v1.2.3