From 7859736f126397e4a62ed9e0b4563c54d8dd1158 Mon Sep 17 00:00:00 2001 From: Kostis Sagonas Date: Mon, 13 Dec 2010 15:37:43 +0200 Subject: Fix native code compiler infinite loop and update type info for 're' The introduction of filenames being unicode binaries revealed a problem in the type analysis of the native code compiler which resulted in an infinite loop when compiling the 'filename' module. In addition, the hard-coded type information for the built-in functions of the 're' module was out-of-date, which resulted in erroneous type information for 'filelib' functions being stored in the PLT. --- lib/hipe/cerl/erl_bif_types.erl | 23 ++++++++++++----------- lib/hipe/cerl/erl_types.erl | 28 +++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/lib/hipe/cerl/erl_bif_types.erl b/lib/hipe/cerl/erl_bif_types.erl index ed5bf03804..eba95dc6af 100644 --- a/lib/hipe/cerl/erl_bif_types.erl +++ b/lib/hipe/cerl/erl_bif_types.erl @@ -48,6 +48,7 @@ t_boolean/0, t_byte/0, t_char/0, + t_charlist/0, t_cons/0, t_cons/2, t_cons_hd/1, @@ -4517,11 +4518,11 @@ arg_types(os, timestamp, 0) -> arg_types(re, compile, 1) -> [t_iodata()]; arg_types(re, compile, 2) -> - [t_iodata(), t_list(t_re_compile_option())]; + [t_sup(t_iodata(), t_charlist()), t_list(t_re_compile_option())]; arg_types(re, run, 2) -> - [t_iodata(), t_re_RE()]; + [t_sup(t_iodata(), t_charlist()), t_re_RE()]; arg_types(re, run, 3) -> - [t_iodata(), t_re_RE(), t_list(t_re_run_option())]; + [t_sup(t_iodata(), t_charlist()), t_re_RE(), t_list(t_re_run_option())]; %%------- string -------------------------------------------------------------- arg_types(string, chars, 2) -> [t_char(), t_non_neg_integer()]; @@ -4978,8 +4979,7 @@ t_ets_info_items() -> %% ===================================================================== t_prim_file_name() -> - t_sup([t_string(), - t_binary()]). + t_sup(t_string(), t_binary()). %% ===================================================================== %% These are used for the built-in functions of 'gen_tcp' @@ -5136,13 +5136,14 @@ t_re_MP() -> %% it's supposed to be an opaque data type t_tuple([t_atom('re_pattern'), t_integer(), t_integer(), t_binary()]). t_re_RE() -> - t_sup(t_re_MP(), t_iodata()). + t_sup([t_re_MP(), t_iodata(), t_charlist()]). t_re_compile_option() -> - t_sup([t_atoms(['anchored', 'caseless', 'dollar_endonly', 'dotall', - 'extended', 'firstline', 'multiline', 'no_auto_capture', - 'dupnames', 'ungreedy']), - t_tuple([t_atom('newline'), t_re_NLSpec()])]). + t_sup([t_atoms(['unicode', 'anchored', 'caseless', 'dollar_endonly', + 'dotall', 'extended', 'firstline', 'multiline', + 'no_auto_capture', 'dupnames', 'ungreedy']), + t_tuple([t_atom('newline'), t_re_NLSpec()]), + t_atoms(['bsr_anycrlf', 'bsr_unicode'])]). t_re_run_option() -> t_sup([t_atoms(['anchored', 'global', 'notbol', 'noteol', 'notempty']), @@ -5159,7 +5160,7 @@ t_re_Type() -> t_atoms(['index', 'list', 'binary']). t_re_NLSpec() -> - t_atoms(['cr', 'crlf', 'lf', 'anycrlf']). + t_atoms(['cr', 'crlf', 'lf', 'anycrlf', 'any']). t_re_ValueSpec() -> t_sup(t_atoms(['all', 'all_but_first', 'first', 'none']), t_re_ValueList()). diff --git a/lib/hipe/cerl/erl_types.erl b/lib/hipe/cerl/erl_types.erl index 1ed85af172..c8dc162457 100644 --- a/lib/hipe/cerl/erl_types.erl +++ b/lib/hipe/cerl/erl_types.erl @@ -62,6 +62,7 @@ t_boolean/0, t_byte/0, t_char/0, + t_charlist/0, t_collect_vars/1, t_cons/0, t_cons/2, @@ -1455,6 +1456,21 @@ t_is_tuple(_) -> false. %% Non-primitive types, including some handy syntactic sugar types %% +-spec t_charlist() -> erl_type(). + +t_charlist() -> + t_charlist(1). + +-spec t_charlist(non_neg_integer()) -> erl_type(). + +t_charlist(N) when N > 0 -> + t_maybe_improper_list(t_sup([t_unicode_char(), + t_unicode_binary(), + t_charlist(N-1)]), + t_sup(t_unicode_binary(), t_nil())); +t_charlist(0) -> + t_maybe_improper_list(t_any(), t_sup(t_unicode_binary(), t_nil())). + -spec t_constant() -> erl_type(). t_constant() -> @@ -1549,6 +1565,16 @@ t_parameterized_module() -> t_timeout() -> t_sup(t_non_neg_integer(), t_atom('infinity')). +-spec t_unicode_binary() -> erl_type(). + +t_unicode_binary() -> + t_binary(). % with characters encoded in UTF-8 coding standard + +-spec t_unicode_char() -> erl_type(). + +t_unicode_char() -> + t_integer(). % representing a valid unicode codepoint + %%----------------------------------------------------------------------------- %% Some built-in opaque types %% @@ -2825,7 +2851,7 @@ t_subtract(?list(Contents1, Termination1, Size1) = T, true -> case {Size1, Size2} of {?nonempty_qual, ?unknown_qual} -> ?none; - {?unknown_qual, ?nonempty_qual} -> Termination1; + {?unknown_qual, ?nonempty_qual} -> ?nil; {S, S} -> ?none end; false -> -- cgit v1.2.3 From 5aa0bc494167f30062eb1e4cc6968c3818ec35d1 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Thu, 16 Dec 2010 15:54:31 +0100 Subject: Set types correctly for open_port({spawn_executable, ... --- lib/hipe/cerl/erl_bif_types.erl | 11 ++++++----- lib/hipe/cerl/erl_types.erl | 6 ++++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/lib/hipe/cerl/erl_bif_types.erl b/lib/hipe/cerl/erl_bif_types.erl index eba95dc6af..f00821e450 100644 --- a/lib/hipe/cerl/erl_bif_types.erl +++ b/lib/hipe/cerl/erl_bif_types.erl @@ -125,7 +125,8 @@ t_tuple/1, t_tuple_args/1, t_tuple_size/1, - t_tuple_subtypes/1 + t_tuple_subtypes/1, + t_unicode_string/0 ]). -ifdef(DO_ERL_BIF_TYPES_TEST). @@ -3800,7 +3801,7 @@ arg_types(erlang, now, 0) -> arg_types(erlang, open_port, 2) -> [t_sup(t_atom(), t_sup([t_tuple([t_atom('spawn'), t_string()]), t_tuple([t_atom('spawn_driver'), t_string()]), - t_tuple([t_atom('spawn_executable'), t_string()]), + t_tuple([t_atom('spawn_executable'), t_sup(t_unicode_string(),t_binary())]), t_tuple([t_atom('fd'), t_integer(), t_integer()])])), t_list(t_sup(t_sup([t_atom('stream'), t_atom('exit_status'), @@ -3816,8 +3817,8 @@ arg_types(erlang, open_port, 2) -> t_tuple([t_atom('line'), t_integer()]), t_tuple([t_atom('cd'), t_string()]), t_tuple([t_atom('env'), t_list(t_tuple(2))]), % XXX: More - t_tuple([t_atom('args'), t_list(t_string())]), - t_tuple([t_atom('arg0'), t_string()])])))]; + t_tuple([t_atom('args'), t_list(t_sup(t_unicode_string(),t_binary()))]), + t_tuple([t_atom('arg0'),t_sup(t_unicode_string(),t_binary())])])))]; arg_types(erlang, phash, 2) -> [t_any(), t_pos_integer()]; arg_types(erlang, phash2, 1) -> @@ -4979,7 +4980,7 @@ t_ets_info_items() -> %% ===================================================================== t_prim_file_name() -> - t_sup(t_string(), t_binary()). + t_sup(t_unicode_string(), t_binary()). %% ===================================================================== %% These are used for the built-in functions of 'gen_tcp' diff --git a/lib/hipe/cerl/erl_types.erl b/lib/hipe/cerl/erl_types.erl index c8dc162457..080d6936b2 100644 --- a/lib/hipe/cerl/erl_types.erl +++ b/lib/hipe/cerl/erl_types.erl @@ -196,6 +196,7 @@ t_tuple_size/1, t_tuple_sizes/1, t_tuple_subtypes/1, + t_unicode_string/0, t_unify/2, t_unify/3, t_unit/0, @@ -1456,6 +1457,11 @@ t_is_tuple(_) -> false. %% Non-primitive types, including some handy syntactic sugar types %% +-spec t_unicode_string() -> erl_type(). + +t_unicode_string() -> + t_list(t_unicode_char()). + -spec t_charlist() -> erl_type(). t_charlist() -> -- cgit v1.2.3 From 03b9a1912b6bd2eb9c86b3fd010c0aea792536b9 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 17 Dec 2010 17:05:47 +0100 Subject: Remove dead code (and dialyzer errors) from filename and re --- lib/stdlib/src/filename.erl | 14 ++++++-------- lib/stdlib/src/re.erl | 42 +++++++++++++++++++----------------------- 2 files changed, 25 insertions(+), 31 deletions(-) diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index e38b8957f2..24abf1e977 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -165,8 +165,6 @@ basename1([$/|[]], Tail, DirSep2) -> basename1([], Tail, DirSep2); basename1([$/|Rest], _Tail, DirSep2) -> basename1(Rest, [], DirSep2); -basename1([[_|_]=List|Rest], Tail, DirSep2) -> - basename1(List++Rest, Tail, DirSep2); basename1([DirSep2|Rest], Tail, DirSep2) when is_integer(DirSep2) -> basename1([$/|Rest], Tail, DirSep2); basename1([Char|Rest], Tail, DirSep2) when is_integer(Char) -> @@ -280,8 +278,6 @@ dirname(Name0) -> Name = flatten(Name0), dirname(Name, [], [], separators()). -dirname([[_|_]=List|Rest], Dir, File, Seps) -> - dirname(List++Rest, Dir, File, Seps); dirname([$/|Rest], Dir, File, Seps) -> dirname(Rest, File++Dir, [$/], Seps); dirname([DirSep|Rest], Dir, File, {DirSep,_}=Seps) when is_integer(DirSep) -> @@ -346,8 +342,6 @@ extension(Name) when is_binary(Name) -> [] end, case binary:matches(Name,[<<".">>]) of - nomatch -> % Bug in binary workaround :( - <<>>; [] -> <<>>; List -> @@ -479,6 +473,12 @@ maybe_remove_dirsep(Name, _) -> %% by a previous call to join/{1,2}. -spec append(file:filename(), file:name()) -> file:filename(). +append(Dir, Name) when is_binary(Dir), is_binary(Name) -> + <>; +append(Dir, Name) when is_binary(Dir) -> + append(Dir,filename_string_to_binary(Name)); +append(Dir, Name) when is_binary(Name) -> + append(filename_string_to_binary(Dir),Name); append(Dir, Name) -> Dir ++ [$/|Name]. @@ -685,8 +685,6 @@ split([$/|Rest], Comp, Components, OsType) -> split(Rest, [], [lists:reverse(Comp)|Components], OsType); split([Char|Rest], Comp, Components, OsType) when is_integer(Char) -> split(Rest, [Char|Comp], Components, OsType); -split([List|Rest], Comp, Components, OsType) when is_list(List) -> - split(List++Rest, Comp, Components, OsType); split([], [], Components, _OsType) -> lists:reverse(Components); split([], Comp, Components, OsType) -> diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl index 296a6b3d23..9642de17b4 100644 --- a/lib/stdlib/src/re.erl +++ b/lib/stdlib/src/re.erl @@ -208,29 +208,25 @@ replace(Subject,RE,Replacement,Options) -> process_repl_params(Options,iodata,false), FlatSubject = to_binary(Subject, Unicode), FlatReplacement = to_binary(Replacement, Unicode), - case do_replace(FlatSubject,Subject,RE,FlatReplacement,NewOpt) of - {error,_Err} -> - throw(badre); - IoList -> - case Convert of - iodata -> - IoList; - binary -> - case Unicode of - false -> - iolist_to_binary(IoList); - true -> - unicode:characters_to_binary(IoList,unicode) - end; - list -> - case Unicode of - false -> - binary_to_list(iolist_to_binary(IoList)); - true -> - unicode:characters_to_list(IoList,unicode) - end - end - end + IoList = do_replace(FlatSubject,Subject,RE,FlatReplacement,NewOpt), + case Convert of + iodata -> + IoList; + binary -> + case Unicode of + false -> + iolist_to_binary(IoList); + true -> + unicode:characters_to_binary(IoList,unicode) + end; + list -> + case Unicode of + false -> + binary_to_list(iolist_to_binary(IoList)); + true -> + unicode:characters_to_list(IoList,unicode) + end + end catch throw:badopt -> erlang:error(badarg,[Subject,RE,Replacement,Options]); -- cgit v1.2.3 From 0fe99329f6d39a5dfddda689a90917982fa2ee0b Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Tue, 21 Dec 2010 17:12:37 +0100 Subject: Correct type specs in io --- lib/stdlib/src/io.erl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/lib/stdlib/src/io.erl b/lib/stdlib/src/io.erl index 1d0f9374bc..78412ab2bc 100644 --- a/lib/stdlib/src/io.erl +++ b/lib/stdlib/src/io.erl @@ -39,6 +39,8 @@ -type device() :: atom() | pid(). -type prompt() :: atom() | string(). +-type error_description() :: term(). % Whatever the io-server sends. +-type request_error() :: {'error',error_description()}. %% XXX: Some uses of line() in this file may need to read erl_scan:location() -type line() :: pos_integer(). @@ -299,32 +301,32 @@ format(Io, Format, Args) -> %% Scanning Erlang code. --spec scan_erl_exprs(prompt()) -> erl_scan:tokens_result(). +-spec scan_erl_exprs(prompt()) -> erl_scan:tokens_result() | request_error(). scan_erl_exprs(Prompt) -> scan_erl_exprs(default_input(), Prompt, 1). --spec scan_erl_exprs(device(), prompt()) -> erl_scan:tokens_result(). +-spec scan_erl_exprs(device(), prompt()) -> erl_scan:tokens_result() | request_error(). scan_erl_exprs(Io, Prompt) -> scan_erl_exprs(Io, Prompt, 1). --spec scan_erl_exprs(device(), prompt(), line()) -> erl_scan:tokens_result(). +-spec scan_erl_exprs(device(), prompt(), line()) -> erl_scan:tokens_result() | request_error(). scan_erl_exprs(Io, Prompt, Pos0) -> request(Io, {get_until,unicode,Prompt,erl_scan,tokens,[Pos0]}). --spec scan_erl_form(prompt()) -> erl_scan:tokens_result(). +-spec scan_erl_form(prompt()) -> erl_scan:tokens_result() | request_error(). scan_erl_form(Prompt) -> scan_erl_form(default_input(), Prompt, 1). --spec scan_erl_form(device(), prompt()) -> erl_scan:tokens_result(). +-spec scan_erl_form(device(), prompt()) -> erl_scan:tokens_result() | request_error(). scan_erl_form(Io, Prompt) -> scan_erl_form(Io, Prompt, 1). --spec scan_erl_form(device(), prompt(), line()) -> erl_scan:tokens_result(). +-spec scan_erl_form(device(), prompt(), line()) -> erl_scan:tokens_result() | request_error(). scan_erl_form(Io, Prompt, Pos0) -> request(Io, {get_until,unicode,Prompt,erl_scan,tokens,[Pos0]}). @@ -335,7 +337,8 @@ scan_erl_form(Io, Prompt, Pos0) -> -type parse_ret() :: {'ok', erl_parse_expr_list(), line()} | {'eof', line()} - | {'error', erl_scan:error_info(), line()}. + | {'error', erl_scan:error_info(), line()} + | request_error(). -spec parse_erl_exprs(prompt()) -> parse_ret(). @@ -364,7 +367,8 @@ parse_erl_exprs(Io, Prompt, Pos0) -> -type parse_form_ret() :: {'ok', erl_parse_absform(), line()} | {'eof', line()} - | {'error', erl_scan:error_info(), line()}. + | {'error', erl_scan:error_info(), line()} + | request_error(). -spec parse_erl_form(prompt()) -> parse_form_ret(). -- cgit v1.2.3